是否有任何免费的 PHP 库可以解析 .mobi 文件以获取:
- 作者
- 标题
- 出版商
- 封面
编辑:
对于那些认为本问题与“是否存在用于处理 PRC/MOBI 文件的 PHP 库”完全重复的人:你们显然没有认真阅读这两个问题。
那位提问者想知道如何使用 PHP 库生成 .mobi 文件。我想知道如何分解或解析已经创建的 .mobi 文件以获取某些信息。因此,该问题的解决方案phpMobi将不起作用,因为它是从 HTML 生成 .mobi 文件的脚本,而不是解析 .mobi 文件。
是否有任何免费的 PHP 库可以解析 .mobi 文件以获取:
编辑:
对于那些认为本问题与“是否存在用于处理 PRC/MOBI 文件的 PHP 库”完全重复的人:你们显然没有认真阅读这两个问题。
那位提问者想知道如何使用 PHP 库生成 .mobi 文件。我想知道如何分解或解析已经创建的 .mobi 文件以获取某些信息。因此,该问题的解决方案phpMobi将不起作用,因为它是从 HTML 生成 .mobi 文件的脚本,而不是解析 .mobi 文件。
一个非常非常蹩脚的例子,但如果你绝望了,你可以尝试这样的事情:
// Crude heuristic: take the 512 bytes that start at the "EXTH" marker, split
// them on NUL bytes and keep the fragments that look like readable text.
$data = file_get_contents("A Young Girl's Diary - Freud, Sigmund.mobi");
// The file is binary, so locate and slice by byte offset; the mb_* functions
// would try to interpret the raw bytes as multibyte characters.
$exthOffset = ($data === false) ? false : strpos($data, 'EXTH');
if ($exthOffset === false) {
    // Either the file could not be read or it carries no EXTH block.
    echo "No EXTH header found\n";
} else {
    $chunk = substr($data, $exthOffset, 512);
    $chunks = explode("\x00", $chunk);
    // The first fragment is the "EXTH" marker itself.
    array_shift($chunks);
    // Keep fragments containing at least one capital letter and longer than two characters.
    $chunks = array_filter($chunks, function($str){return preg_match('#([A-Z])#', $str) && mb_strlen($str) > 2;});
    // array_combine() needs equally sized arrays, so pair up only as many
    // labels and fragments as are actually available.
    $keys = array('author', 'publisher', 'title');
    $chunks = array_slice(array_values($chunks), 0, count($keys));
    $chunks = array_combine(array_slice($keys, 0, count($chunks)), $chunks);
    print_r($chunks);
}
输出:
Array
(
[author] => Freud, Sigmund
[publisher] => Webarto
[title] => A Young Girl's Diary
)
使用的文件:http://freekindlebooks.org/Freud/752-h.mobi (已使用 Calibre 编辑出版商元数据)
文件解析既不容易,也不有趣。看看这个:http://code.google.com/p/xee/source/browse/XeePhotoshopLoader.m?r=a70d7396356997114b548f4ab2cbd49badd7d285#107
您应该做的是逐字节读取,但是由于没有详细的文档,恐怕这不是一件容易的事。
PS我还没有尝试获取封面照片。
如果有人仍然感兴趣,这里有一个 mobi 元数据阅读示例:
/**
 * Value holder for the PalmDOC header fields the parser keeps.
 */
class palmDOCHeader
{
    /** Compression value (2-byte field). */
    public $Compression = 0;

    /** Text length (4-byte field). */
    public $TextLength = 0;

    /** Record count (2-byte field). */
    public $Records = 0;

    /** Record size (2-byte field). */
    public $RecordSize = 0;
}
/**
 * Value holder for the Palm database header; only the record list is kept.
 */
class palmHeader
{
    /** List of palmRecord entries, in file order. */
    public $Records = array();
}
/**
 * One entry of the Palm database record list.
 */
class palmRecord
{
    /** Offset of the record's data (4-byte field). */
    public $Offset = 0;

    /** Record attributes (1-byte field). */
    public $Attributes = 0;

    /** Record id (3-byte field). */
    public $Id = 0;
}
/**
 * Value holder for the MOBI header fields.
 */
class mobiHeader
{
    /** Header length in bytes; the parser uses it to locate what follows the header. */
    public $Length = 0;

    /** Type value (4-byte field). */
    public $Type = 0;

    /** Encoding value (4-byte field). */
    public $Encoding = 0;

    /** Id value (4-byte field). */
    public $Id = 0;

    /** File version. */
    public $FileVersion = 0;
}
/**
 * Value holder for the EXTH header and the records that follow it.
 */
class exthHeader
{
    /** Header length (4-byte field). */
    public $Length = 0;

    /** List of exthRecord entries, in file order. */
    public $Records = array();
}
/**
 * One EXTH metadata record.
 */
class exthRecord
{
    /** Numeric record type used to look the record up. */
    public $Type = 0;

    /** Record length; the parser reads Length - 8 bytes of data after the type and length fields. */
    public $Length = 0;

    /** Raw record payload. */
    public $Data = "";
}
/**
 * Reads metadata from a MOBI e-book file.
 *
 * The constructor walks the Palm database record list, the PalmDOC header,
 * the MOBI header and, when present, the EXTH header, echoing what it finds
 * along the way. The EXTH records are kept so the accessor methods can
 * return individual metadata fields.
 */
class mobi {
    protected $mobiHeader;
    protected $exthHeader;

    /**
     * Parses the given file and prints a dump of its headers.
     *
     * When the file cannot be opened or is not a BOOKMOBI file, the object is
     * still constructed and every accessor returns an empty string.
     *
     * @param string $file Path of the .mobi file to parse.
     */
    public function __construct($file){
        // "rb" keeps the read binary-safe on platforms that translate line endings.
        $handle = fopen($file, "rb");
        if (!$handle){
            return;
        }

        // The type/creator signature sits at offset 60 of the Palm database header.
        fseek($handle, 60, SEEK_SET);
        if (fread($handle, 8) !== "BOOKMOBI"){
            echo "Invalid file format";
            fclose($handle);
            return;
        }

        // Palm Database
        echo "\nPalm database:\n";
        $palmHeader = new palmHeader();
        fseek($handle, 0, SEEK_SET);
        // The 32-byte name field is NUL-padded; strip the padding before printing.
        $name = rtrim((string) fread($handle, 32), "\0");
        echo "Name: ".$name."\n";
        fseek($handle, 76, SEEK_SET);
        $records = $this->readUInt($handle, 2);
        echo "Records: ".$records."\n";
        fseek($handle, 78, SEEK_SET);
        for ($i=0; $i<$records; $i++){
            $record = new palmRecord();
            $record->Offset = $this->readUInt($handle, 4);
            $record->Attributes = $this->readUInt($handle, 1);
            $record->Id = $this->readUInt($handle, 3);
            array_push($palmHeader->Records, $record);
            echo "Record ".$i." offset: ".$record->Offset." attributes: ".$record->Attributes." id : ".$record->Id."\n";
        }
        if (!$palmHeader->Records){
            // Without record 0 there is no PalmDOC/MOBI header to read.
            echo "Invalid file format";
            fclose($handle);
            return;
        }

        // PalmDOC Header (starts at the offset of record 0)
        $palmDOCHeader = new palmDOCHeader();
        fseek($handle, $palmHeader->Records[0]->Offset, SEEK_SET);
        $palmDOCHeader->Compression = $this->readUInt($handle, 2);
        fread($handle, 2); // skipped, value not used
        $palmDOCHeader->TextLength = $this->readUInt($handle, 4);
        $palmDOCHeader->Records = $this->readUInt($handle, 2);
        $palmDOCHeader->RecordSize = $this->readUInt($handle, 2);
        fread($handle, 4); // skipped, value not used
        echo "\nPalmDOC Header:\n";
        echo "Compression:".$palmDOCHeader->Compression."\n";
        echo "TextLength:".$palmDOCHeader->TextLength."\n";
        echo "Records:".$palmDOCHeader->Records."\n";
        echo "RecordSize:".$palmDOCHeader->RecordSize."\n";

        // MOBI Header
        $mobiStart = ftell($handle);
        if (fread($handle, 4) === "MOBI"){
            $this->mobiHeader = new mobiHeader();
            echo "\nMOBI header:\n";
            $this->mobiHeader->Length = $this->readUInt($handle, 4);
            $this->mobiHeader->Type = $this->readUInt($handle, 4);
            $this->mobiHeader->Encoding = $this->readUInt($handle, 4);
            $this->mobiHeader->Id = $this->readUInt($handle, 4);
            // The file version is the next 4-byte field of the MOBI header.
            $this->mobiHeader->FileVersion = $this->readUInt($handle, 4);
            echo "Header length: ".$this->mobiHeader->Length."\n";
            echo "Type: ".$this->mobiHeader->Type."\n";
            echo "Encoding: ".$this->mobiHeader->Encoding."\n";
            echo "Id: ".$this->mobiHeader->Id."\n";

            // The EXTH block, if any, directly follows the MOBI header.
            fseek($handle, $mobiStart+$this->mobiHeader->Length, SEEK_SET);
            if (fread($handle, 4) === "EXTH"){
                $this->exthHeader = new exthHeader();
                echo "\nEXTH header:\n";
                $this->exthHeader->Length = $this->readUInt($handle, 4);
                $records = $this->readUInt($handle, 4);
                echo "Records: ".$records."\n";
                for ($i=0; $i<$records; $i++){
                    $record = new exthRecord();
                    $record->Type = $this->readUInt($handle, 4);
                    $record->Length = $this->readUInt($handle, 4);
                    // Length covers the 8 bytes of type and length, so anything
                    // smaller means a truncated or corrupt record list.
                    $dataLength = $record->Length - 8;
                    if ($dataLength < 0){
                        break;
                    }
                    // fread() rejects a zero length, so handle empty payloads here.
                    $data = ($dataLength > 0) ? fread($handle, $dataLength) : "";
                    $record->Data = ($data === false) ? "" : $data;
                    array_push($this->exthHeader->Records, $record);
                    echo "Record ".$i." type: ".$record->Type." length: ".$record->Length."\n";
                    echo " data: ".$record->Data."\n";
                }
            }
        }
        fclose($handle);
    }

    /**
     * Reads $length bytes from the stream as a big-endian unsigned integer.
     *
     * @param resource $handle Open file handle positioned at the field.
     * @param int      $length Number of bytes to read (1 to 4 are used here).
     * @return int|float Decoded value; 0 when nothing could be read.
     */
    private function readUInt($handle, $length)
    {
        $bytes = fread($handle, $length);
        if ($bytes === false){
            return 0;
        }
        return hexdec(bin2hex($bytes));
    }

    /**
     * Finds the first EXTH record of the given type.
     *
     * @param int $type EXTH record type.
     * @return exthRecord|null The record, or NULL when no EXTH header was
     *                         parsed or no record has that type.
     */
    protected function GetRecord($type)
    {
        if (!$this->exthHeader){
            return NULL;
        }
        foreach ($this->exthHeader->Records as $record){
            if ($record->Type == $type)
                return $record;
        }
        return NULL;
    }

    /**
     * Returns the payload of the first EXTH record of the given type.
     *
     * @param int $type EXTH record type.
     * @return string Record data, or "" when there is no such record.
     */
    protected function GetRecordData($type)
    {
        $record = $this->GetRecord($type);
        if ($record)
            return $record->Data;
        return "";
    }

    /** @return string Data of EXTH record type 503, or "" when absent. */
    public function Title()
    {
        return $this->GetRecordData(503);
    }

    /** @return string Data of EXTH record type 100, or "" when absent. */
    public function Author()
    {
        return $this->GetRecordData(100);
    }

    /** @return string Data of EXTH record type 104, or "" when absent. */
    public function Isbn()
    {
        return $this->GetRecordData(104);
    }

    /** @return string Data of EXTH record type 105, or "" when absent. */
    public function Subject()
    {
        return $this->GetRecordData(105);
    }

    /** @return string Data of EXTH record type 101, or "" when absent. */
    public function Publisher()
    {
        return $this->GetRecordData(101);
    }
}
// Demo: parse test.mobi (the constructor prints the header dump), then print
// each metadata field on its own line.
$mobi = new mobi("test.mobi");
echo "\nTitle: ", $mobi->Title();
echo "\nAuthor: ", $mobi->Author();
echo "\nIsbn: ", $mobi->Isbn();
echo "\nSubject: ", $mobi->Subject();
echo "\nPublisher: ", $mobi->Publisher();
我遇到过同样的问题,没有找到任何 PHP 解析器,只好自己编写(遗憾的是我不能公开我的代码)。这里有一份关于 .mobi 文件结构的很好的资料:http://wiki.mobileread.com/wiki/MOBI