我最近继承了一个 RSS/XML 解析器,虽然它似乎工作得很好,但我发现有些东西不见了。
例如,从博客中提取 RSS 提要。它缺少项目中的所有类别。它显示为每个项目只有一个类别,而实际上它应该显示为具有多个类别。
演示链接: http: //dev.o7t.in/rss/
链接到实际提要: http: //o7thblog.com/feed/
您可以看到第一项feed
本身如何在第一项中共有 8 个类别。(可能需要查看源码)
但是,在Demo
您可以看到它只显示 1 个类别
这是我的整个课程代码:
<?php
class o7thRssFeedPuller{
public $FeedUrl = ''; // URL of the feed to pull in
public $ReturnJson = false; // Return the array as a JSON encoded string instead?
public $MaxItems = 0; // 0 = unlimited (except by feed), only applicable to GetItems
// Internal holders
private $document;
private $channel;
private $items;
// Get the full RSS feed
public function GetRSS($includeAttributes = false) {
// Pull in our feed
$this->loadParser(file_get_contents($this->FeedUrl, false, $this->randomContext()));
if($includeAttributes) {
// only if we are including attributes
return ($this->ReturnJson) ? json_encode($this->document) : $this->document;
}
// Return either an array or a json encoded string
return ($this->ReturnJson) ? json_encode($this->valueReturner()) : $this->valueReturner();
}
// Get the channel data
public function GetChannel($includeAttributes = false) {
// Pull in our feed
$this->loadParser(file_get_contents($this->FeedUrl, false, $this->randomContext()));
if($includeAttributes) {
// only if we are including attributes
return ($this->ReturnJson) ? json_encode($this->channel) : $this->channel;
}
// Return either an array or a json encoded string
return ($this->ReturnJson) ? json_encode($this->valueReturner($this->channel)) : $this->valueReturner($this->channel);
}
// Get the items
public function GetItems($includeAttributes=false) {
// Pull in our feed
$this->loadParser(file_get_contents($this->FeedUrl, false, $this->randomContext()));
if($includeAttributes) {
// only if we are including attributes
$arr = ($this->MaxItems == 0) ? $this->items : array_slice($this->items, 0, $this->MaxItems);
return ($this->ReturnJson) ? json_encode($arr) : $arr;
}
// Return either an array or a json encoded string
$arr = ($this->MaxItems == 0) ? $this->valueReturner($this->items) : array_slice($this->valueReturner($this->items), 0, $this->MaxItems);
return ($this->ReturnJson) ? json_encode($arr) : $arr;
}
// -------------------------------------------------------------------------------------------------
// Internal Methods
private function loadParser($rss=false) {
if($rss) {
$this->document = array();
$this->channel = array();
$this->items = array();
$DOMDocument = new DOMDocument;
$DOMDocument->strictErrorChecking = false;
$DOMDocument->loadXML($rss);
$this->document = $this->extractDOM($DOMDocument->childNodes);
}
}
private function valueReturner($valueBlock=false) {
if(!$valueBlock) {
$valueBlock = $this->document;
}
foreach($valueBlock as $valueName => $values) {
if(isset($values['value'])) {
$values = $values['value'];
}
if(is_array($values)) {
$valueBlock[$valueName] = $this->valueReturner($values);
} else {
$valueBlock[$valueName] = $values;
}
}
return $valueBlock;
}
private function extractDOM($nodeList,$parentNodeName=false) {
$itemCounter = 0;
foreach($nodeList as $values) {
if(substr($values->nodeName,0,1) != '#') {
if($values->nodeName == 'item') {
$nodeName = $values->nodeName.':'.$itemCounter;
$itemCounter++;
} else {
$nodeName = $values->nodeName;
}
$tempNode[$nodeName] = array();
if($values->attributes) {
for($i=0;$values->attributes->item($i);$i++) {
$tempNode[$nodeName]['properties'][$values->attributes->item($i)->nodeName] = $values->attributes->item($i)->nodeValue;
}
}
if(!$values->firstChild) {
$tempNode[$nodeName]['value'] = $values->textContent;
} else {
$tempNode[$nodeName]['value'] = $this->extractDOM($values->childNodes, $values->nodeName);
}
if(in_array($parentNodeName, array('channel','rdf:RDF'))) {
if($values->nodeName == 'item') {
$this->items[] = $tempNode[$nodeName]['value'];
} elseif(!in_array($values->nodeName, array('rss','channel'))) {
$this->channel[$values->nodeName] = $tempNode[$nodeName];
}
}
} elseif(substr($values->nodeName,1) == 'text') {
$tempValue = trim(preg_replace('/\s\s+/',' ',str_replace("\n",' ', $values->textContent)));
if($tempValue) {
$tempNode = $tempValue;
}
} elseif(substr($values->nodeName,1) == 'cdata-section'){
$tempNode = $values->textContent;
}
}
return (!isset($tempNode)) ? null : $tempNode;
}
// Load in a random header to pass
private function randomContext() {
$headerstrings = array();
$headerstrings['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.'.rand(0,2).'; en-US; rv:1.'.rand(2,9).'.'.rand(0,4).'.'.rand(1,9).') Gecko/2007'.rand(10,12).rand(10,30).' Firefox/2.0.'.rand(0,1).'.'.rand(1,9);
$headerstrings['Accept-Charset'] = rand(0,1) ? 'en-gb,en;q=0.'.rand(3,8) : 'en-us,en;q=0.'.rand(3,8);
$headerstrings['Accept-Language'] = 'en-us,en;q=0.'.rand(4,6);
$setHeaders = 'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5'."\r\n".
'Accept-Charset: '.$headerstrings['Accept-Charset']."\r\n".
'Accept-Language: '.$headerstrings['Accept-Language']."\r\n".
'User-Agent: '.$headerstrings['User-Agent']."\r\n";
$contextOptions = array(
'http'=>array(
'method'=>"GET",
'header'=>$setHeaders
)
);
return stream_context_create($contextOptions);
}
}
?>
对于演示页面:
<?php
require_once($_SERVER['DOCUMENT_ROOT'] . '/rss/o7th.rss.feed.puller.php');
$fp = new o7thRssFeedPuller();
$fp->FeedUrl = 'http://o7thblog.com/feed';
$fp->MaxItems = 2;
echo '<table width="100%" cellpadding="0" cellspacing="0">';
echo ' <tr>';
echo ' <td>';
echo ' <textarea cols="120" rows="30">';
print_r($fp->GetItems());
echo ' </textarea>';
echo ' </td>';
echo ' </tr>';
echo '</table>';
?>
所以,我假设问题出在valueReturner
方法或extractDOM
方法的某个地方,但我只是不确定在哪里,也不确定我能做些什么来获取返回数组中的所有类别。
你能帮我吗?