8

我想创建一个爬虫,从数字 0 到 10 000 000 一个一个地获取所有页面的信息。我不管花多少时间。我只希望它有效。这是我得到的错误

致命错误:达到“100”的最大函数嵌套级别,正在中止!在第 25 行的 D:\wamp\www\crawler\index.php

第 25 行是

$htmlstr = (string)$this->curlGet($url);

还有我的完整脚本。

感谢您的帮助 !

header('Content-Type: text/html; charset=utf-8'); 
ini_set('max_input_nesting_level','100000');
ini_set('max_execution_time','100000'); 

class crawler{

    private $url;
    private $page;
    private $bothurl;
    private $innerDom = null;
    public $prop;
    public $entry;

    function __construct($entry){
        $this->entry = $entry;
        $this->bothurl = array('http://www.remax-quebec.com/fr/inscription/Q/'.$entry.'.rmx','http://www.remax-quebec.com/en/inscription/Q/'.$entry.'.rmx');
        $this->scan();
    }

    private function scan(){
        $i =0;
        foreach($this->bothurl as $url){
            $this->url = $url;
            $this->lang = ($i==0)?'fr':'en';
            $htmlstr = (string)$this->curlGet($url);
            $dom = new DOMDocument;
            @$dom->loadHTML($htmlstr);
            $this->page = $dom;
            $this->htmlInfos();
            $this->getInfos();
            $i++;
        }
    }

    private function htmlInfos(){
        $divs = $this->page->getElementsByTagName('div');
        foreach($divs as $div){
            if($div->hasAttribute('class') && $div->getAttribute('class') == 'bloc specs'){
                $innerDom = new DOMDocument(); 
                @$innerDom->loadHTML($this->innerHTML($div));
                $this->innerDom = $innerDom;
            }
        }
        if($this->innerDom === null) $this->changeEntry();
    }

    private function getInfos(){
        $sect = 0;

        foreach($this->innerDom->getElementsByTagName('div') as $div){
        # obtenir la description
             $this->getDesc($div->getAttribute('class'),$div);
        # obtenir les caractéristiques
             $this->getCaract($div->getAttribute('class'),$div);
        # obtenir les informations interieur, exterieur et evaluation
             if($div->getAttribute('class') == 'section deux-colonnes'){
                switch($sect){
                    case 0: $this->getSpecInfos($div,'interieur'); break;
                    case 1: $this->getSpecInfos($div,'exterieur'); break;
                    case 2: $this->getSpecInfos($div,'evaluation'); break;
                    case 3: $this->getSpecInfos($div,'equipement'); break;
                    case 4: $this->getSpecInfos($div,'services'); break;
                }
                $sect++;
             }else if($div->getAttribute('class') == 'section'){
        # obtenir les détails des pièces
                foreach($div->getElementsByTagName('table') as $table){
                    if($table->getAttribute('class') == 'details-pieces'){
                        $this->detailPieces($table);
                    }
                }
             }
        }
    }

    private function getDesc($class,$obj){
        if($class == 'section description'){
            $p = $obj->getElementsByTagName('p')->item(0);
            $text = (string)$p->nodeValue;
            $this->prop[$this->lang]['description'] = $text;
        }
    }

    private function getCaract($class,$obj){
        if($class == 'section characteristiques'){
            foreach($obj->getElementsByTagName('div') as $div){
                if(substr($div->getAttribute('class'),0,4) == "item"){
                    $text = (string)$div->nodeValue;
                    $this->prop[$this->lang]['caracteritiques'][substr($div->getAttribute('class'),5)] = $text;
                }
            }
        }
    }

    private function getSpecInfos($obj,$nomInfo){
        foreach($obj->getElementsByTagName('table') as $table){
            foreach($table->getElementsByTagName('tr') as $tr){
                $name = $tr->getElementsByTagName('td')->item(0);
                $value = $tr->getElementsByTagName('td')->item(1);
                $name = substr((string)$name->nodeValue,0,-2);
                $value = (string)$value->nodeValue;
                $this->prop[$this->lang][$nomInfo][$this->noAccents($name)] = $value;
            }
        }
    }

    private function detailPieces($obj){
        $tbody = $obj->getElementsByTagName('tbody')->item(0);
        foreach($tbody->getElementsByTagName('tr') as $tr){
            $name = $tr->getElementsByTagName('td')->item(0);
            $name = (string)$name->nodeValue;
            $level = $tr->getElementsByTagName('td')->item(1);
            $level = (string)$level->nodeValue;
            $dimensions = $tr->getElementsByTagName('td')->item(2);
            $dimensions = (string)$dimensions->nodeValue;
            $floor = $tr->getElementsByTagName('td')->item(3);
            $floor = (string)$floor->nodeValue;
            $desc = $tr->getElementsByTagName('td')->item(4);
            $desc = (string)$desc->nodeValue;

            $this->prop[$this->lang]['pieces'][$this->noAccents($name)]['etage'] = $level;
            $this->prop[$this->lang]['pieces'][$this->noAccents($name)]['dimensions'] = $dimensions;
            $this->prop[$this->lang]['pieces'][$this->noAccents($name)]['revetement'] = $floor;
            $this->prop[$this->lang]['pieces'][$this->noAccents($name)]['description'] = $desc;
        }
    }

    private function innerHTML($element){ 
        $innerHTML = ""; 
        $children = $element->childNodes; 
        foreach ($children as $child) 
        { 
            $tmp_dom = new DOMDocument(); 
            $tmp_dom->appendChild($tmp_dom->importNode($child, true)); 
            $innerHTML.=trim($tmp_dom->saveHTML()); 
        } 
        return $innerHTML; 
    }

    private function noAccents($value){
        $string= strtr($chaine,"ÀÁÂÃÄÅàáâãäåÒÓÔÕÖØòóôõöøÈÉÊËèéêëÇçÌÍÎÏìíîïÙÚÛÜùúûüÿÑñ","aaaaaaaaaaaaooooooooooooeeeeeeeecciiiiiiiiuuuuuuuuynn");
    }

    private function changeEntry(){
        $this->entry++;
        echo $this->entry;
        $this->scan();
    }

    private function curlGet($url){
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_ENCODING, "gzip");
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
        $data = curl_exec($curl);
        curl_close($curl);
        return $data;
    }
}

$entry = 8678057;

$crawler = new crawler($entry);

echo '<pre>';
print_r($crawler->prop);
echo '</pre>';
4

4 回答 4

33

假设您使用的是 xdebug,您可以设置自己的限制

ini_set('xdebug.max_nesting_level', $limit)
于 2012-04-14T21:51:42.587 回答
3

将文件 /etc/mysql/my.cnf 参数更改为 max_allowed_pa​​cket = 512M

确保您已安装 xdebug(使用 phpinfo()),然后更改文件 /etc/php5/fpm/php.ini 添加或编辑行:xdebug.max_nesting_level=1000

重启两个服务 sudo service mysql restart sudo service php5-fpm restart

如果它不起作用,您仍然可以在 /etc/php5/fpm/php.ini xdebug.remote_autostart=0 xdebug.remote_enable=0 处将这两个参数设置为 false

于 2013-04-04T04:49:20.160 回答
1

就我而言,它与作曲家有关。在composer.json文件中更新了一些供应商,但我忘记运行命令composer updatecomposer install。系统产生了一系列错误,导致了这个“最大嵌套级别”。

执行这些命令后,问题已解决

于 2014-07-08T15:34:08.047 回答
0

假设您没有犯死机错误,只需更改 xdebug 的限制。

我通过更改xdebug.ini文件解决了这个问题。(在我的mac中,路径是/usr/local/php5-5.6.17-20160108-103504/php.d/50-extension-xdebug.ini,也许你的会有点不同。)

在 xdebug.ini 文件的底部添加一个新行:

xdebug.max_nesting_level=500


请记住:您必须更改与您正在使用的 php 对应的 xdebug.ini。例如,如果您在计算机中安装了 php5 和 xampp,则需要弄清楚您使用的是哪个 php。

于 2016-03-31T16:01:30.180 回答