1

我在 php.ini 中编写了以下对象。它工作正常,但是在我的结构中更改某些内容后,目录爬虫仅保存数组中最后找到的目录。

<?php

include_once('root.php');

class crawler
{

private $pages = array();

/**
 * @param string $dir
 * @return array current_dir
 */
function found_dir($dir)
{
//         echo "Found (dir): {$dir}\n";
    // add $dir to $pages
    $parts = explode("/", $dir);
    $current = &$this->pages;
    // e.g. ./content/page/sub_page/
    //  [0] => '.'
    //  [1] => 'content'
    // [-1] => ''
    for ($i = 2; $i < count($parts) - 1; $i++) {
        if (!in_array('sub', $current))
            $current['sub'] = array();
        if (!in_array($parts[$i], $current['sub']))
            $current['sub'][$parts[$i]] = array('name' => $parts[$i]);
        $current = &$current['sub'][$parts[$i]];
    }
    return $current;
}

/**
 * @param string $file
 */
function found_file($file)
{
//        echo "Found (file): {$file}\n";
    // move to current $dir
    $current = &$this->found_dir($file);
    // add $file to $pages
    $parts = explode(".", substr($file, strrpos($file, "/") + 1));
    if (count($parts) == 1) {
        $current[$parts[0]] = file_get_contents($file);
    } else {
        if (strlen($parts[0]) == 0)
            $current[$parts[1]] = true;
        else if (!in_array($parts[0], $current))
            $current[$parts[0]] = array($parts[1] => file_get_contents($file));
        else
            $current[$parts[0]][$parts[1]] = file_get_contents($file);
    }
}

/**
 * @param string $root
 */
function read_all_files($root = '.')
{
    // vars for handling
    $directories = array();
    $last_letter = $root[strlen($root) - 1];
    $root = ($last_letter == '\\' || $last_letter == '/') ? $root : $root . DIRECTORY_SEPARATOR;
    $directories[] = $root;

    while (sizeof($directories)) {
        $dir = array_pop($directories);
        if ($handle = opendir($dir)) {
            while (false !== ($file = readdir($handle))) {
                if ($file == '.' || $file == '..') {
                    continue;
                }
                $file = $dir . $file;
                if (is_dir($file)) {
                    $directory_path = $file . DIRECTORY_SEPARATOR;
                    array_push($directories, $directory_path);

                } elseif (is_file($file))
                    $this->found_file($file);
            }
            closedir($handle);
        }
    }
}

/**
 * @return root
 */
public function generatePages()
{
    // get available files, create temp array
    $this->read_all_files('./content/');
    // print
    print_r($this->pages);
    // loop array
    return new root($this->pages);
}

}

我找不到错误。也许你们中的某些人看到了错误。非常感谢您的帮助!

最好的问候

4

1 回答 1

0

您在应该使用 isset() 的地方使用 in_array:

$pages = array('sub' => array())

in_array('sub', $pages) => false
isset($pages['sub'])    => true
于 2012-05-05T14:41:47.970 回答