我在 php.ini 中编写了以下对象。它工作正常,但是在我的结构中更改某些内容后,目录爬虫仅保存数组中最后找到的目录。
<?php
include_once('root.php');
class crawler
{
private $pages = array();
/**
* @param string $dir
* @return array current_dir
*/
function found_dir($dir)
{
// echo "Found (dir): {$dir}\n";
// add $dir to $pages
$parts = explode("/", $dir);
$current = &$this->pages;
// e.g. ./content/page/sub_page/
// [0] => '.'
// [1] => 'content'
// [-1] => ''
for ($i = 2; $i < count($parts) - 1; $i++) {
if (!in_array('sub', $current))
$current['sub'] = array();
if (!in_array($parts[$i], $current['sub']))
$current['sub'][$parts[$i]] = array('name' => $parts[$i]);
$current = &$current['sub'][$parts[$i]];
}
return $current;
}
/**
* @param string $file
*/
function found_file($file)
{
// echo "Found (file): {$file}\n";
// move to current $dir
$current = &$this->found_dir($file);
// add $file to $pages
$parts = explode(".", substr($file, strrpos($file, "/") + 1));
if (count($parts) == 1) {
$current[$parts[0]] = file_get_contents($file);
} else {
if (strlen($parts[0]) == 0)
$current[$parts[1]] = true;
else if (!in_array($parts[0], $current))
$current[$parts[0]] = array($parts[1] => file_get_contents($file));
else
$current[$parts[0]][$parts[1]] = file_get_contents($file);
}
}
/**
* @param string $root
*/
function read_all_files($root = '.')
{
// vars for handling
$directories = array();
$last_letter = $root[strlen($root) - 1];
$root = ($last_letter == '\\' || $last_letter == '/') ? $root : $root . DIRECTORY_SEPARATOR;
$directories[] = $root;
while (sizeof($directories)) {
$dir = array_pop($directories);
if ($handle = opendir($dir)) {
while (false !== ($file = readdir($handle))) {
if ($file == '.' || $file == '..') {
continue;
}
$file = $dir . $file;
if (is_dir($file)) {
$directory_path = $file . DIRECTORY_SEPARATOR;
array_push($directories, $directory_path);
} elseif (is_file($file))
$this->found_file($file);
}
closedir($handle);
}
}
}
/**
* @return root
*/
public function generatePages()
{
// get available files, create temp array
$this->read_all_files('./content/');
// print
print_r($this->pages);
// loop array
return new root($this->pages);
}
}
我找不到错误。也许你们中的某些人看到了错误。非常感谢您的帮助!
最好的问候