我正在尝试使用我的虚拟网络服务器进行网络抓取;我正在寻找页面中的项目名称 + 创建者的名称,例如 Yanira Castro 的《Bring THE PEOPLE TO COME to New York City》
这些信息位于 bbcard_name
我的问题是我在脚本末尾收到的数组和 csv 总是空的......
<?php
set_time_limit(0);
$data = array ()
$listpage = file_get_contents('http://www.kickstarter.com/discover/categories/dance/');
preg_match_all('#<h2> <a href="([A-Z]+)\.html">([a-za-Z ]+)</a></li>#', $listpage, $pagesurl);
foreach($pageurl[1] AS $pagesurl) {
$projectPage = file_get_contents('http://www.kickstarter.com/discover/categories/dance/' . $pagesurl . '.html');
preg_match('#<h2>bbcard_name ([a-zA-Z ]+)</h2>#', $projectPage, $name);
$name = $name[1];
preg_match_all('#<h2><a href="https?://.+\.[a-z]{2,5}">([^<]+)</a>#', $projectPage, $namefound);
foreach($namefound[1] AS $name) {
if(!isset($data[$name]))
$data[$name] = array('name' => $name);
else
$data[$name]['name'] .= ' - ' . $name;
}
}
print_r($data);
$out = fopen('data.csv', 'w');
fputcsv($out, array('Titre'));
foreach ($data as $name => $data) {
$name = (isset($data['name'])) ? $data['name'] : '';
fputcsv($out, array($data,$name));
}
fclose($out);
echo "FINITO";
exit;
?>
谢谢