此代码通过并发发起多个 cURL 请求(curl_multi)来检查页面上的失效链接。然而,它无法正确识别失效的链接:即使页面上确实存在失效链接,它也始终返回 0 个失效链接。$url_list 是由页面上的链接组成的数组。
$mh = curl_multi_init();
for ($i = 0; $i < $max_connections; $i++)
{
$this->add_url_to_multi_handle($mh, $url_list);
}
do
{
$mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
while ($active && $mrc == CURLM_OK)
{
// 5. there is activity
if (curl_multi_select($mh) != -1)
{
// 6. do work
do
{
$mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
// 7. is there info?
if ($mhinfo = curl_multi_info_read($mh))
{
// this means one of the requests were finished
// 8. get the info on the curl handle
$codes = array('400', '401', '402', '403', '404', '500', '501', '502', '503');
$chinfo = curl_getinfo($mhinfo['handle']);
// 9. dead link?
if (!$chinfo['http_code'])
{
$dead_urls [] = $chinfo['url'];
// 10. 404?
}
else if (in_array($chinfo['http_code'], $codes) || !$chinfo['http_code'])
{
$broken_links++;
$data = array(
'domain_id' => $domain_id,
'pageurl' => $url,
'broken_link' => $chinfo['url']
);
$this->db->insert($table, $data);
$not_found_urls [] = $chinfo['url'];
// 11. working
}
else
{
$working_urls [] = $chinfo['url'];
}
// 12. remove the handle
curl_multi_remove_handle($mh, $mhinfo['handle']);
curl_close($mhinfo['handle']);
// 13. add a new url and do work
if ($this->add_url_to_multi_handle($mh, $url_list))
{
do
{
$mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
}
}
}
}
curl_multi_close($mh);
echo "<br/>Total Broken: " . $broken_links;
echo "==<br/>Broken URLs==\n";
echo "<pre>";
print_r($not_found_urls);
echo "</pre>";
$nooflinks = $total_link;
}
/**
 * Queue the next pending URL from $url_list on a cURL multi handle.
 *
 * Each call attaches at most one new easy handle, so the caller controls how
 * many transfers are in flight by how often it calls this function. The
 * position in the list is kept in static state between calls.
 *
 * The cursor restarts from the beginning only when a different multi handle
 * or a different URL list is passed in (i.e. a new run starts). It is NOT
 * reset when the list runs out: doing so made every later call in the same
 * run start again at the first URL, re-queuing links that had already been
 * checked.
 *
 * Blank or non-scalar entries are skipped instead of being treated as the end
 * of the list, and the list is walked by position so gaps in the array keys
 * do not cut the walk short.
 *
 * Note: the most recent multi handle and list are referenced from static
 * state until the next run replaces them.
 *
 * @param resource|object $mh       Multi handle returned by curl_multi_init().
 * @param array           $url_list URLs to check.
 * @return bool True when a new easy handle was attached to $mh, false when no
 *              usable URL is left in the list.
 */
function add_url_to_multi_handle($mh, $url_list)
{
    // Cursor state shared by all calls that belong to the same run.
    static $index = 0;
    static $current_mh = null;
    static $current_list = null;

    // A new multi handle or a new list means a new run: start from the top.
    if ($mh !== $current_mh || $url_list !== $current_list)
    {
        $current_mh = $mh;
        $current_list = $url_list;
        $index = 0;
    }

    if (!is_array($url_list))
    {
        return false;
    }

    // Walk by position so non-contiguous keys do not end the walk early.
    $urls = array_values($url_list);
    $total = count($urls);

    while ($index < $total)
    {
        $url = $urls[$index];
        // Advance first so a skipped or failed entry is never retried.
        $index++;

        // Skip entries that cannot be a URL instead of stopping the whole run.
        if (!is_scalar($url) || trim((string) $url) === '')
        {
            continue;
        }

        $ch = curl_init();
        if ($ch === false)
        {
            continue;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        // Follow redirects so the reported status is that of the final target.
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
        // Only the status is needed, so do not download the body.
        curl_setopt($ch, CURLOPT_NOBODY, TRUE);
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");

        if (curl_multi_add_handle($mh, $ch) === CURLM_OK)
        {
            return true;
        }

        // Could not attach this handle: release it and try the next URL.
        curl_close($ch);
    }

    // Nothing left to queue for this run.
    return false;
}
谢谢。