我正在使用一个 PHP 脚本(基于 cURL)来检查:
- 我数据库中的链接是否有效(即返回 HTTP 状态码 200)
- 如果链接被重定向,它是否被重定向到了合适/相似的页面(根据页面内容判断)
结果将保存到日志文件中,并作为附件通过电子邮件发送给我。
这一切基本可以正常工作,但速度很慢,而且大约有一半的时候会超时并提前中止。值得注意的是,我有大约 16,000 个链接要检查。
我想知道怎样才能让它运行得更快,以及我有哪些地方做错了?
下面的代码:
/**
 * Append text to the log file and send the same text to the output.
 *
 * @param resource $file        Handle passed straight to the file write call.
 * @param string   $tobewritten Text written to the handle and then echoed.
 */
function echoappend($file, $tobewritten)
{
    // fputs() is the built-in alias of fwrite().
    fputs($file, $tobewritten);
    print $tobewritten;
}
// Link checker: for each configured site, load the matching rows from the
// `link` table, issue a HEAD request per URL and log the outcome.
//   - 200            -> counted as working
//   - redirect code  -> follow the redirect with a GET and compare the target
//                       page's <title> against the stored link name
//   - anything else  -> logged as a failure together with the status code
// Every result line is written to a timestamped .htm log and echoed.
error_reporting(E_ALL);
ini_set('display_errors', '1');

// The checks run sequentially, so a long link list can exceed PHP's
// execution-time limit and abort the run part-way; lift that limit here.
set_time_limit(0);

$filename = date('YmdHis') . "linkcheck.htm";
echo $filename;
$file = fopen($filename, "w+");
if ($file === false) {
    die('ERROR: could not open log file ' . $filename);
}

// Per-request limits (seconds) so a single dead host cannot stall the run.
$connecttimeout = 10;
$transfertimeout = 30;

// Status codes handled by the redirect branch.
$redirectcodes = array(301, 302, 303, 307, 308);

// The handles are created once and reused for every link: the options stay
// set between transfers and cURL can keep connections to the same host open.
$ch = curl_init();
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_NOBODY, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $connecttimeout);
curl_setopt($ch, CURLOPT_TIMEOUT, $transfertimeout);

$redirectch = curl_init();
// Headers are left out of this response so that only the HTML body is handed
// to the DOM parser below.
curl_setopt($redirectch, CURLOPT_HEADER, 0);
curl_setopt($redirectch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($redirectch, CURLOPT_NOBODY, false);
curl_setopt($redirectch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($redirectch, CURLOPT_MAXREDIRS, 10); // stop on redirect loops
curl_setopt($redirectch, CURLOPT_CONNECTTIMEOUT, $connecttimeout);
curl_setopt($redirectch, CURLOPT_TIMEOUT, $transfertimeout);

try {
    $conn = new PDO('mysql:host=localhost;dbname=databasename', $un, $pw);
    $conn->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
    echo '<b>connected to db</b><br /><br />';

    $sitearray = array("medical.posterous", "ebm.posterous", "behavenet", "guidance.nice", "www.rch", "emedicine", "www.chw", "www.rxlist", "www.cks.nhs.uk");

    // The query text never changes, so prepare it once and only rebind :site.
    $stmt = $conn->prepare("SELECT * FROM link WHERE url LIKE :site");

    foreach ($sitearray as $site) {
        echoappend($file, "<h1>" . $site . "</h1>");
        $stmt->execute(array(':site' => 'http://' . $site . '%'));
        $result = $stmt->fetchAll();
        $totallinks = 0;
        $workinglinks = 0;

        foreach ($result as $row) {
            $originalurl = $row['url'];
            curl_setopt($ch, CURLOPT_URL, $originalurl);
            $output = curl_exec($ch);
            if ($output === FALSE) {
                echo "cURL Error: " . curl_error($ch);
            }

            // After a failed transfer http_code is 0, which ends up in the
            // "FAIL code" branch below.
            $urlinfo = curl_getinfo($ch);
            $httpcode = (int) $urlinfo['http_code'];

            if ($httpcode == 200) {
                echoappend($file, $row['name'] . ": <b>working!</b><br />");
                $workinglinks++;
            } else if (in_array($httpcode, $redirectcodes, true)) {
                // The original test `== 301 || 302` was always true, so every
                // non-200 link was fetched a second time and the failure
                // branch could never run.
                curl_setopt($redirectch, CURLOPT_URL, $originalurl);
                $redirectoutput = curl_exec($redirectch);

                // Only parse when a body actually came back, and tolerate
                // pages that have no <title> element.
                $title = '';
                if (is_string($redirectoutput) && $redirectoutput !== '') {
                    $doc = new DOMDocument();
                    @$doc->loadHTML($redirectoutput);
                    $titlenode = $doc->getElementsByTagName('title')->item(0);
                    if ($titlenode !== null) {
                        $title = $titlenode->nodeValue;
                    }
                }

                echoappend($file, $row['name'] . ": <b>redirect ... </b>" . $title . " ... ");
                if (strpos(strtolower($title), strtolower($row['name'])) === false) {
                    echoappend($file, "FAIL<br />");
                } else {
                    $header = curl_getinfo($redirectch);
                    echoappend($file, $header['url']);
                    echoappend($file, "SUCCESS<br />");
                }
            } else {
                echoappend($file, $row['name'] . ": <b>FAIL code</b>" . $httpcode . "<br />");
            }
            $totallinks++;
        }

        echoappend($file, '<br />');
        echoappend($file, $site . ": " . $workinglinks . "/" . $totallinks . " links working. <br /><br />");
    }

    $conn = null;
    echo '<br /><b>connection closed</b><br /><br />';
} catch (PDOException $e) {
    echo 'ERROR: ' . $e->getMessage();
}

// Release the transfer handles and flush/close the log whether or not the
// database part succeeded.
curl_close($redirectch);
curl_close($ch);
fclose($file);