我正在使用一个 PHP 脚本(基于 cURL)来检查以下两点:
- 我数据库中的链接是否有效(即返回 HTTP 状态码 200)
- 如果链接发生了重定向,它是否被重定向到了合适/相似的页面(根据页面的内容来判断)
结果将保存到日志文件中,并作为附件通过电子邮件发送给我。
这一切都能正常工作,但是速度很慢,而且大约有一半的情况下会超时并提前中止。值得注意的是,我有大约 16,000 个链接要检查。
我想知道怎样才能让它运行得更快,以及我哪里做错了?
下面的代码:
/**
 * Emits the same text to two places: the given file handle and standard output.
 *
 * @param resource $file        Open, writable stream handle that receives the text.
 * @param string   $tobewritten Text to write to the handle and then print.
 */
function echoappend($file, $tobewritten)
{
    // Persist to the log handle first, then mirror to the live output.
    fputs($file, $tobewritten);
    print $tobewritten;
}
// Link checker: for every stored link on a fixed list of sites, issue a HEAD
// request and report whether it answers 200, redirects to a page whose <title>
// still contains the link's name, or fails. Every report line is echoed and
// also appended to a timestamped HTML log file.
error_reporting(E_ALL);
ini_set('display_errors', '1');

// The run issues thousands of sequential HTTP requests, which easily exceeds
// PHP's default max_execution_time and aborts the script half-way through.
// NOTE(review): a web-server or proxy timeout may still apply if this is not
// run from the CLI.
set_time_limit(0);

// Collect HTML parser diagnostics internally instead of silencing them with @.
libxml_use_internal_errors(true);

$filename = date('YmdHis') . "linkcheck.htm";
echo $filename;
$file = fopen($filename, "w+");
if ($file === false) {
    exit('ERROR: could not open log file ' . $filename);
}

// Both cURL handles are created once and reused for every link so that
// connections to the same host can be kept alive between requests. Explicit
// timeouts stop one dead server from stalling the whole run.
// NOTE(review): the requests are still sequential; the curl_multi_* API would
// be the next step if this is not fast enough.
$headch = curl_init();
curl_setopt_array($headch, array(
    CURLOPT_HEADER         => true,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_NOBODY         => true,   // HEAD request: only the status is needed
    CURLOPT_FOLLOWLOCATION => false,  // the redirect itself must stay visible
    CURLOPT_CONNECTTIMEOUT => 5,
    CURLOPT_TIMEOUT        => 15,
));

$redirectch = curl_init();
curl_setopt_array($redirectch, array(
    CURLOPT_HEADER         => false,  // keep response headers out of the HTML handed to the parser
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_NOBODY         => false,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_MAXREDIRS      => 10,     // guard against redirect loops
    CURLOPT_CONNECTTIMEOUT => 5,
    CURLOPT_TIMEOUT        => 30,
));

// Status codes handled as "redirected". The original test
// `$code == 301 || 302` was always true, which sent every non-200 response
// (including failed requests) down the redirect path.
$redirectcodes = array(301, 302, 303, 307, 308);

try {
    // NOTE(review): $un and $pw are not defined in the visible code; they are
    // presumably set elsewhere.
    $conn = new PDO('mysql:host=localhost;dbname=databasename', $un, $pw);
    $conn->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
    echo '<b>connected to db</b><br /><br />';

    $sitearray = array("medical.posterous","ebm.posterous","behavenet","guidance.nice","www.rch","emedicine","www.chw","www.rxlist","www.cks.nhs.uk");

    // The statement does not depend on the site, so prepare it once.
    // Only the two columns the loop reads are fetched.
    $stmt = $conn->prepare("SELECT url, name FROM link WHERE url LIKE :site");

    foreach ($sitearray as $site) {
        echoappend($file, "<h1>" . $site . "</h1>");
        $stmt->execute(array(':site' => 'http://' . $site . '%'));
        $result = $stmt->fetchAll(PDO::FETCH_ASSOC);
        $totallinks = 0;
        $workinglinks = 0;

        foreach ($result as $row) {
            $originalurl = $row['url'];
            $name = (string) $row['name'];

            curl_setopt($headch, CURLOPT_URL, $originalurl);
            $output = curl_exec($headch);
            if ($output === false) {
                // Record the transport error in the log as well, not only on screen.
                echoappend($file, "cURL Error: " . htmlspecialchars(curl_error($headch), ENT_QUOTES, 'UTF-8') . "<br />");
            }
            // Reads 0 when no HTTP response was received at all.
            $httpcode = (int) curl_getinfo($headch, CURLINFO_HTTP_CODE);

            if ($httpcode === 200) {
                echoappend($file, $name . ": <b>working!</b><br />");
                $workinglinks++;
            } elseif (in_array($httpcode, $redirectcodes, true)) {
                // Follow the redirect chain and judge the landing page by its title.
                curl_setopt($redirectch, CURLOPT_URL, $originalurl);
                $redirectoutput = curl_exec($redirectch);

                $title = '';
                // A failed or empty fetch must not reach loadHTML(): it warns,
                // and on PHP 8 throws, for empty input.
                if (is_string($redirectoutput) && trim($redirectoutput) !== '') {
                    $doc = new DOMDocument();
                    $doc->loadHTML($redirectoutput);
                    // Drop buffered parser errors so they do not pile up over thousands of pages.
                    libxml_clear_errors();
                    $nodes = $doc->getElementsByTagName('title');
                    if ($nodes->length > 0) {
                        $title = trim($nodes->item(0)->nodeValue);
                    }
                }

                // The title comes from a remote page, so escape it before writing HTML.
                echoappend($file, $name . ": <b>redirect ... </b>" . htmlspecialchars($title, ENT_QUOTES, 'UTF-8') . " ... ");

                // Case-insensitive containment check. An empty name can never
                // confirm a match, so it counts as a failure.
                if ($name === '' || stripos($title, $name) === false) {
                    echoappend($file, "FAIL<br />");
                } else {
                    $finalurl = curl_getinfo($redirectch, CURLINFO_EFFECTIVE_URL);
                    echoappend($file, htmlspecialchars($finalurl, ENT_QUOTES, 'UTF-8'));
                    echoappend($file, "SUCCESS<br />");
                }
            } else {
                echoappend($file, $name . ": <b>FAIL code</b>" . $httpcode . "<br />");
            }
            $totallinks++;
        }

        echoappend($file, '<br />');
        echoappend($file, $site . ": " . $workinglinks . "/" . $totallinks . " links working. <br /><br />");
    }

    $conn = null;
    echo '<br /><b>connection closed</b><br /><br />';
} catch (PDOException $e) {
    echo 'ERROR: ' . $e->getMessage();
}

// Release the shared handles and flush the log whether or not the database work succeeded.
curl_close($headch);
curl_close($redirectch);
fclose($file);