0

我正在通过 Cron 作业向 50 个站点发送 cURL 请求,以检查它们的运行状态。然而,我每次都只能得到前 10-15 个站点的响应。我认为在那之后服务器可能已经过载并停止了执行。

示例代码:

      // Probe once per entry in $sites and store the outcome when the probe succeeds.
      // NOTE(review): the loop variable $site is never used; every pass checks the
      // same literal URL. Presumably a placeholder for the real site URL - confirm.
      foreach ($sites as $site) {
          $isUp = Visit("http://www.domain.com");
          if (!$isUp) {
              continue;
          }
          //saving in database
      }

/**
 * Check whether a URL answers an HTTP request with a 2xx status code.
 *
 * The whole request is capped at 5 seconds. Redirects are not followed, so a
 * 3xx answer counts as "not OK". When no HTTP response is received at all,
 * cURL reports status code 0, which also counts as "not OK".
 *
 * @param string $url Absolute URL to probe.
 * @return bool True when the reported status code is in the 200-299 range.
 */
function Visit($url)
{
    // Nothing to probe: do not spend a cURL handle on an empty URL.
    if (trim((string) $url) === '') {
        return false;
    }

    $ch = curl_init();
    if ($ch === false) {
        // cURL could not allocate a handle; treat the site as unreachable.
        return false;
    }

    $agent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)";

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, $agent);
    // Capture the body instead of letting cURL print it; only the status is used.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_VERBOSE, false);
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);

    // The body is discarded on purpose: the verdict depends on the status code only.
    curl_exec($ch);
    $httpcode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    return $httpcode >= 200 && $httpcode < 300;
}

请向我建议一些技术来防止服务器过载并设法获得所有站点的响应。

4

1 回答 1

7

您可以尝试使用 curl_multi_exec(如下所示),检查 100 个不同的域名只需 15.519232988358 秒

// Domains to probe, one per line; split into an array further down.
$url = <<<'DOMAINS'
google.com
facebook.com
youtube.com
yahoo.com
baidu.com
wikipedia.org
live.com
twitter.com
qq.com
amazon.com
taobao.com
linkedin.com
blogspot.com
google.co.in
yahoo.co.jp
sina.com.cn
google.de
yandex.ru
msn.com
wordpress.com
google.co.jp
google.com.hk
bing.com
ebay.com
google.co.uk
google.fr
vk.com
microsoft.com
babylon.com
weibo.com
googleusercontent.com
163.com
tumblr.com
apple.com
mail.ru
pinterest.com
soso.com
google.com.br
tmall.com
google.es
paypal.com
google.ru
google.it
xhamster.com
craigslist.org
sohu.com
blogger.com
fc2.com
imdb.com
go.com
ebay.de
google.co.id
zedo.com
alibaba.com
mywebsearch.com
google.com.tr
adf.ly
stackoverflow.com
blogspot.in
redtube.com
amazon.co.uk
360buy.com
google.com.au
alipay.com
sogou.com
about.com
instagram.com
ebay.co.uk
nytimes.com
livedoor.com
google.pl
netflix.com
imgur.com
uol.com.br
dailymotion.com
wordpress.org
360.cn
cnet.com
godaddy.com
youporn.com
bp.blogspot.com
ameblo.jp
mediafire.com
adcash.com
globo.com
chinaz.com
weather.com
incredibar.com
neobux.com
xnxx.com
google.nl
ehow.com
douban.com
google.com.sa
4shared.com
vimeo.com
livejournal.com
dropbox.com
renren.com
doubleclick.com
DOMAINS;

// Preformatted output keeps the one-line-per-site layout when viewed in a browser.
echo "<pre>";
// Remove the script execution time limit for the duration of the batch.
set_time_limit(0);
$url = explode("\n", $url);

// Time the whole batch and print the elapsed seconds after the per-site lines.
$start = microtime(true);
multiplePost($url);
$elapsed = microtime(true) - $start;
echo PHP_EOL, $elapsed;

输出

Status 301  for : http://www.google.com/ Error (x)
Status 302  for : https://facebook.com/ Error (x)
Status ok for : http://www.youtube.com/ Done (^)
Status ok for : http://www.yahoo.com/ Done (^)
Status ok for : http://baidu.com Done (^)
Status ok for : http://www.wikipedia.org/ Done (^)
Status 301  for : https://home.live.com/ Error (x)
Status ok for : http://twitter.com Done (^)
Status ok for : http://www.qq.com/ Done (^)
Status ok for : http://www.amazon.com/ Done (^)
Status ok for : http://www.taobao.com/ Done (^)
Status ok for : http://www.linkedin.com/ Done (^)
Status 302  for : https://accounts.google.com/ServiceLogin?service=blogger&passive=1209600&continue=http://www.blogger.com/home&followup=http://www.blogger.com/home&ltmpl=start Error (x)
Status ok for : http://www.google.co.in/ Done (^)
Status ok for : http://www.yahoo.co.jp/ Done (^)
Status 0  for : http://sina.com.cn Error (x)
Status ok for : http://www.google.de/ Done (^)
Status 301  for : http://www.yandex.ru/ Error (x)
Status ok for : http://www.msn.com/ Done (^)
Status ok for : http://wordpress.com Done (^)
Status ok for : http://www.google.co.jp/ Done (^)
Status ok for : http://www.google.com.hk/ Done (^)
Status ok for : http://www.bing.com/ Done (^)
Status 301  for : http://www.ebay.com Error (x)
Status ok for : http://www.google.co.uk/ Done (^)
Status ok for : http://www.google.fr/ Done (^)
Status ok for : http://vk.com Done (^)
Status ok for : http://www.microsoft.com/en-ng/default.aspx Done (^)
Status ok for : http://www.babylon.com/ Done (^)
Status ok for : http://weibo.com Done (^)
Status 0  for : http://googleusercontent.com Error (x)
Status 0  for : http://163.com Error (x)
Status 302  for : https://www.tumblr.com/ Error (x)
Status ok for : http://www.apple.com/ Done (^)
Status ok for : http://mail.ru Done (^)
Status ok for : http://pinterest.com Done (^)
Status 0  for : http://soso.com Error (x)
Status ok for : http://www.google.com.br/ Done (^)
Status ok for : http://www.tmall.com/ Done (^)
Status ok for : http://www.google.es/ Done (^)
Status 302  for : https://paypal.com/ Error (x)
Status ok for : http://www.google.ru/ Done (^)
Status ok for : http://www.google.it/ Done (^)
Status ok for : http://xhamster.com Done (^)
Status ok for : http://www.craigslist.org/about/sites/ Done (^)
Status 302  for : http://www.sohu.com/ Error (x)
Status 302  for : https://accounts.google.com/ServiceLogin?service=blogger&passive=1209600&continue=http://www.blogger.com/home&followup=http://www.blogger.com/home&ltmpl=start Error (x)
Status ok for : http://fc2.com Done (^)
Status ok for : http://www.imdb.com/ Done (^)
Status ok for : http://go.com Done (^)
Status 301  for : http://www.ebay.de Error (x)
Status ok for : http://www.google.co.id/ Done (^)
Status ok for : http://www.zedo.com/ Done (^)
Status ok for : http://www.alibaba.com/ Done (^)
Status ok for : http://home.mywebsearch.com/ Done (^)
Status ok for : http://www.google.com.tr/ Done (^)
Status ok for : http://adf.ly Done (^)
Status ok for : http://stackoverflow.com Done (^)
Status 302  for : http://www.google.com/ Error (x)
Status ok for : http://www.redtube.com/ Done (^)
Status ok for : http://www.amazon.co.uk/ Done (^)
Status ok for : http://360buy.com Done (^)
Status ok for : http://www.google.com.au/ Done (^)
Status 301  for : https://www.alipay.com/?src=alipay.com Error (x)
Status ok for : http://www.sogou.com/ Done (^)
Status ok for : http://www.about.com/ Done (^)
Status ok for : http://instagram.com Done (^)
Status 301  for : http://www.ebay.co.uk Error (x)
Status ok for : http://www.nytimes.com/ Done (^)
Status ok for : http://www.livedoor.com/ Done (^)
Status ok for : http://www.google.pl/ Done (^)
Status 301  for : http://www.netflix.com/ Error (x)
Status ok for : http://imgur.com Done (^)
Status ok for : http://www.uol.com.br/ Done (^)
Status 301  for : http://www.dailymotion.com/ Error (x)
Status ok for : http://wordpress.org Done (^)
Status ok for : http://360.cn Done (^)
Status ok for : http://www.cnet.com/ Done (^)
Status ok for : http://www.godaddy.com/ Done (^)
Status ok for : http://www.youporn.com/ Done (^)
Status 0  for : http://bp.blogspot.com Error (x)
Status ok for : http://ameblo.jp Done (^)
Status ok for : http://www.mediafire.com/ Done (^)
Status 301  for : https://www.adcash.com/index.php Error (x)
Status 301  for : http://www.globo.com/ Error (x)
Status ok for : http://chinaz.com Done (^)
Status ok for : http://www.weather.com/ Done (^)
Status ok for : http://incredibar.com/essentials/homepage Done (^)
Status ok for : http://www.neobux.com/ Done (^)
Status 301  for : http://www.xnxx.com/ Error (x)
Status ok for : http://www.google.nl/ Done (^)
Status ok for : http://www.ehow.com/ Done (^)
Status 0  for : http://douban.com Error (x)
Status ok for : http://www.google.com.sa/ Done (^)
Status 301  for : http://www.4shared.com Error (x)
Status ok for : http://vimeo.com Done (^)
Status ok for : http://www.livejournal.com/ Done (^)
Status 302  for : https://www.dropbox.com/ Error (x)
Status ok for : http://renren.com Done (^)
Status ok for : http://www.google.com/doubleclick/ Done (^)

15.519232988358 <--------------- Total Time Taken

使用的函数

/**
 * Probe many domains in parallel with the cURL multi interface and print one
 * status line per domain.
 *
 * Each domain is requested as "http://<domain>" with a 5 second connect
 * timeout, a 15 second overall timeout and redirect following enabled. A
 * domain is reported as OK only when its transfer finished without a cURL
 * error and the final status code is 200; every other outcome (including
 * transfers that never produced an HTTP response, reported as status 0) is
 * printed as an error line instead of being skipped.
 *
 * @param array $nodes Domain names without scheme. Surrounding whitespace is
 *                     trimmed and blank entries are skipped.
 * @return array Results keyed like $nodes, each an array with the keys
 *               'url' (string), 'http_code' (int), 'errno' (int), 'ok' (bool).
 */
function multiplePost($nodes) {
    $mh = curl_multi_init();
    $curl_array = array();
    foreach ( $nodes as $i => $domain ) {
        $domain = trim($domain);
        if ($domain === '') {
            // A blank line would otherwise become a request to "http://".
            continue;
        }
        $curl_array[$i] = curl_init("http://$domain");
        curl_setopt($curl_array[$i], CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl_array[$i], CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 (.NET CLR 3.5.30729)');
        curl_setopt($curl_array[$i], CURLOPT_CONNECTTIMEOUT, 5);
        curl_setopt($curl_array[$i], CURLOPT_TIMEOUT, 15);
        curl_setopt($curl_array[$i], CURLOPT_FOLLOWLOCATION, true);
        curl_multi_add_handle($mh, $curl_array[$i]);
    }

    echo PHP_EOL, PHP_EOL;

    // Drive all transfers; wait for socket activity instead of polling blindly.
    $running = 0;
    do {
        $status = curl_multi_exec($mh, $running);
        if ($running > 0 && curl_multi_select($mh, 1.0) === -1) {
            // select() can fail immediately on some builds; back off briefly
            // so the loop does not spin at full CPU.
            usleep(10000);
        }
    } while ( $running > 0 && $status === CURLM_OK );

    // Per-transfer result codes are only available through the multi message
    // queue; collect them and map each one back to its key in $curl_array.
    $errors = array();
    while ( ($done = curl_multi_info_read($mh)) !== false ) {
        if ($done['msg'] !== CURLMSG_DONE) {
            continue;
        }
        $key = array_search($done['handle'], $curl_array, true);
        if ($key !== false) {
            $errors[$key] = $done['result'];
        }
    }

    $res = array();
    foreach ( $curl_array as $i => $ch ) {
        $info = curl_getinfo($ch);
        $info['url'] = trim($info['url']);
        $errno = isset($errors[$i]) ? $errors[$i] : curl_errno($ch);
        $ok = ($errno === 0 && $info['http_code'] == 200);

        if ($ok) {
            echo "Status ok for : {$info['url']} Done (^)", PHP_EOL;
        } else {
            echo "Status {$info['http_code']}  for : {$info['url']} Error (x)", PHP_EOL;
        }

        $res[$i] = array(
            'url' => $info['url'],
            'http_code' => (int) $info['http_code'],
            'errno' => (int) $errno,
            'ok' => $ok
        );

        curl_multi_remove_handle($mh, $ch);
        curl_close($ch);

        // Push the line out right away: PHP's own buffer first (only when one
        // is active, otherwise ob_flush() raises a notice), then the SAPI's.
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    }
    curl_multi_close($mh);

    return $res;
}
于 2012-11-19T20:28:36.633 回答