我有一个代理列表,想检查其中哪些当前可用。目前的做法是用 curl 逐个连接每个代理,看它是否有响应,但我想找一种更快的方法,类似 http://www.ip-adress.com/Proxy_Checker/ 的做法。我在考虑只检查端口是否开放,或者类似的办法。我目前使用的代码如下:
<?php
error_reporting(E_ERROR);
//ini_set('memory_limit', '256M');
/**
 * Perform a direct (proxy-less) HTTP GET with cURL and return the response body.
 *
 * The requested URL is $loginURL with $loginFields appended verbatim. Cookies are
 * loaded from and saved to "cookies.txt" in the working directory, redirects are
 * followed, and TLS peer/host verification is switched off.
 *
 * @param string $loginURL     Base URL of the request.
 * @param string $loginFields  Text appended to the URL as-is (no separator is added).
 * @param string $referer      Value sent as the Referer header.
 * @param string $cookieString Not used by this function.
 * @param mixed  $code         Taken BY VALUE: the HTTP status written to it below never
 *                             reaches the caller. It cannot become a reference without
 *                             breaking callers that pass a literal here.
 *
 * @return string|false Response body, or false when the transfer failed.
 */
function hitFormGet($loginURL, $loginFields, $referer, $cookieString, $code)
{
    // Options are applied one by one, in this exact order.
    $settings = array(
        CURLOPT_COOKIEJAR      => "cookies.txt",
        CURLOPT_COOKIEFILE     => "cookies.txt",
        CURLOPT_COOKIESESSION  => true,
        CURLOPT_FAILONERROR    => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_VERBOSE        => 0,
        CURLOPT_ENCODING       => 'gzip,deflate,sdch',
        CURLOPT_SSL_VERIFYHOST => 0,
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_FRESH_CONNECT  => true,
        CURLOPT_HEADER         => false,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CONNECTTIMEOUT => 30,
        CURLOPT_TIMEOUT        => 35,
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11",
        CURLOPT_URL            => $loginURL . $loginFields,
        CURLOPT_REFERER        => $referer,
    );

    $handle = curl_init();
    foreach ($settings as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    $body = curl_exec($handle);

    // Local only, see the note on $code in the docblock.
    $transferInfo = curl_getinfo($handle);
    $code = $transferInfo['http_code'];

    curl_close($handle);

    return $body;
}
/**
 * Perform an HTTP GET through the given proxy and return the response body.
 *
 * Prints "<proxy>><url>" as a progress line before the request. The requested URL
 * is $loginURL with $loginFields appended verbatim. Cookies are loaded from and
 * saved to "cookies.txt", redirects are followed (at most 10), and TLS peer/host
 * verification is switched off.
 *
 * Changes from the previous revision:
 *  - The connect timeout stays at 5 seconds. It used to be overwritten with 30
 *    seconds by a second CURLOPT_CONNECTTIMEOUT call further down, which is longer
 *    than the 20 second total timeout and made dead proxies slow to give up on.
 *  - "bot.ini" is no longer read on every call; its contents were never used.
 *
 * @param string $loginURL     Base URL of the request.
 * @param string $loginFields  Text appended to the URL as-is (no separator is added).
 * @param string $referer      Sent as the Referer header only when longer than 4 characters.
 * @param string $cookieString Value passed to CURLOPT_COOKIE.
 * @param mixed  $code         Output (by reference): receives the full curl_getinfo()
 *                             array for the transfer, not just the status code.
 * @param string $proxy        Proxy passed to CURLOPT_PROXY.
 * @param bool   $js           When truthy, send "Accept: *" + "/*" style wildcard accept
 *                             header instead of the HTML-oriented one.
 *
 * @return string|false Response body, or false when the transfer failed.
 */
function hitFormGetProxyINI($loginURL, $loginFields, $referer, $cookieString, &$code, $proxy, $js)
{
    echo $proxy . ">$loginURL\n";

    // Only the Accept header differs between the two request flavours.
    if (!$js) {
        $accept = 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
    } else {
        $accept = 'Accept: */*';
    }

    // Options are applied one by one, in this exact order.
    $settings = array(
        CURLOPT_COOKIEJAR      => "cookies.txt",
        CURLOPT_COOKIEFILE     => "cookies.txt",
        CURLOPT_AUTOREFERER    => true,
        CURLOPT_COOKIESESSION  => true,
        CURLOPT_COOKIE         => $cookieString,
        CURLOPT_FAILONERROR    => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_VERBOSE        => 0,
        CURLOPT_PROXY          => $proxy,
        // Fail fast on dead proxies: 5 s to connect, 20 s for the whole transfer.
        CURLOPT_CONNECTTIMEOUT => 5,
        CURLOPT_TIMEOUT        => 20,
        CURLOPT_MAXREDIRS      => 10,
        CURLOPT_ENCODING       => "gzip,deflate,sdch",
        CURLOPT_SSL_VERIFYHOST => 0,
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_FRESH_CONNECT  => true,
        CURLOPT_HEADER         => false,
        CURLOPT_HTTPHEADER     => array(
            'Accept-Language: en-US,en;q=0.8',
            $accept,
            'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3'
        ),
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11",
        CURLOPT_URL            => $loginURL . $loginFields,
    );

    $ch = curl_init();
    foreach ($settings as $option => $value) {
        curl_setopt($ch, $option, $value);
    }

    // Very short referers (4 characters or fewer) are not sent.
    if (strlen($referer) > 4) {
        curl_setopt($ch, CURLOPT_REFERER, $referer);
    }

    $ret = curl_exec($ch);

    // Hand the complete transfer statistics back; callers read e.g. 'total_time'.
    $code = curl_getinfo($ch);

    curl_close($ch);

    return $ret;
}
/**
 * Perform a direct (proxy-less) HTTP POST with cURL and return the response body.
 *
 * $loginFields is sent as the request body to $loginURL. Cookies are loaded from
 * and saved to "cookies.txt" in the working directory, redirects are followed,
 * and TLS peer/host verification is switched off. Only a connect timeout (30 s)
 * is configured; no overall transfer timeout is set.
 *
 * @param string       $loginURL     URL the form is posted to.
 * @param string|array $loginFields  Payload handed to CURLOPT_POSTFIELDS.
 * @param string       $referer      Value sent as the Referer header.
 * @param string       $cookieString Not used by this function.
 *
 * @return string|false Response body, or false when the transfer failed.
 */
function hitFormPost($loginURL, $loginFields, $referer, $cookieString)
{
    // Options are applied one by one, in this exact order
    // (CURLOPT_POST is set before CURLOPT_POSTFIELDS, as before).
    $settings = array(
        CURLOPT_COOKIEJAR      => "cookies.txt",
        CURLOPT_COOKIEFILE     => "cookies.txt",
        CURLOPT_COOKIESESSION  => true,
        CURLOPT_FAILONERROR    => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_VERBOSE        => 0,
        CURLOPT_ENCODING       => 'gzip,deflate,sdch',
        CURLOPT_SSL_VERIFYHOST => 0,
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_FRESH_CONNECT  => true,
        CURLOPT_HEADER         => false,
        CURLOPT_POST           => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CONNECTTIMEOUT => 30,
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11",
        CURLOPT_URL            => $loginURL,
        CURLOPT_REFERER        => $referer,
        CURLOPT_POSTFIELDS     => $loginFields,
    );

    $handle = curl_init();
    foreach ($settings as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}
// ---------------------------------------------------------------------------
// Setup: reset the per-slot dump files, wait, connect to MySQL and fetch the
// batch of proxies to check.
// ---------------------------------------------------------------------------

// Truncate (or create) the 100 dump files, one per proxy slot in the batch.
for ($in = 0; $in < 100; $in++) {
    file_put_contents("mysql_dumpB$in.txt", '');
}

// NOTE(review): three-minute pause before any work starts; presumably this
// coordinates with the companion script that consumes the dump files. Confirm.
sleep(180);

// NOTE(review): database credentials are hard-coded in source.
$link = mysql_connect("localhost:3306", "userkdo_botuser1", "dvd6000") or die(mysql_error());
mysql_select_db("userkdo_botdb1", $link) or die(mysql_error());

// Country tiers. Not read by the statements in this section.
$tier1_countries = array(
    "United States",
    "Canada",
    "Japan",
    "United Kingdom",
    "Germany",
    "France",
    "Belgium",
    "Netherlands",
    "Sweden",
    "Norway",
    "Denmark",
    "Ireland",
    "Switzerland",
    "Spain",
    "Luxembourg",
    "Liechtenstein",
    "Monaco",
    "Italy",
    "Finland",
    "Austria",
    "Australia"
);
$tier2_countries = array(
    "Cyprus",
    "Greece",
    "Hong Kong",
    "Israel",
    "Republic of Korea",
    "New Zealand",
    "Poland",
    "Portugal"
);

// The 100 'anon' proxies that were checked longest ago. A failed query now
// aborts with the MySQL error, like the connect/select calls above, instead
// of letting the script finish silently with nothing checked.
$result = mysql_query("SELECT *
FROM `proxies` WHERE `type`='anon'
ORDER BY `proxies`.`last_checked` ASC
LIMIT 0 , 100") or die(mysql_error());

// Endpoint requested through every proxy.
$nexturl = "http://DOMAIN.net/bot/headers.php";

// Child PIDs keyed by slot index, and the running slot index.
$pids = array();
$i = 0;
// ---------------------------------------------------------------------------
// Probe every fetched proxy in its own forked child so the checks run in
// parallel. Each child appends the resulting UPDATE statement(s), terminated
// by "|", to mysql_dumpB<slot>.txt and exits; the parent then reaps them all.
//
// Changes from the previous revision:
//  - A failed pcntl_fork() (-1) is logged and skipped. It used to be stored in
//    $pids and later passed to pcntl_waitpid(), where -1 means "any child".
//  - The geo-IP request for inactive proxies is gone: its response was decoded
//    and then discarded, costing up to 35 s per dead proxy.
//  - json_decode() is no longer called on an undefined variable, and $country
//    is initialised explicitly.
// ---------------------------------------------------------------------------
while ($row = mysql_fetch_assoc($result)) {
    $pid = pcntl_fork();

    if ($pid === -1) {
        // Could not fork: leave this slot's dump file empty and move on.
        error_log("pcntl_fork() failed; skipping proxy " . $row['proxy']);
        $i++;
        continue;
    }

    if ($pid > 0) {
        // Parent: remember the child so it can be reaped below.
        $pids[$i] = $pid;
        $i++;
        continue;
    }

    // ---- child process: check exactly one proxy, then exit ----
    $proxy = $row['proxy'];
    $res = hitFormGetProxyINI($nexturl, "", "", "", $cd, $proxy, false);
    echo $res . "\n";

    $dumpFile = "mysql_dumpB$i.txt";
    $elapsed = $cd['total_time'];
    $where = "' WHERE `proxies`.`proxy` = '" . $proxy . "'";

    // Country lookup is disabled; kept empty so the debug marker prints as before.
    $country = '';

    // NOTE(review): the endpoint answering with the literal text "FALSE" is what
    // marks a proxy active; presumably it means no proxy headers leaked. Confirm.
    if ($res === "FALSE") {
        file_put_contents($dumpFile, "UPDATE `proxies` SET `status` = 'active',`last_checked` = '" . time() . "',`last_active` = '" . time() . "',`response_time`='" . $elapsed . $where . "|", FILE_APPEND);
        echo "UPDATE `proxies` SET `status` = 'active',`last_active` = '" . time() . "',`response_time`='" . $elapsed . $where . "\n";
        echo "\n>>$country<<\n";
    } else {
        file_put_contents($dumpFile, "UPDATE `proxies` SET `status` = 'inactive',`last_checked` = '" . time() . "',`response_time`='" . $elapsed . $where . "|", FILE_APPEND);
        echo "UPDATE `proxies` SET `status` = 'inactive',`response_time`='" . $elapsed . $where . "\n";
        echo "\n>>$country<<\n";

        // A response of two or more characters other than "FALSE" additionally
        // queues a '404' status after the 'inactive' one.
        if (is_string($res) && strlen($res) > 1) {
            file_put_contents($dumpFile, "UPDATE `proxies` SET `status` = '404',`last_checked` = '" . time() . "',`response_time`='" . $elapsed . $where . "|", FILE_APPEND);
        }
    }

    exit();
}

// Parent: wait for every child that was actually started.
foreach ($pids as $pid) {
    pcntl_waitpid($pid, $status, WUNTRACED);
}
?>
该脚本从数据库中取出 100 个代理,然后通过每个代理向目标地址发送请求并检查响应,以此逐一验证代理。我只需要知道它们当前是否处于活动状态,所以多余的部分可以删掉。检查结果以 SQL 查询的形式写入文件,随后由第二个脚本执行这些查询并更新数据库。