因为服务器成本是最大的支出,我们希望从每个人身上得到更多。
我们怎样才能实现,更多的脚本可以在这台服务器上运行?
Scrips 在做什么:
我们在一台服务器上运行 80 个 PHP 脚本,并通过 Gearman 为它们提供作业。
脚本使用 查找网站cURL
,使用 提取所需信息Zend_Dom_Query
并将数据存储在数据库中。每个脚本都得到 ca。他们必须查找的 1000 个 URL。脚本示例在下面。
服务器由什么组成:
lshw
s 输出:
description: Computer
width: 64 bits
capabilities: vsyscall64 vsyscall32
*-core
description: Motherboard
physical id: 0
*-memory
description: System memory
physical id: 0
size: 8191GiB
*-cpu
product: Intel(R) Xeon(R) CPU E31230 @ 3.20GHz
vendor: Intel Corp.
physical id: 1
bus info: cpu@0
width: 64 bits
capabilities: fpu fpu_exception wp vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp x86-64 constant_tsc arch_perfmon pebs bts rep_good xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer xsave avx lahf_lm ida arat epb xsaveopt pln pts tpr_shadow vnmi flexpriority ept vpid
尽管如此,这是一个 V-Server,它是在该服务器上运行的唯一 V-Server。它也没有 8191GB 内存,更像 16GB。
为了向您展示服务器的疲惫程度,以下是top
输出:
top - 14:45:04 up 8 days, 3:10, 1 user, load average: 72.96, 72.51, 71.82
Tasks: 100 total, 72 running, 28 sleeping, 0 stopped, 0 zombie
Cpu(s): 87.5%us, 12.2%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.3%st
Mem: 8589934588k total, 4349016k used, 8585585572k free, 0k buffers
Swap: 0k total, 0k used, 0k free, 282516k cached
不要忘记这里的脚本主要结构:
// Get the Infos on which to crawl on
$asin = explode(',', $job->workload());
try {
$userproducts = new App_Dbservices_U...();
$konkurrenz = new App_Dbservices_K...();
$repricingstream = new App_Dbservices_R...();
$err = 0;
for ($i = 0; $i < count($asin) - 3; $i = $i + 50) {
$mh = curl_multi_init();
$handles = array();
for ($j = $i; $j < $i + 50; $j++) {
if ((count($asin) - 3) > $j) {
if (isset($asin[$j])) {
// create a new single curl handle
$ch = curl_init();
// setting several options like url, timeout, returntransfer
// simulate multithreading by calling the wait.php scipt and sleeping for $rand seconds
$url = // URL
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 80);
// add this handle to the multi handle
$erroro[$j] = curl_errno($ch);
$errmsg[$j] = curl_error($ch);
curl_multi_add_handle($mh, $ch);
// put the handles in an array to loop this later on
$handles[] = $ch;
}
}
}
}
// execute the multi handle
$running = null;
do {
curl_multi_exec($mh, $running);
} while ($running > 0);
// get the content (if there is any)
$output = '';
for ($k = 0; $k < count($handles); $k++) {
// get the content of the handle
$output[$k] = curl_multi_getcontent($handles[$k]);
$_asin[$k]['asin'] = $asin[$j - 50 + $k];
$_asin[$k]['condition'] = $condition[$j - 50 + $k];
$_asin[$k]['pId'] = $pId[$j - 50 + $k];
if ($output[$k] != '')
{
// get the dom of each page
$dom = new Zend_Dom_Query($output[$k]);
// get the sellerInfos of each page
$seller = $dom->query('div.Offer');
if (count($seller) > 0) {
// get the price out of the string
$seller_i = 0;
$selfCameOver = false;
foreach ($seller as $d2) {
if ($seller_i <= 6 OR $selfCameOver === false) {
$itemHtml = '';
foreach($d2->childNodes as $node) {
$itemHtml .= $node->ownerDocument->saveHTML($node);
}
$dom = new Zend_Dom_Query($itemHtml);
$itemPrice = $dom->query('span.Price');
foreach($itemPrice as $ItemPrice)
{
$_asin[$k]['price_end'][$seller_i] = 0.00;
$_asin[$k]['shipping_end'][$seller_i] = 0.00;
if (preg_match('/[0-9]++(?>[,.][0-9]+)?+/', $ItemPrice->textContent, $rueckgabe)) {
$priceEnd = str_replace(',', '', str_replace('.', '', $rueckgabe[0][0]));
$priceLength = strlen($priceEnd);
$priceEnd = substr($priceEnd, 0, ($priceLength - 2)) . '.' . substr($priceEnd, ($priceLength - 2), 2);
$_asin[$k]['price_end'][$seller_i] = (float)$priceEnd;
}
}
}
$shippingPrice = $dom->query('span.ShippingPrice');
foreach($shippingPrice as $ShippingPrice)
{
preg_match_all('/[0-9]{1,}([\,\. ]?[0-9])*/', $ShippingPrice->textContent, $rueckgabe);
if (isset($rueckgabe[0][0])) {
// ...
}
}
$_asin[$k]['price_total_end'][$seller_i] = $_asin[$k]['price_end'][$seller_i] + $_asin[$k]['shipping_end'][$seller_i];
$conditionTag = $dom->query('.Condition');
foreach($conditionTag as $ConditionTag)
{
$_asin[$k]['main_con'][$seller_i]= 0;
$_asin[$k]['sub_con'][$seller_i] = 0;
$conditionValue = explode(' - ', $ConditionTag->textContent);
if(isset($conditionValue[0])){
// ...
}
if(isset($conditionValue[1])) {
// ...
}
}
$ratingItem = $dom->query('.Rating');
$_asin[$k]['bewertung_end'][$seller_i] = -1;
$_asin[$k]['stars_end'][$seller_i] = -1;
foreach($ratingItem as $RatingItem)
{
echo $RatingItem->textContent; // 99% positiv ... 12 Monaten ... 11.719 Bewertungen ...
// I want to get 99 (which is stars ) and 11719 (which is bewertungen )
preg_match_all('/[0-9]{1,}([\,\. ]?[0-9])*/', preg_replace('/,/', '.', $RatingItem->textContent), $rueckgabe);
if (isset($rueckgabe[0]) AND count($rueckgabe[0]) > 0) {
$_asin[$k]['bewertung_end'][$seller_i] = (int)str_replace('.', '', $rueckgabe[0][count($rueckgabe[0]) - 1]);
$_asin[$k]['stars_end'][$seller_i] = $rueckgabe[0][0];
}
}
$sellerType = $dom->query('.Name img');
$_asin[$k]['merchant_end'][$seller_i] = "N/A";
$_asin[$k]['name_end'][$seller_i] = "N/A";
$_asin[$k]['img_end'][$seller_i] = "N/A";
$_asin[$k]['konk_type'][$seller_i] = 'ERROR';
if(count($sellerType) == 1)
{
foreach($sellerType as $SellerType)
{
$imgAltText = $SellerType->getAttribute('alt');
$a = explode('.', $imgAltText);
// ...
}
}
elseif(count($sellerType) == 0)
{
$_asin[$k]['img_end'][$seller_i] = 'NO_IMG';
$_asin[$k]['konk_type'][$seller_i] = 'WO_IMG';
$sellerName = $dom->query('.Name b');
foreach($sellerName as $SellerName)
{
$_asin[$k]['name_end'][$seller_i] = $SellerName->textContent;
}
$sellerMerchant = $dom->query('.Name a');
foreach($sellerMerchant as $SellerMerchant)
{
$_asin[$k]['merchant_end'][$seller_i] = str_replace('=', '', substr($SellerMerchant->getAttribute('href'), -14));
}
}
unset($rueckgabe);
}
$seller_i++;
}
}
}
// remove the handle from the multi handle
curl_multi_remove_handle($mh, $handles[$k]);
}
// Update Price ...
// Update Shipping ...
// Update Conc ...
unset($_asin);
// close the multi curl handle to free system resources
curl_multi_close($mh);
}
} catch (Exception $e) {
$error = new Repricing_Dbservices_Error();
$error->setError($id, $e->getMessage(), $e->getLine(), $e->getFile());
}
还有价格更新的脚本(其他更新语句看起来很相似)
$this->db->beginTransaction();
try {
for ($i = 0; $i < count($asin); $i++) {
if (isset($asin[$i]['price_total_end'])) {
if (count($asin[$i]['price_total_end']) > 1) {
if ($asin[$i]['price_total_end'][0] > 0) {
$this->db->query("UPDATE u... SET lowest_price = ? , last_lowest_price_update = ? WHERE id = ?", array(
$asin[$i]['price_total_end'][1],
date("Y-m-d H:i:s", time()),
$asin[$i]['pId']
));
}
} elseif (count($asin[$i]['price_total_end']) == 1) {
if ($asin[$i]['price_total_end'][0] >= 0) {
$this->db->query("UPDATE u... SET lowest_price = ? , last_lowest_price_update = ? WHERE id = ?", array(
-1,
date("Y-m-d H:i:s", time()),
$asin[$i]['pId']
));
}
}
}
}
$this->db->commit();
} catch (Exception $e) {
$this->db->rollBack();
echo $e->getMessage();
}
$this->db->closeConnection();
我们的脚本中是否存在很大的性能泄漏,我们是否应该使用其他语言或任何其他技术?每个建议都受到高度赞赏。