再会,
我正在使用 cURL 和各种解析技术从各种网站检索信息。我编写了代码,因此如果需要,我可以添加其他我从中扫描信息的网站。
检索到的信息如下:(请注意,信息可能不准确,可能不反映真实价格/名称)
Array
(
[website1.com] => Array
(
[0] => Array
(
[0] => 60" BRAVIA LX900 Series 3D HDTV
[1] => website1.com
[2] => 5299.99
)
[1] => Array
(
[0] => 52" BRAVIA LX900 Series 3D HDTV
[1] => website1.com
[2] => 4499.99
)
[2] => Array
(
[0] => 46" BRAVIA LX900 Series 3D HDTV
[1] => website1.com
[2] => 3699.99
)
[3] => Array
(
[0] => 40" BRAVIA LX900 Series 3D HDTV
[1] => website1.com
[2] => 2999.99
)
)
[website2.com] => Array
(
[0] => Array
(
[0] => Sony 3D 60" LX900 HDTV BRAVIA
[1] => website2.com
[2] => 5400.99
)
[1] => Array
(
[0] => Sony 3D 52" LX900 HDTV BRAVIA
[1] => website2.com
[2] => 4699.99
)
[2] => Array
(
[0] => Sony 3D 46" LX900 HDTV BRAVIA
[1] => website2.com
[2] => 3899.99
)
)
)
所需的输出必须是:
Array
(
[0] => Array
(
[Name] => 60" BRAVIA LX900 Series 3D HDTV
[website1.com] => 5299.99
[website2.com] => 5400.99
)
[1] => Array
(
[Name] => 52" BRAVIA LX900 Series 3D HDTV
[website1.com] => 4499.99
[website2.com] => 4699.99
)
[2] => Array
(
[Name] => 46" BRAVIA LX900 Series 3D HDTV
[website1.com] => 3699.99
[website2.com] => 3899.99
)
[3] => Array
(
[Name] => 40" BRAVIA LX900 Series 3D HDTV
[website1.com] => 2999.99
)
)
请注意,名称可能会有所不同,因此需要使用similar_text。此外,某些信息可能不会显示在所有网站上。我知道只能选择一个电视名称,然后我将使用最相关来源(website1.com)中的一个
这是我正在尝试使用的代码。
<?php
$_Retreived = array(
"website1.com" => array(
array('60" BRAVIA LX900 Series 3D HDTV', 'website1.com', 5299.99),
array('52" BRAVIA LX900 Series 3D HDTV', 'website1.com', 4499.99),
array('46" BRAVIA LX900 Series 3D HDTV', 'website1.com', 3699.99),
array('40" BRAVIA LX900 Series 3D HDTV', 'website1.com', 2999.99)
),
"website2.com" => array(
array('Sony 3D 60" LX900 HDTV BRAVIA', 'website2.com', 5400.99),
array('Sony 3D 52" LX900 HDTV BRAVIA', 'website2.com', 4699.99),
array('Sony 3D 46" LX900 HDTV BRAVIA', 'website2.com', 3899.99),
)
);
$_Prices = array();
$_PricesTemp = array();
$_Sites = array("website1.com", "website2.com");
for($i = 0; $i < sizeOf($_Sites); $i++)
{
$_PricesTemp = array_merge($_PricesTemp, $_Retreived[ $_Sites[$i] ]);
}
/*
print_r($_PricesTemp);
Array
(
[0] => Array
(
[0] => 60" BRAVIA LX900 Series 3D HDTV
[1] => website1.com
[2] => 5299.99
)
[1] => Array
(
[0] => 52" BRAVIA LX900 Series 3D HDTV
[1] => website1.com
[2] => 4499.99
)
[2] => Array
(
[0] => 46" BRAVIA LX900 Series 3D HDTV
[1] => website1.com
[2] => 3699.99
)
[3] => Array
(
[0] => 40" BRAVIA LX900 Series 3D HDTV
[1] => website1.com
[2] => 2999.99
)
[4] => Array
(
[0] => Sony 3D 60" LX900 HDTV BRAVIA
[1] => website2.com
[2] => 5400.99
)
[5] => Array
(
[0] => Sony 3D 52" LX900 HDTV BRAVIA
[1] => website2.com
[2] => 4699.99
)
[6] => Array
(
[0] => Sony 3D 46" LX900 HDTV BRAVIA
[1] => website2.com
[2] => 3899.99
)
)
*/
foreach($_PricesTemp As $_KeyOne => $_EntryOne)
{
foreach(array_reverse($_PricesTemp, true) As $_KeyTwo => $_EntryTwo)
{
if ($_KeyOne != $_KeyTwo)
{
$_Percent = 0;
similar_text(strtoupper($_EntryOne[0]), strtoupper($_EntryTwo[0]), $_Percent);
if ($_Percent >= 90) //If names matches 90%+
{
echo "Similar : <b>" . $_KeyOne . "</b> " . $_EntryOne[0] . " and <b>" . $_KeyTwo . "</b> " . $_EntryTwo[0] . " Percent : " . $_Percent . "<br />";
$_Prices[] = array();
$_Prices[ sizeOf($_Prices)-1 ]['Name'] = $_EntryOne[0]; //Use the product name of the most revelant website (website1.com)
foreach($_Sites As $_Site)
{
if (isset($_EntryOne[ 1 ]) && $_EntryOne[ 1 ] == $_Site) //Check if it contains price from website1.com
{
$_Prices[ sizeOf($_Prices)-1 ][ $_Site ] = $_EntryOne[ 2 ];
}
if (isset($_EntryTwo[ 1 ]) && $_EntryTwo[ 1 ] == $_Site) //Check if it contains price from website2.com
{
$_Prices[ sizeOf($_Prices)-1 ][ $_Site ] = $_EntryTwo[ 2 ];
}
}
}
}
}
}
/*
print_r($_Prices);
Array
(
[0] => Array
(
[Name] => 60" BRAVIA LX900 Series 3D HDTV
[website1.com] => 2999.99
)
[1] => Array
(
[Name] => 60" BRAVIA LX900 Series 3D HDTV
[website1.com] => 3699.99
)
[2] => Array
(
[Name] => 60" BRAVIA LX900 Series 3D HDTV
[website1.com] => 4499.99
)
[3] => Array
(
[Name] => 52" BRAVIA LX900 Series 3D HDTV
[website1.com] => 2999.99
)
[4] => Array
(
[Name] => 52" BRAVIA LX900 Series 3D HDTV
[website1.com] => 3699.99
)
[5] => Array
(
[Name] => 52" BRAVIA LX900 Series 3D HDTV
[website1.com] => 5299.99
)
[6] => Array
(
[Name] => 46" BRAVIA LX900 Series 3D HDTV
[website1.com] => 2999.99
)
[7] => Array
(
[Name] => 46" BRAVIA LX900 Series 3D HDTV
[website1.com] => 4499.99
)
[8] => Array
(
[Name] => 46" BRAVIA LX900 Series 3D HDTV
[website1.com] => 5299.99
)
[9] => Array
(
[Name] => 40" BRAVIA LX900 Series 3D HDTV
[website1.com] => 3699.99
)
[10] => Array
(
[Name] => 40" BRAVIA LX900 Series 3D HDTV
[website1.com] => 4499.99
)
[11] => Array
(
[Name] => 40" BRAVIA LX900 Series 3D HDTV
[website1.com] => 5299.99
)
[12] => Array
(
[Name] => Sony 3D 60" LX900 HDTV BRAVIA
[website2.com] => 3899.99
)
[13] => Array
(
[Name] => Sony 3D 60" LX900 HDTV BRAVIA
[website2.com] => 4699.99
)
[14] => Array
(
[Name] => Sony 3D 52" LX900 HDTV BRAVIA
[website2.com] => 3899.99
)
[15] => Array
(
[Name] => Sony 3D 52" LX900 HDTV BRAVIA
[website2.com] => 5400.99
)
[16] => Array
(
[Name] => Sony 3D 46" LX900 HDTV BRAVIA
[website2.com] => 4699.99
)
[17] => Array
(
[Name] => Sony 3D 46" LX900 HDTV BRAVIA
[website2.com] => 5400.99
)
)
*/
?>
首先,上面的代码不起作用。一定有一个我无法指出的逻辑错误。此外,如果我将第三个网站添加到列表中,我不相信该代码将起作用。
有什么想法吗?从今天早上开始我就一直在做这件事。
编辑 2011-02-16:
我为这个问题添加了赏金。