如果您使用框架(frame/iframe)来嵌入页面,某些网站可以跳出框架。如果您使用 cURL,并且希望把用户留在您自己的站点内,就需要解析页面中的所有 URL(链接、图像、脚本、CSS),并把它们改写为指向您自己站点的地址。因此 cURL 方案看起来更可靠,但需要您做更多的工作,而且会消耗您站点更多的带宽。如果您想采用基于 cURL 的方案,可以在网上查找 Web 代理(web proxy)的示例。
下面是一段可以运行的基础 PHP 代码,可帮助您入门:
// Minimal rewriting web proxy.
//
// Fetches the page named by the "url" query parameter, rewrites the URLs in
// <a>, <img>, <link> and <script> elements so they are absolute, routes every
// <a> link back through this script, and prints the resulting HTML.
//
// NOTE(review): this script fetches arbitrary user-supplied URLs. Only the
// scheme is validated here; requests to internal/private hosts are NOT
// blocked. Add a host allow-list before exposing this publicly.

// Reject non-string input such as ?url[]=x, which would otherwise reach cURL.
$url = (isset($_GET['url']) && is_string($_GET['url']))
    ? trim($_GET['url'])
    : 'http://amazon.co.uk/';

// Scheme-less input ("example.com/page") keeps working by assuming http.
if (!preg_match('#^[a-z][a-z0-9+.\-]*://#i', $url)) {
    $url = 'http://' . $url;
}

// Only plain web URLs may be proxied (no file://, ftp://, gopher://, ...).
$scheme   = strtolower((string) parse_url($url, PHP_URL_SCHEME));
$hostName = parse_url($url, PHP_URL_HOST);
$port     = parse_url($url, PHP_URL_PORT);
$path     = parse_url($url, PHP_URL_PATH);
if (($scheme !== 'http' && $scheme !== 'https') || !$hostName) {
    header('HTTP/1.1 400 Bad Request');
    exit('Invalid URL');
}
if (!is_string($path) || $path === '') {
    $path = '/';
}

$html = file_get_contents2($url);
if ($html === false || $html === '') {
    header('HTTP/1.1 502 Bad Gateway');
    exit('Unable to fetch ' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8'));
}

// Real-world HTML is rarely valid; collect parser warnings instead of
// printing them, then restore the previous libxml setting.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$doc = new DOMDocument();
$loaded = $doc->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);
if (!$loaded) {
    header('HTTP/1.1 502 Bad Gateway');
    exit('Unable to parse ' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8'));
}

// Origin of the remote page (scheme, host and optional port preserved).
$host = $scheme . '://' . $hostName . ($port ? ':' . $port : '');
// Directory of the remote page, used to resolve document-relative URLs.
// NOTE(review): this is derived from the requested URL; if the fetch was
// redirected the real base may differ, and <base href> is not honoured.
$baseDir = $host . substr($path, 0, strrpos($path, '/') + 1);

// Address of this script, using the scheme the client actually connected with.
$isHttps = !empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) !== 'off';
$proxy = ($isHttps ? 'https' : 'http') . '://'
    . $_SERVER['SERVER_NAME'] . $_SERVER['SCRIPT_NAME'] . '?url=';

// Element name => attribute that carries its URL.
$items = array(
    'a'      => 'href',
    'img'    => 'src',
    'link'   => 'href',
    'script' => 'src',
);

$xpath = new DOMXPath($doc);
foreach ($items as $tag => $attr) {
    // Only elements that actually carry the attribute are touched, so e.g.
    // <a name="x"> does not gain a bogus href.
    $nodes = $xpath->query('//' . $tag . '[@' . $attr . ']');
    if ($nodes === false) {
        continue;
    }

    foreach ($nodes as $node) {
        $value = trim($node->getAttribute($attr));

        // Empty values and in-page fragments refer to the current document.
        if ($value === '' || $value[0] === '#') {
            continue;
        }

        if (strpos($value, '//') === 0) {
            // Protocol-relative: inherit the remote page's scheme.
            $value = $scheme . ':' . $value;
        } elseif ($value[0] === '/') {
            // Root-relative: prepend the remote origin.
            $value = $host . $value;
        } elseif ($value[0] === '?') {
            // Query-only: same document, different query string.
            $value = $host . $path . $value;
        } elseif (!preg_match('#^[a-z][a-z0-9+.\-]*:#i', $value)) {
            // Document-relative: resolve against the remote page's directory.
            $value = $baseDir . $value;
        }

        // Keep navigation inside the proxy, but leave mailto:, javascript:,
        // data: and similar links untouched.
        if ($tag === 'a' && preg_match('#^https?://#i', $value)) {
            $value = $proxy . urlencode($value);
        }

        $node->setAttribute($attr, $value);
    }
}

// The DOM was edited in place, so it can be serialised directly.
$html = $doc->saveHTML();
echo $html;
/**
 * Fetch a URL with cURL and return the response body.
 *
 * Sends a browser-like User-Agent, follows redirects (at most 10) and does
 * not reuse a cached connection.
 *
 * @param string $address URL to fetch.
 *
 * @return string|false The response body (which may legitimately be empty
 *                      or "0"), or false when the transfer fails.
 */
function file_get_contents2($address)
{
    $useragent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1";

    $c = curl_init();
    if ($c === false) {
        return false;
    }

    curl_setopt($c, CURLOPT_URL, $address);
    curl_setopt($c, CURLOPT_USERAGENT, $useragent);
    curl_setopt($c, CURLOPT_HEADER, 0);
    curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($c, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($c, CURLOPT_FRESH_CONNECT, 1);
    // Bound the request so a slow or looping upstream cannot hang the caller.
    curl_setopt($c, CURLOPT_MAXREDIRS, 10);
    curl_setopt($c, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($c, CURLOPT_TIMEOUT, 30);

    // With CURLOPT_RETURNTRANSFER set, curl_exec() yields the body as a
    // string or false on failure. Return it unchanged: a loose truthiness
    // check would misreport a body of "" or "0" as a failed transfer.
    $data = curl_exec($c);

    // From PHP 8 the handle is an object released automatically; older
    // versions use a resource that is closed explicitly here.
    if (PHP_VERSION_ID < 80000) {
        curl_close($c);
    }

    return $data;
}