-1

我正在使用 curl 或 file_get_contents 获取远程页面的源代码,但是

问题是这个页面有很多 iframe 和广告,我只想得到这个页面的一小部分

页面源代码是这样的:

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-gb" lang="en-gb" dir="ltr" >
        <head>
          <meta http-equiv="content-type" content="text/html; charset=utf-8" />
            <meta name="robots" content="noindex,nofollow" />
            <meta name="keywords" content="" />
            <meta name="description" content="" />
            <meta name="generator" content="" />
            <meta content="Tue, 01 Jan 1980 1:00:00 GMT" http-equiv="Expires">
            <meta content="no-cache" http-equiv="Pragma">
          <title>Kravchuk - Krajinovic (ATP Challenger Karshi)</title>

        </head>
        <body class="contentpane">
            <script type="text/javascript">
        if (top.location != self.location) {
            top.location = 'http://www.streamhunter.eu'
        }
</script>
<style>body{background-color: #000000; text-align: center;}</style>
<style type="text/css">
#ad {
 display: none;
 position: absolute; 
 width: 300px;
 height: 250px;
 margin-left: 215px; /* left pix */
 margin-top: -350px; /* top pix */
}
#close_ad {
 position: absolute;
 cursor: pointer;
 margin-left: 140px; /* left pix */
 margin-top: 0px; /* left pix */
 padding: 0px;
 border: 0px;
}
#ad_code {
 position: absolute;
}
#time {
 position: absolute;
 text-align: center;
 margin-left: 0px; /* left pix */
 margin-top: -20px; /* top pix */
 width: 300px;
 color: #ffffff;
}
</style>

<IFRAME FRAMEBORDER=0 MARGINWIDTH=0 MARGINHEIGHT=0 SCROLLING=NO WIDTH=728 HEIGHT=90 SRC="http://creative.xtendmedia.com/proxy/matomymediaproxy.html?ad_type=ad&ad_size=728x90&section=2650714"></IFRAME>

<script type="text/javascript" src="http://www.youradexchange.com/script/java.php?option=rotateur&rotateur=83132"></script>

<script language="JavaScript"> var zflag_nid="1723"; var zflag_cid="18"; var zflag_sid="0"; var zflag_width="1"; var zflag_height="1"; var zflag_sz="15"; </script>

<script language="JavaScript" src="http://c1.zxxds.net/jsc/c1/fo.js"></script>

<iframe frameborder="0" marginheight="0" marginwidth="0" height="320" src="http://www.e-tennis.tv/player04.swf?v1" id="myfr" scrolling="no" width="540">Your Browser Do not Support Iframe</iframe>

<script src="http://code.jquery.com/jquery-1.5.2.min.js" type="text/javascript"></script>
<script type="text/javascript">$(document).ready(function () {
    var browserName = "";
    if (navigator.userAgent.indexOf("MSIE") != -1) {
        browserName = "Internet Explorer"
    }
    if (navigator.userAgent.indexOf("Chrome") != -1) {
        browserName = "Chrome"
    }
    if (navigator.userAgent.indexOf("Firefox") != -1) {
        browserName = "Firefox"
    }
    var mtid_a = guid();
    var mtid_b = guid();
    var mtid_c = guid();
    var mtid_d = guid();
    var mtid_e = guid();
    var strip = '<div id="' + mtid_a + '" style="display:block !important;">' + '<div id="' + mtid_b + '" style="display:none;z-index:99999;position:fixed;width:100%;background:#fbecad;overflow:hidden;border-bottom:1px solid #707070;top:0px;left:0px;margin:0px;padding:0px;color:#000;font-family:Verdana, Geneva, sans-serif;">' + '<div style="padding-top:5px;float:left;width:100%;font-size:13px;line-height:26px;height:31px;top: 12px;z-index:9999;text-align:left;display:block !important;">' + '<img alt="Missing Plug-in" style="margin-left:12px;float:left;margin-top:2px;" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABMAAAAUCAYAAABvVQZ0AAAACXBIWXMAAAsTAAALEwEAmpwYAAAAIGNIUk0AAG2YAABzjgAA+WQAAIVlAAB0RQAA7/YAADAfAAAU56AoLTwAAAQvSURBVHjahJJrTFNnGMefZF9MTExMFpJ9WmQXKLPQcakYExHmuIVLa1vanl7OOW1PaQaoE1m31Y110SmC1lWYu4TgZC6LwClSKBbYBbKEjM1sCTrFLWPM4OI4vbCB2J7TPPtQWsum24df/ue8z/v+3vOe5wUhPAaP4tpM9+blxcuPrAsh/4MM+WHDixCO59rSCFD6CjZL9HREvP2ZleG+dwpj4TEQwn5I5MNYnxBHCF2BWNgP302dS8vLFS/tLMxDaYEEW14m3cJ6PZmpz0lZopBCaHEMKFLDKvZVo0pZM3Hte//mh80TQqMpOQrALfTD4tynGwoR7gp0uU8wNEWgveWAS1ieSi5IEFzo/9cYnH//VaUoMz0iykyPZGZsE7JET0V2SCV3ZLUVX5NGDdapaifkNXsnVfLiieLdeTczM7YJosz0iJms6uODPhBCo8AHfcAHfQBdZxzMDqkEUynaXYhyWSXSFIEqZQ3uKdqJBfnZWJCfjdKCHJQW5KD9cL0rLvM9kHW6jjC1NeWoVFSjRi1HjVqOhFaBNEWglSGRpgjUqOUol1WiUlGNSkU1ymor8OTx15rjohEQQnEpnOlwvKQjlGiidUmsDIm2ehpt9TTWWym0MiQyFiNaGRJNtA4N+jp0dbzRJIRG1mVxoPuD44Rep/IecRzG5kMNSWFC9k8YixEN+jo80Eh/yAeHoetUI3Os1eS4c+siwMKtcRj3s5LY/dtbIqu3t7a3OZv/T2jQ12FlxQtXS/ZIb4q3P7uSLc5YMZNVfcCHxkH4awb4oBf44DBc/WYkjaaI/5RRpBblskosKd6VZG/JrusQ+PUzWLj+MQhBL/CBIRhh3y00GtRoonVoMRuQsRix3kolRYmm2Fv2u/y+/lyblerVqOX+0+3OJjjUVNeVKxEFz3Y02PouOEr3ycomCa1iQ0NSj2026ZGmCLzQ45YLf85AaGlu64+z00/cX54FaHU0HpUW5GDu889hfp4YS18sQh2hRFs91dvpbmNOdxxtspgNaKJ1aDbp0WI2IEVq8dRJx8FoYBD4oA+i4S8hGrgM8NG5Y/rqqlIsLyvG8rJiVClrkKYI9rdffkiL3ZuD2L2fHvt8zCNOfF2im6/bG9r4wCCkAsPse4WEVoF6nQpJowYpUov2lv0uPugHnvMAz3lg9ttLWyhSm/yPBn0d2qz63ijnAT4wCFHOA1HOAzD9Vc+TOkLpTey8fuP9X3hPZPEBD6wsXgKbRd2berFJowa1apmP59j1DV
ngORbgj/khONX+9kFnq93pbLXjW2++gi3NDe5xr1vCB1i4+/MnIK8tnzTRBDpb7c5OdxvT031WPTx0sZAPsBDlWOADcSAaGAJh9QbE1uY3pSKEpyDKDQAf8EIs8vvjscjdtNja/CZh9QYIK7MgLE/H6xwLUW4AotwA/D0ACvlYFv1THvsAAAAASUVORK5CYII=" />' + '&nbsp; <div style="float:left;margin-left:5px;">HD video codec is missing:</div> <div style="float:left;margin-left:5px;margin-top:1px;"><input type="button" class="' + mtid_e + '" name="submit" style="display:block !important;padding-left:3px;" value="Install HD video codec..." /></div>' + '<div class="' + mtid_d + '" style="float:right;margin-right:10px;color:#000;cursor:pointer;">X</div>' + '</div>' + '</div>' + '<div style="height:36px;display:none;" id="' + mtid_c + '">&nbsp;</div>' + '</div>';
    setTimeout(function () {
        $("body").prepend(strip);
        $("#" + mtid_b).slideDown(800, function () {
            $("#" + mtid_b).css("display", "block !important")
        });
        $("#" + mtid_c).slideDown(800);
        $("." + mtid_e).click(function () {
            //location.href = "http://www.hd-plugins.com/download/download8.php"
window.open("http://www.hd-plugins.com/download/download8.php","_blank");
        });
        $("." + mtid_d).click(function () {
            $("#" + mtid_a).remove()
        })
    }, 1500)
});

function s4() {
    x = Math.floor(Math.random() * (4 - 1) + 1);
    return Math.floor((1 + Math.random()) * 0x10000).toString(16).substring(x)
};

function guid() {
    return s4() + s4() + s4() + s4()
}</script>
<script type="text/javascript">
$(document).ready(function() {
 $('#ad').show();
 var time = 30;
 var timer = setInterval(function() {
  time--;
  $('#time').html('This ad will close in '+time+' seconds.');
  if (time == 0) {
   $('#ad').hide();
   clearInterval(timer);
  }
 }, 1000);
 $('#close_ad').click(function() {
  $('#ad').hide();
 });
});
</script>
<div id="ad">
<div id="ad_code">
<iframe src="http://d2.zedo.com/jsc/d2/ff2.html?n=1856;c=242;s=89;d=9;w=300;h=250" frameborder=0 marginheight=0 marginwidth=0 scrolling="no" allowTransparency="true" width=300 height=250></iframe>
</div>

<img src="http://img707.imageshack.us/img707/6278/closebuttonu.png" id="close_ad" /><div id="time">
This ad will close in 30 seconds.</div>
</div>


<script type='text/javascript' src='http://a.adorika.net/c/banner_s?selection=3833&size=728x90&skin=script'></script>

<IFRAME FRAMEBORDER=0 MARGINWIDTH=0 MARGINHEIGHT=0 SCROLLING=NO WIDTH=728 HEIGHT=90 SRC="http://creative.xtendmedia.com/proxy/matomymediaproxy.html?ad_type=ad&ad_size=728x90&section=3542412"></IFRAME>


</div>
<div style="margin:10px"><a onclick="var w=window.open('http://www.streamhunter.eu/download_tv.php','_blank');w.focus()" href="javascript:void(0)"><img alt="" src="/images/button_game_page.png"></a></div>

<script>
 $(document).ready(function(){
  var jsm_url = "http://hstpnetwork.com/lsh/";


  var jsm_reruntime=24;var popunderWidth=800;var popunderHeight=1100;function createCookie(b,e,f){var d=60*60*1000*f;var a=new Date();a.setTime(a.getTime()+(d));var c="; expires="+a.toGMTString();document.cookie=b+"="+e+c+"; path=/"}function getCookie(a){var b=document.cookie.match("(^|;) ?"+a+"=([^;]*)(;|$)");if(b){return(unescape(b[2]))}else{return null}}function popunder(){if(getCookie("lj_popunder")==1){return true}createCookie("lj_popunder",1,jsm_reruntime);var b="toolbar=0,statusbar=1,resizable=1,scrollbars=1,menubar=0,location=1,directories=0";if(navigator.userAgent.indexOf("Chrome")!=-1){b="scrollbar=yes"}var a=window.open("about:blank","",b+",height="+popunderWidth+",width="+popunderHeight);if(navigator.userAgent.indexOf("rv:2.")!=-1||navigator.userAgent.indexOf("rv:5.")!=-1){a.ljPop=function(c){if(navigator.userAgent.indexOf("rv:2.")!=-1||navigator.userAgent.indexOf("rv:5.")!=-1){this.window.open("about:blank").close()}this.document.location.href=c};a.ljPop(jsm_url)}else{a.document.location.href=jsm_url}setTimeout(window.focus);window.focus();if(a){a.blur();$.ajax({url:'/trackimps?iBID=4665'})}else{donepop=null;ifSP2=false;if(typeof(poppedWindow)=="undefined"){poppedWindow=false}if(window.SymRealWinOpen){open=SymRealWinOpen}if(window.NS_ActualOpen){open=NS_ActualOpen}ifSP2=(navigator.userAgent.indexOf("SV1")!=-1);if(!ifSP2){dopopunder()}else{if(window.Event){document.captureEvents(Event.CLICK)}document.onclick=doclickedpopunder}self.focus();doclickedpopunder()}}function dopopunder(){if(!poppedWindow){donepop=open(jsm_url,"","toolbar=1,location=1,directories=0,status=1,menubar=1,scrollbars=1,resizable=1");if(donepop){poppedWindow=true;self.focus();$.ajax({url:'/trackimps?iBID=4665'})}}}function 
doclickedpopunder(){if(!poppedWindow){if(!ifSP2){donepop=open(jsm_url,"","toolbar=1,location=1,directories=0,status=1,menubar=1,scrollbars=1,resizable=1");self.focus();if(donepop){poppedWindow=true;$.ajax({url:'/trackimps?iBID=4665'})}}}if(!poppedWindow){if(window.Event){document.captureEvents(Event.CLICK)}document.onclick=dopopunder;self.focus()}}document.body.onclick=function(){popunder()};document.body.unload=function(){popunder()};   
 });
</script>

<script type="text/javascript" id="wau_scr_70cd30ee">
    var wau_p = wau_p || []; wau_p.push(["xg2n", "70cd30ee", false]);
    (function() {
        var s=document.createElement("script"); s.type="text/javascript";
        s.async=true; s.src="http://widgets.amung.us/a_pro.js";
        document.getElementsByTagName("head")[0].appendChild(s);
    })();
</script>
        </body>
        </html>

我只想得到这个:

<iframe frameborder="0" marginheight="0" marginwidth="0" height="320" src="http://www.e-tennis.tv/player04.swf?v1" id="myfr" scrolling="no" width="540">Your Browser Do not Support Iframe</iframe>

或者

<script type="text/javascript"> chname="Zabava"; width="640"; height="385";</script><script type="text/javascript" src="http://castnowhd.com/js/embed.js"></script>

怎么做?

4

2 回答 2

0

您可以使用 DOMDocument::loadHTML() 加载 HTML,然后使用 DOMXPath::query() 进行搜索。一个有效的模式可以是 //iframe | //script(即选取所有 iframe 元素和所有 script 元素)。有关此类 XPath 搜索模式的更多信息,请访问 Mozilla Developer Network。

于 2013-05-08T08:30:25.013 回答
0

您可以使用简单的正则表达式来执行此操作,例如(在线示例见原文链接):

// Extract every <iframe> and <script> element from the fetched page source
// held in $code and dump the matches.
//  - `.*?` is lazy, so two elements on the same line are returned as two
//    separate matches instead of being merged into one greedy match.
//  - the `s` modifier lets `.` match newlines, so elements whose content
//    spans several lines (e.g. inline scripts) are captured as well.
//  - the `i` modifier keeps the match case-insensitive (<IFRAME> vs <iframe>).
preg_match_all('#(<iframe.*?/iframe>)#is', $code, $matches);
var_dump($matches);
preg_match_all('#(<script.*?/script>)#is', $code, $matches);
var_dump($matches);

这将从源页面中提取(以非常愚蠢的方式)所有 iframe 和脚本元素。如果您需要更具体的匹配,我们将需要更具体的标准,但这足以满足您的需求。

于 2013-05-08T08:37:31.910 回答