3

我正在尝试抓取一个 ASP.NET(aspx)网站,该网站上有若干通过 JavaScript `__doPostBack` 触发的按钮。其中一个按钮会返回当前页面的可打印视图,另一个按钮则向客户端推送 .csv 文件。

我在这里看到一个描述 csv 下载问题但没有回答的问题:PhantomJS download using a javascript link

所以我专注于尝试在 phantomJs 中获取可打印视图,因为它看起来更简单(它显示在浏览器窗口中,一定有办法!)

按钮代码:

<!-- "Print Results" link: its href is a javascript: URL that calls __doPostBack
     with this control's name as the first argument and an empty string as the second. -->
<a id="ctl00_ctl00_ctl00_MainContentPlaceHolder_PrintResultsLinkButton" 
  title="Print Results" class="btn-blue"     
  href="javascript:
    __doPostBack('ctl00$ctl00$ctl00$MainContentPlaceHolder$PrintResultsLinkButton','')
">
    <span>Print Results</span>
</a> 

我可以在 PhantomJS/CasperJS 中点击该链接,但似乎没有任何效果。我认为点击按钮时应该会发出一个带有全部请求头的请求,但我不知道如何接收响应。有人能帮忙吗?

我的 CasperJS 代码,在应该得到结果页面之前的部分都能正常工作:https://gist.github.com/xShirase/7156131

我也尝试过直接执行该 JS 函数,它在 Chrome 控制台中可以工作,但在 PhantomJS 中仍然得不到结果……

最新的一次尝试:我先加载一次页面,以获取 cookie 和隐藏输入域的值,然后尝试自己发送 POST 请求。截图中的输出仍然与原页面相同,所以我知道我的请求大体上没有问题,但为什么得不到正确的结果?

代码 :

// Start the Casper instance without an initial URL; the page is opened later by `capture`.
// NOTE(review): `casper` is not created in this snippet — presumably via
// require('casper').create() in the full script; confirm against the gist.
casper.start();

/**
 * First navigation step: opens the search-criteria page, clicks the `a.btn`
 * link and then schedules `grabResults` as the following step.
 *
 * Declared with `function` instead of being assigned to an undeclared name,
 * so the script no longer relies on an implicit global (which throws in
 * strict mode).
 */
function capture() {
    var url = 'http://www.cms.gov/apps/physician-fee-schedule/search/search-criteria.aspx';
    casper.open(url).thenClick('a.btn', function () {
        // `this` is the object the click callback is invoked on.
        this.then(grabResults);
    });
}

// Shared state written by grabResults and read by grabPRResults:
// [value of the __VIEWSTATE field, document.cookie] of the results page.
// Declared explicitly instead of being created as an implicit global.
var a;

/**
 * Opens the search-results page, saves a screenshot to page.png, stores the
 * page's __VIEWSTATE value and cookie string in `a`, then schedules
 * `grabPRResults` as the next step.
 */
function grabResults() {
    var resultsUrl = 'http://www.cms.gov/apps/physician-fee-schedule/search/search-results.aspx?Y=0&T=0&HT=2&CT=3&H1=00100&H2=11400&M=5';

    this.echo(this.getCurrentUrl());
    this.open(resultsUrl).then(function () {
        this.capture('page.png');
        a = this.evaluate(function () {
            // Plain DOM lookup so the scrape does not depend on the page
            // having jQuery loaded; a missing field yields an empty string.
            var viewState = document.getElementById('__VIEWSTATE');
            return [viewState ? viewState.value : '', document.cookie];
        });
    });
    this.then(grabPRResults);
}


/**
 * Re-posts the results-page form with the Print Results link button as the
 * postback target, waits 25 seconds, then schedules `lest` to capture the
 * outcome.
 *
 * Reads the global `a` ([__VIEWSTATE value, cookie string]) that
 * `grabResults` fills in.
 *
 * Changes from the original request:
 *  - No hard-coded `Content-Length`: the body size depends on the live
 *    __VIEWSTATE, so a fixed value cannot be right.
 *  - `Host`, `Connection` and `Accept-Encoding` are no longer forced; they are
 *    left to the HTTP stack (NOTE(review): forcing Accept-Encoding is reported
 *    to leave PhantomJS responses undecoded — confirm on the target version).
 */
function grabPRResults() {
    var resultsUrl = 'http://www.cms.gov/apps/physician-fee-schedule/search/search-results.aspx?Y=0&T=0&HT=2&CT=3&H1=00100&H2=11400&M=5';

    // Common name prefix shared by every control posted from the results form.
    var prefix = 'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$';
    var gridPrefix = prefix + 'ResultsControl1$PFSSGridView$';

    var headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Origin': 'http://www.cms.gov',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': resultsUrl,
        'Accept-Language': 'fr,en-US;q=0.8,en;q=0.6',
        'Cookie': a[1]
    };

    // Form fields, inserted in the same order as the original request.
    var data = {
        '__EVENTTARGET': prefix + 'PrintResultsLinkButton',
        '__EVENTARGUMENT': '',
        '__LASTFOCUS': '',
        '__VIEWSTATE': a[0]
    };
    data[prefix + 'PFSSResultsCPEWrapper_ClientState'] = 'false';
    data[prefix + 'YearDropDown'] = '2013';
    data[prefix + 'TypeOfInfoDropDown'] = 'pi';
    data[prefix + 'HCPCTypeDropDown'] = 'range';
    data[prefix + 'CarrierTypeDropDown'] = 'all';
    data[prefix + 'HCPC1Textbox'] = '00100';
    data[prefix + 'HCPC2Textbox'] = '11400';
    data[prefix + 'HCPC3Textbox'] = '';
    data[prefix + 'HCPC4Textbox'] = '';
    data[prefix + 'HCPC5Textbox'] = '';
    data[prefix + 'ModifierDropDown'] = '%';
    data[prefix + 'CarrierDropDown'] = 'default';
    data[prefix + 'CarrierLocalityDropDown'] = 'default';
    // The space after `$` in the two tbGotoPage names is kept exactly as captured.
    data[gridPrefix + 'ctl01$ tbGotoPage'] = '';
    data[gridPrefix + 'ctl01$PFSSGridViewtopddlTopPageSize'] = '10';
    data[gridPrefix + 'ctl14$ tbGotoPageBottom'] = '';
    data[gridPrefix + 'ctl14$PFSSGridViewbottomddlBottomPageSize'] = '10';
    data[prefix + 'DownloadsWidget1$DownloadsCPEWrapper_ClientState'] = 'false';

    this.open(resultsUrl, {
        method: 'post',
        headers: headers,
        data: data
    }).then(function () {
        this.wait(25000);
        this.then(lest);
    });
}

/**
 * Final step: saves a screenshot of the current page to ppp.png.
 * Declared with `function` so it is no longer an implicit global.
 */
function lest() {
  this.capture('ppp.png');
}

// Queue `capture` as the first step, then run the queued steps.
casper.then(capture);
casper.run();
4

1 回答 1

3

据我理解,您的主要问题是如何在回发(postback)完成时得到通知。我做了一个简单的 aspx 页面来模拟耗时较长的回发,这种做法应该也适用于您的情况。要等待回发完成,可以利用 CasperJS 的标准功能(waitFor)。我不太愿意公开发布针对政府网站的抓取说明,希望我的测试页面足以帮助您解决问题。

CasperJS

// Create the Casper instance with the log level set to "debug".
// Uncomment `verbose` to have log messages printed to the console.
// The bare `casper.start()` that used to follow was dropped: the script calls
// `casper.start(url, ...)` further down, which starts the instance anyway.
var casper = require('casper').create({
    // verbose: true,
    logLevel: "debug"
});

// Relay every 'remote.message' event payload to Casper's own output.
casper.on('remote.message', function relayRemoteMessage(text) {
    this.echo(text);
});


/**
 * Echoes the URL of the page Casper is currently on.
 * Declared with `var` so it is no longer an implicit global.
 * NOTE(review): not called by any step in this example script.
 */
var grabResults = function () {
    this.echo(this.getCurrentUrl());
};

// Open the test page, click #Button1 and wait until the postback has finished.
casper.start('http://localhost:13851/default.aspx', function () {

    // Register the endRequest listener BEFORE clicking. Registering it after
    // the click (as before) races with the postback: if the request completes
    // first, the flag is never set and waitFor times out.
    this.evaluate(function () {
        window.onPostBackComplete = false;
        Sys.WebForms.PageRequestManager.getInstance().add_endRequest(function () {
            console.log("client: doPostback complete");
            window.onPostBackComplete = true;
        });
    });

    this.thenClick('#Button1', function () {
        // Poll the page-side flag until the listener above has set it.
        this.waitFor(function () {
            return this.evaluate(function () {
                return window.onPostBackComplete === true;
            });
        }, function then() {
            this.echo("doPostback complete");
            this.echo("value of test label: " + this.fetchText('#Label1'));
        }, function timeout() {
            this.echo("-- > timeout");
        },
        5000);
    });

});

// Run the queued steps. When a completion callback is supplied, CasperJS does
// not exit on its own, so exit explicitly to avoid leaving the process hanging.
casper.run(function () {
    this.echo("finished");
    this.exit();
});

Default.aspx

<%@ Page Language="C#" AutoEventWireup="true" %>
<%-- Test page: a label and a button inside an UpdatePanel; the button's click
     handler changes the label text and sleeps for one second. --%>
<!DOCTYPE html>
<script runat="server">    
    // Click handler wired to Button1 through its OnClick attribute:
    // sets Label1's text, then blocks the request for 1000 ms.
    protected void Button1_Click(object sender, EventArgs e)
    {
        Label1.Text = "Slow loaded text";
        System.Threading.Thread.Sleep(1000);  // simulate a slow server
    }
</script>
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title>Sample page</title>
</head>
<body>
    <form id="form1" runat="server">
        <%-- NOTE(review): presumably the ScriptManager is what exposes the client-side
             Sys.WebForms.PageRequestManager used by the scraping script; confirm. --%>
        <asp:ScriptManager ID="ScriptManager1" runat="server"></asp:ScriptManager>
        <div>
            <%-- Label1 and Button1 both live inside this UpdatePanel's ContentTemplate. --%>
            <asp:UpdatePanel ID="UpdatePanel1" runat="server" >
                <ContentTemplate>
                    <asp:Label ID="Label1" runat="server" Text="Default Label"></asp:Label>
                    <br />
                    <asp:Button ID="Button1" runat="server" Text="Button" OnClick="Button1_Click"  />
                </ContentTemplate>
            </asp:UpdatePanel>
        </div>
    </form>
</body>
</html>

另请参阅:http://forums.asp.net/t/1245557.aspx?how+to+detect+the+end+of+__doPostBack+in+Javascript

于 2014-10-27T19:32:10.813 回答