0

我正在尝试在以下网站中提取表格

http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01-01&m=indv&o=EMC+CORP.&p=0&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table

URL 已变为第 2 页的地址,但无论等待多长时间,屏幕截图和保存的 .doc 文件中出现的始终是同一张表格。而在网站上手动单击第 2 页时,表格会自动更新。任何帮助都将不胜感激。

下面是我的代码

var casper = require('casper').create();
var fs = require('fs');
// Open the jet-tracker results page and, once it has loaded, save a
// screenshot and the text of the results table.
//
// The URL must not contain whitespace: the original literal had a run of
// spaces inside the `e=2011-01-01` parameter (a paste artifact), which made
// it differ from the URL compared against later in the script.
casper.start('http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01-01&m=indv&o=EMC+CORP.&p=0&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table', function() {

    // Screenshot of the page as loaded, for comparison with the later capture.
    this.capture("crap0" + ".png");

    // Runs inside the page: collect the innerText of every <table> that is a
    // direct child of #table_results and return the texts as an array.
    var firstRow = this.evaluate(function () {
        var elements = __utils__.getElementsByXPath('//*[@id="table_results"]/table');
        return [].map.call(elements, function(element) {
            return element.innerText;
        });
    });

    // NOTE(review): firstRow is an array; presumably fs.write() coerces it to
    // a comma-joined string - confirm against the PhantomJS fs documentation.
    fs.write('pook.doc', firstRow, 'w');

});

// Second step: click the pagination link, then try to capture and save the
// table of the next results page.
casper.then(function() {
 // Click the third <a> inside #results-pagination (the question describes
 // this as the link to page 2).
this.click({
type: 'xpath',
  path: '//*[@id="results-pagination"]/div/a[3]'
});

 // Alternative that was tried: open the page-2 URL directly instead of clicking.
 // var url ='http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01-01&m=indv&o=EMC+CORP.&p=1&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table'
//this.open(url);

// Wait until the current URL equals the page-2 URL (p=1), then log the outcome.
// NOTE(review): this only checks the URL, not whether the table content has
// been refreshed.
this.waitFor(function check() {
    return (this.getCurrentUrl() === 'http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01-01&m=indv&o=EMC+CORP.&p=1&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table');
    },
function then() { // step to execute when check() is ok
    this.echo('Navigated to page 2', 'INFO');
},
function timeout() { // step to execute if check has failed
    this.echo('Failed to navigate to page 2', 'ERROR');
});
// NOTE(review): capture() is called directly in this step body, while
// waitFor()/wait() presumably only schedule later steps - so this screenshot
// is likely taken before either wait has elapsed. Confirm against the
// CasperJS step-ordering documentation.
this.capture("crap" + ".png");

// Adds a 20 second wait whose callback only prints a message.
this.wait(20000, function() {
    this.echo("I've waited for 20 seconds.");
});
// Runs inside the page: collect the innerText of every <table> that is a
// direct child of #table_results.
// NOTE(review): like capture() above, this is invoked directly in the step
// body rather than inside a wait callback - likely the reason the same table
// is saved again; verify.
var firstRow2 = this.evaluate(function () {
    var elements2 = __utils__.getElementsByXPath('//*[@id="table_results"]/table');
    return [].map.call(elements2, function(element2) {
        return element2.innerText;
    });

});

// Save the collected table text to a second file.
fs.write('poop.doc', firstRow2, 'w');
});


// Start executing the registered steps.
casper.run();
4

1 回答 1

0

你已经很接近了!请记住,只有当页面上下文包含动态内容时,才必须使用 waitFor() 函数,而这里并不属于这种情况。

尝试:

var casper = require('casper').create();
var fs = require('fs');

// Load the jet-tracker results page; when it is ready, take a screenshot and
// dump the text of the results table to a file.
casper.start('http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01-01&m=indv&o=EMC+CORP.&p=0&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table', function() {

    // Screenshot of the page as initially loaded.
    this.capture("crap0" + ".png");

    // Executed in the page context: gather the innerText of each <table>
    // directly under #table_results.
    var tableTexts = this.evaluate(function () {
        var tables = __utils__.getElementsByXPath('//*[@id="table_results"]/table');
        var texts = [];
        for (var i = 0; i < tables.length; i++) {
            texts.push(tables[i].innerText);
        }
        return texts;
    });

    fs.write('pook.doc', tableTexts, 'w');
});

// Second step: follow the pagination link, then scrape the table again in a
// follow-up step.
casper.then(function() {
    // Pagination link to click: the third <a> inside #results-pagination.
    var paginationLink = {
        type: 'xpath',
        path: '//*[@id="results-pagination"]/div/a[3]'
    };
    this.click(paginationLink);

    // Register the capture and extraction as a separate step following the click.
    casper.then(function() {

        this.capture("crap" + ".png");

        // Executed in the page context: gather the innerText of each <table>
        // directly under #table_results.
        var tableTexts = this.evaluate(function () {
            var tables = __utils__.getElementsByXPath('//*[@id="table_results"]/table');
            var texts = [];
            for (var i = 0; i < tables.length; i++) {
                texts.push(tables[i].innerText);
            }
            return texts;
        });

        fs.write('poop.doc', tableTexts, 'w');
    });
});

// Start executing the registered steps.
casper.run();
于 2013-05-23T13:34:50.693 回答