我正在尝试在以下网站中提取表格
url 更改为第 2 页,但无论暂停如何,屏幕截图或保存的 .doc 文件中都会出现相同的表格。在网站上,当您单击第 2 页时,表格会自动更新。任何帮助将不胜感激。
下面是我的代码
var casper = require('casper').create();
var fs = require('fs');
casper.start('http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01- 01&m=indv&o=EMC+CORP.&p=0&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table', function() {
this.capture("crap0" + ".png");
var firstRow = this.evaluate(function () {
var elements = __utils__.getElementsByXPath('//*[@id="table_results"]/table');
return [].map.call(elements, function(element) {
return element.innerText;
});
});
fs.write('pook.doc', firstRow, 'w');
});
casper.then(function() {
//Click on 1st result link
this.click({
type: 'xpath',
path: '//*[@id="results-pagination"]/div/a[3]'
});
// var url ='http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01-01&m=indv&o=EMC+CORP.&p=1&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table'
//this.open(url);
this.waitFor(function check() {
return (this.getCurrentUrl() === 'http://projects.wsj.com/jettracker/#a=HYA&d=BED&e=2011-01-01&m=indv&o=EMC+CORP.&p=1&s=2007-01-01&sort=d&t=N125TM,N424TM,N448TM,N67TM,N866TM&v=table');
},
function then() { // step to execute when check() is ok
this.echo('Navigated to page 2', 'INFO');
},
function timeout() { // step to execute if check has failed
this.echo('Failed to navigate to page 2', 'ERROR');
});
this.capture("crap" + ".png");
this.wait(20000, function() {
this.echo("I've waited for 20 seconds.");
});
var firstRow2 = this.evaluate(function () {
var elements2 = __utils__.getElementsByXPath('//*[@id="table_results"]/table');
return [].map.call(elements2, function(element2) {
return element2.innerText;
});
});
fs.write('poop.doc', firstRow2, 'w');
});
casper.run();