我有以下代码例程，运行效果很好。唯一的问题是，我需要按照与 `links` 数组相同的顺序返回结果。
例如，我需要先返回 google.com 链接的结果，然后是 yahoo，依此类推。目前代码是以“随机”顺序返回结果的。
var Nightmare = require('nightmare');
var async = require('async');
// Pages to scrape, listed in the order their results are wanted.
var links = [
  'http://www.google.com',
  'http://www.yahoo.com',
  'http://www.bing.com',
  'http://www.aol.com',
  'http://duckduckgo.com',
  'http://www.ask.com',
];
/**
 * Opens `url` in a fresh Nightmare instance, logs the page's body text next
 * to the URL, and signals completion through `callback`.
 *
 * The scraped text is only logged; it is not handed to `callback`, which is
 * always invoked without arguments.
 *
 * @param {string} url - Address of the page to load.
 * @param {function(): void} callback - Invoked with no arguments from the
 *   `end` handler.
 */
var scrape = function (url, callback) {
  var browser = new Nightmare();

  browser
    .goto(url)
    .wait('body')
    // Kept as a plain function: it reads `document`, so it is meant to be
    // evaluated in the page rather than in Node.
    .evaluate(function () {
      return document.querySelector('body').innerText;
    })
    .then((pageText) => {
      console.log(url, pageText);
    });

  // Wrap `callback` so that whatever `end` passes along is not forwarded.
  browser.end(() => {
    callback();
  });
};
// Run `scrape` over every link. No final callback is passed here, so
// async.map has nowhere to deliver collected results; the only output is the
// console.log inside `scrape`, which fires whenever each page finishes.
async.map(links, scrape);
更新：谢谢 @christophetd。以下是我修改后可以正常运行的代码：
var Nightmare = require('nightmare');
var async = require('async');
// Pages to scrape; async.map reports results in this same order.
var links = [
  'http://www.google.com',
  'http://www.yahoo.com',
  'http://www.bing.com',
  'http://www.aol.com',
  'http://duckduckgo.com',
  'http://www.ask.com',
];
/**
 * Loads `url` in a fresh Nightmare instance and reports the page's body text
 * through a node-style callback, as expected by async.map.
 *
 * @param {string} url - Address of the page to load.
 * @param {function(?Error, string=): void} callback - Called with
 *   `(null, url + bodyText)` on success, or with `(err)` when the Nightmare
 *   promise rejects.
 */
var scrape = function (url, callback) {
  new Nightmare()
    .goto(url)
    .wait('body')
    // Kept as a plain function: it reads `document`, so it is meant to be
    // evaluated in the page rather than in Node.
    .evaluate(function () {
      return document.querySelector('body').innerText;
    })
    // Queue the shutdown before `.then()`: the Nightmare README states that
    // `.then()` must be called after `.end()` for the end task to run.
    .end()
    .then(
      function (result) {
        callback(null, url + result);
      },
      // Forward failures; otherwise async.map would wait forever for this
      // item. Passed as the second argument of `.then()` (not a trailing
      // `.catch()`) so an exception thrown by `callback` itself cannot
      // trigger a second call.
      function (err) {
        callback(err);
      }
    );
};
// Scrape every link and print the collected results once all are done.
// async.map delivers `results` in the same order as `links`, regardless of
// which page finishes first.
async.map(links, scrape, function reportResults(err, results) {
  if (err) {
    console.log(err);
  } else {
    console.log(results);
  }
});