我在市场上买一辆新车。与其反复搜索经销商网站,我认为这将是一个学习一点节点和 mongodb 的有趣而有趣的机会,所以我正在爬取当地经销商的网站以获取我感兴趣的品牌和型号。
我遇到的问题是在我的最终回调运行完成后节点不会终止。
var cheerio = require('cheerio');
var request = require('request');
var db = require('mongodb');
var S = require('string');
var log = require('console').log;
var async = require('async');
var links = [];
var website = 'http://www.yahoo.com';
async.series(
[
function(){
log('starting');
db.connect('mongodb://127.0.0.1:27017/test',
function(err, base){
if(err) throw err;
db = base;
});
},
request(website, start)
],
function(){
log('closing DB');
db.close();
});
function start(err,resp,body){
var $ = cheerio.load(body);
var numPages = 2;
$('.gbps').each(function(i,elem) {
links.push('http://www.yahoo.com');
});
var pageURLS = [];
for (var i = 2; i<=numPages; i++){
//create URLs for additional pages
pageURLS[i-2] = website;
}
var pages = 1;
log('getting page URLs');
pageURLS.forEach(function(url, index, array){
request(url, function(error,response,bodies) {
pages++;
var $ = cheerio.load(bodies);
$('.tab').each(function(i,elem) {
links.push('http://www.yahoo.com');
});
if (pages == numPages){
getDetailInfo();
};
});
});
}
function getDetailInfo(){
log(links.length);
links.forEach(function(link, index, array){
request(link, doStuff);
});
}
function doStuff(err, response, body){
if(err){
log(err);
}
parseDetailResponse(err,response,body, addToDB);
}
function parseDetailResponse(err,resp,body,callback){
log('parsing');
var $ = cheerio.load(body);
var specs = $('.specifications').children().map(function(i, elem){
var key = 'key';
var value = 'value';
var ret = {};
ret [ 'name' ] = key;
ret [ 'value' ] = value;
return ret;
});
var makeAndModel = 'makeAndModel';
callback(['picture url', 'vehicle description', 100, specs, makeAndModel]);
}
function getMakeAndModel(stuff){
var $ = cheerio.load(stuff);
temp = $('.gbps').map(function(i, elem){
var ret = {};
switch(i){
case 0:
ret['name'] = 'year';
ret['value'] = $(this).text();
break;
case 1:
ret['name'] = 'make';
ret['value'] = $(this).text();
break;
case 2:
ret['name'] = 'model';
ret['value'] = $(this).text();
break;
case 3:
ret['name'] = 'ignore';
ret['value'] = $(this).text();
break;
default:
ret['name'] = 'ignore';
ret['value'] = 'ignore';
}
return ret;
});
return temp;
}
function addToDB(arr){
log('adding to DB');
pic = arr[0];
description = arr[1];
price = arr[2];
specs = arr[3];
makeAndModel = arr[4];
var obj = {};
for (var i = specs.length - 1; i >= 0; i--) {
obj [specs[i].name] = specs[i].value;
};
for (var i = makeAndModel.length - 1; i >= 0; i--){
obj [makeAndModel[i].name] = makeAndModel[i].value;
};
db.collection('carsTest').update(
{VIN: obj.VIN},
{
$set: {
VIN: obj.VIN,
make: obj.make,
model: obj.model,
year: obj.year,
price: price,
engine: obj.Engine,
interior: obj.Interior,
exterior: obj.Exterior,
'model code': obj['Model Code'],
'stock number': S(obj['Stock Number']).toInt(),
transmission: obj.Transmission,
mileage: obj.Mileage ? obj.Mileage : 0,
description: description,
picture: pic,
}
},
{upsert: true, safe: true},
function(err,result){
if(err){
throw err;
}
});
log('finished with this one!');
}
我在这里省略并更改了相当数量的证明,没有进行大量错误检查或任何其他操作,但即使这样也会添加文档但不会退出。Node 只是坐在那里,等待某些事情发生,它从不调用最终回调来关闭数据库并退出。
> db.carsTest.find().pretty()
{
"_id" : ObjectId("52139aa7c9b7a39e0f1eb61d"),
"VIN" : null,
"description" : "vehicle description",
"engine" : null,
"exterior" : null,
"interior" : null,
"make" : null,
"mileage" : 0,
"model" : null,
"model code" : null,
"picture" : "picture url",
"price" : 100,
"stock number" : NaN,
"transmission" : null,
"year" : null
}