我想制作一个网络爬虫,从任意给定的网址中提取标题、描述、关键字和图像,并在提取后将它们保存到数据库中。但我的代码无法正确提取图像。任何帮助都将不胜感激。
// Parse the fetched markup and pull out the page-level metadata that is
// stored for this URL further down.
var $ = cheerio.load(html);
var title = $('head title').text();
// .attr() yields undefined when the meta tag is absent; the insert below
// substitutes '' in that case.
var keywords = $('head meta[name=keywords]').attr('content');
var desc = $('head meta[name=description]').attr('content');
// Every anchor on the page; these are the candidates for the crawl queue.
var links = $('a');
// <img> elements carry their location in `src` (they have no `content`
// attribute), so reading `content` always produced undefined. .attr() reads
// from the first matched element, so this is the first image on the page.
// The value is kept exactly as written in the markup (it may be relative).
var img = $('img').attr('src');
console.log('Crawling "%s" | %s',title,this.url);
// Build one INSERT statement per crawlable link found on the page and run
// them all through the connection; a failure is logged, not thrown.
//
// The statements are collected with .each() into a plain array instead of
// chaining .map().filter() on the cheerio selection: whether .map() hands
// back a native array or another cheerio collection depends on the cheerio
// version, and in the latter case the .filter() callback receives
// (index, element), which makes a `!!el` test filter on the index.
var queueInserts = [];
links.each(function(){
    var href = $(this).attr('href');
    // Skip anchors without an href, links back to the current page, in-page
    // fragments (#section) and direct links to image files.
    // NOTE(review): the loose comparison with self._url is kept as in the
    // original because the type of self._url is not visible here.
    if(!href || href == self._url || /^#(\w)+/.test(href) || util.imageRegexp.test(href)){
        return;
    }
    // External links are queued as written; anything else is resolved
    // against the URL of the page being crawled.
    var target = util.isExternal(href) ? href : util.resolveRelativeURL(href,self._url);
    queueInserts.push('INSERT INTO `queue` SET `id` = \''+util.id()+'\', `url` = '+self.conn.escape(target)+', `from` = '+self.conn.escape(from));
});
async.map(queueInserts,this.conn.query.bind(this.conn),function(e,result){
    if(e){
        console.log('Error writing queue.');
        console.log(e);
    }
});
// Record the crawled page itself in the `websites` table; the object below
// is passed to the query as the value for the `?` placeholder.
// NOTE(review): the closing `);` of this call is not visible in this excerpt.
this.conn.query('INSERT INTO `websites` SET ?',{
id:util.id(),
url:this.url,
from:from,
title:title,
// Metadata that was not found on the page is stored as an empty string.
keywords:keywords || '',
img:img || '',
desc:desc || ''
}