扩展赫尔曼的回答:
我发现 node-crawler 与 cheerio 的组合更易于管理。下面的代码可以更方便地跟踪您要查找的 meta 标签属性,并且很容易调整以包含其他标签。我是这样做的:
var crawler = require('crawler'),
url = require('url');
// Crawler whose per-page callback collects Open Graph and Twitter Card
// metadata from the page's <meta> tags and logs the result.
// NOTE(review): this relies on the (error, response, $) callback signature.
// Newer crawler releases reportedly pass (error, res, done) and expose cheerio
// as res.$ -- confirm against the installed version.
var c = new crawler({
    maxConnections: 10,
    /**
     * Extracts the social-sharing meta tags of one fetched page and logs them.
     *
     * @param {?Error} error - Error reported by crawler for this request, if any.
     * @param {Object} response - Response object supplied by crawler (not used here).
     * @param {Function} $ - cheerio-style selector function for the fetched page.
     */
    callback: function (error, response, $) {
        // Without this guard a failed request would crash on `$('meta')`.
        if (error) {
            console.error(error);
            return;
        }
        if (typeof $ !== 'function') {
            console.error('No parsed document available; cannot read meta tags.');
            return;
        }

        // Tags to collect; a value stays null when the page does not provide it.
        var data = {
            'og:type': null,
            'og:title': null,
            'og:description': null,
            'og:image': null,
            'twitter:title': null,
            'twitter:image': null,
            'twitter:description': null,
            'twitter:site': null,
            'twitter:creator': null
        };

        var meta = $('meta');
        // Index loop over the matched elements only, rather than Object.keys(),
        // which also yields non-element keys of the wrapper object.
        for (var i = 0; i < meta.length; i++) {
            var attribs = meta[i] && meta[i].attribs;
            // A tag without `content` must not overwrite a value already found.
            if (!attribs || attribs.content === undefined) {
                continue;
            }
            // Open Graph tags use `property`; Twitter Card tags are normally
            // written with `name`. Some pages put both on one tag, so check each.
            var identifiers = [attribs.property, attribs.name];
            for (var j = 0; j < identifiers.length; j++) {
                var identifier = identifiers[j];
                if (typeof identifier === 'string' &&
                        Object.prototype.hasOwnProperty.call(data, identifier)) {
                    // As before, the last matching tag on the page wins.
                    data[identifier] = attribs.content;
                }
            }
        }

        console.log(data);
    }
});
// Placeholder: replace `[ YOUR URL HERE ]` with the URL(s) to crawl before
// running. As written, this line is a template and not valid JavaScript.
c.queue( [ YOUR URL HERE ] )