我想从 html 中提取图像,其具有像我一样的“微内容”类属性......
我想提取这些类为 class="microcontent" 的图像
我写了这段代码,但这不起作用。
var $ = cheerio.load(html);
var title = $('head title').text();
var keywords = $('head meta[name=keywords]').attr('content');
var desc = $('head meta[name=description]').attr('content');
var links = $('a');
var imgArray = [];
$('img.microconent').each(function(){
var temp = $(this).attr("src");
imgArray.push(temp);
var downloadImage = function (temp) {
http.get(temp, function (res) {
var imagedata = ''
res.setEncoding('binary')
res.on('data', function(chunk){
imagedata += chunk;
});
res.on('end', function(){
var imgArr = temp.split("/");
var Name = util.id();
imgName = imgArr[imgArr.length-1];
imgName = imgName.split(".");
imgName = imgName[imgName.length-1];
imgName = Name + "." +imgName;
fs.writeFile(__dirname + '/img/' + Name + ".png" , imagedata, 'binary', function(err){
if (err) throw err;
console.log('image saved')
});
});
});
};
downloadImage(temp);
console.log("image is " + imgArray);
});
console.log('Crawling "%s" | %s',title,this.url);
任何帮助都将是可观的。