6

我正在尝试使用 cheerio 进行一些 HTML 抓取(不能使用 jsdom,因为我遇到了依赖问题:contextify 等报错),但我无法获取 meta 标签 "og:type"、"og:title" 等的内容。

// Fetches `Url` (not defined in this snippet) and logs a content attribute once per <meta> tag.
request(Url, function(error, response, body) {
// `error` is not checked before `body` is parsed.
var $ = cheerio.load(body);
    $('meta').each(function() {
        // NOTE: this re-selects every <meta> element on each iteration; .attr() on a
        // multi-element selection returns the value of the first matched element only,
        // so the same first content value is logged every time. Reading the element
        // currently being visited would require $(this).attr('content').
        console.log(  $('meta').attr('content'));
    });
});

我只得到第一个 meta 标签的内容 "text/html;charset=UTF-8"。你知道如何访问 og 标签吗?

4

3 回答 3

12

如果您知道要获得哪个属性,则更简单的解决方案是:

// Parse the document, select the <meta> tag whose property attribute is
// "og:title", then read that tag's content attribute.
var $ = cheerio.load(html);
var ogTitleMeta = $('meta[property="og:title"]');
var result = ogTitleMeta.attr('content');

(假设您想获得标题。)

于 2015-12-16T13:42:37.603 回答
4

扩展赫尔曼的回答:

我发现 node-crawler + cheerio 的组合更易于管理。下面的代码可以更轻松地跟踪您正在搜索的标签属性,并且可以轻松调整以包含其他标签。我是这样做的:

  var crawler = require('crawler'),
  url = require('url');

  // Crawls the queued URL(s) and logs the Open Graph / Twitter Card meta values
  // found on each page. To collect additional tags, add their keys to `data`.
  var c = new crawler({
    maxConnections:10,
    /**
     * Invoked once per crawled page.
     * @param {Error|null} error - set when the request failed.
     * @param {Object} response - the response object passed by the crawler (unused here).
     * @param {Function} $ - selector function bound to the fetched document.
     */
    callback:function(error,response,$) {
      // Without a successfully loaded document there is nothing to query.
      if (error || typeof $ !== 'function') {
        console.error(error || new Error('No parsed document available'));
        return;
      }

      // Keys to look for; a key stays null when the page has no such tag.
      var data = {
        'og:type':null,
        'og:title':null,
        'og:description':null,
        'og:image':null,
        'twitter:title':null,
        'twitter:image':null,
        'twitter:description':null,
        'twitter:site':null,
        'twitter:creator':null,
      }

      // Single pass over the <meta> elements only (no non-element keys of the
      // selection object are visited). When a key occurs more than once, the
      // last occurrence wins.
      $('meta').each(function(index, element) {
        var tag = $(element);
        // Open Graph tags use the `property` attribute, while Twitter Card
        // tags are commonly written with `name`, so accept either.
        var key = tag.attr('property') || tag.attr('name');
        if (key && Object.prototype.hasOwnProperty.call(data, key)) {
          data[key] = tag.attr('content');
        }
      });

      console.log(data);
    }
  })
  // Replace the placeholder string with the URL(s) you want to crawl.
  c.queue(['YOUR URL HERE'])
于 2014-12-23T21:56:14.507 回答
3

您必须遍历 $('meta') 对象的键,并检查所需的属性是否存在,才能获得结果。

试试这个代码:

var cheerio = require('cheerio')
var request = require('request')

/**
 * Scans the keys of a `$('meta')` selection and returns the content attribute
 * of the element whose `property` attribute equals `property`.
 * @param {Object} meta - the selection returned by `$('meta')`.
 * @param {string} property - the property value to look for, e.g. 'og:title'.
 * @returns {string|undefined} the content value of the last matching element,
 *   or undefined when no element matches.
 */
function findMetaContent(meta, property) {
  var content;
  Object.keys(meta).forEach(function(key) {
    var node = meta[key];
    // Object.keys also yields keys that are not elements; those have no
    // `attribs` (or may hold no value at all) and are skipped.
    if (node && node.attribs && node.attribs.property === property) {
      content = node.attribs.content;
    }
  });
  return content;
}

// `Url` is a placeholder for the page address and must be defined by the caller.
request(Url, function(error, response, body) {
  // On a failed request there is no body to parse.
  if (error) {
    console.error(error);
    return;
  }

  var $ = cheerio.load(body);
  var meta = $('meta')

  var ogType = findMetaContent(meta, 'og:type');
  var ogTitle = findMetaContent(meta, 'og:title');

  console.log(ogType);
  console.log(ogTitle);
});
于 2012-10-07T12:53:08.577 回答