0

我需要将房地产广告抓取到 nidax.json 文件中。我先访问列出所有广告的页面,再通过其中指向各个广告的链接获取所需的数据。我使用的是 Node.js 的 x-ray 抓取库,但不知为何它无法正常工作。

有时它什么也不返回,有时它只返回单个广告的链接。

// Scrape every ad listed on the overview page and, for each one, follow its
// link to read details from the ad's own page. Results go to nidax.json.
var Xray = require('x-ray');
var x = Xray();

// The second argument scopes each array item to one "read more" <span>;
// selectors inside the item object are evaluated relative to that <span>.
x('http://nidax-nekretnine.rs/nekretnine/', 'div.kutija-veca_dno > div.read-more > span', [{
    // Link to the individual ad.
    url: 'a@href',
    // Follow that same link and scrape fields from the ad's detail page.
    // The selector must be relative ('a@href'): repeating the full chain
    // from the page root finds nothing inside the already-scoped <span>.
    items: x('a@href', {
        location: 'body > div.contentarea-novo > div > div.info-part > div.one-third  div.osnovni-podaci > p:nth-child(2) > span.orange-text'
    })
}]).write('nidax.json');
4

1 回答 1

0

您可以订阅下面这个拉取请求,以便在它被批准(合并)时收到通知。

在此期间,我建议您直接在已下载的 x-ray 模块中应用该修复。改动只有一行代码,非常简单,我已在两个项目中测试过。请打开 index.js 文件,查看第 237 行附近:在那段长注释之后有一个“return”:

/**
 * Build a walker that resolves `selector` against an already-loaded document.
 *
 * Each value visited by `walk` is handled according to its type:
 *   - string: resolved with `resolve($, root(scope), v, filters)`;
 *   - function: invoked as `v($, callback)` and its result forwarded;
 *   - array whose first element is a string: resolved like a string,
 *     passing the whole array to `resolve`;
 *   - array whose first element is an object: every element matched by
 *     `scope` is processed by a node built with `xray(scope, v[0])`, and the
 *     collected results are passed on once all of them have reported back.
 * Any other value yields `next()` with no result.
 *
 * @param {Function} xray - factory called as `xray(scope, v[0])` to build the
 *   node function that processes one scoped element.
 * @param {*} selector - selector structure handed to `walk`.
 * @param {*} scope - passed to `root()` for string lookups and used to find
 *   the elements to iterate for array-of-object selectors.
 * @param {*} filters - forwarded unchanged to `resolve`.
 * @returns {Function} `walkHTML($, fn)`; `fn` is called as `fn(err)` on
 *   failure or `fn(null, obj, $)` on success.
 */
function WalkHTML (xray, selector, scope, filters) {
  return function walkHTML ($, fn) {
    // NOTE(review): `walk` is defined outside this snippet; it is presumably
    // an async traversal that calls the iterator per value and the final
    // callback once every `next` has fired — confirm against its source.
    walk(selector, function (v, k, next) {
      if (typeof v === 'string') {
        var value = resolve($, root(scope), v, filters)
        return next(null, value)
      } else if (typeof v === 'function') {
        // Delegate to the nested function and forward its error or result.
        return v($, function (err, obj) {
          if (err) return next(err)
          return next(null, obj)
        })
      } else if (isArray(v)) {
        if (typeof v[0] === 'string') {
          return next(null, resolve($, root(scope), v, filters))
        } else if (typeof v[0] === 'object') {
          // Use `$.find` when `$` provides it, otherwise call `$` directly.
          var $scope = $.find ? $.find(scope) : $(scope)
          // Number of scoped elements whose callbacks have not fired yet.
          var pending = $scope.length
          var out = []

          // Handle the empty result set (thanks @jenbennings!)
          if (!pending) return next(null, out)

          $scope.each(function (i, el) {
            var $innerscope = $scope.eq(i)
            var node = xray(scope, v[0])
            node($innerscope, function (err, obj) {
              if (err) return next(err)
              // Store by index so results keep the order of the elements.
              out[i] = obj
              // The last callback to arrive reports the collected results.
              if (!--pending) {
                return next(null, compact(out))
              }
            })
          })
          // Nested crawling is broken on 'master' until the
          // 'bugfix/nested-crawling' pull request (#111) is merged.
          // Exit here WITHOUT calling next(): falling through to the
          // `return next()` below reached the "finished" callback before all
          // pending requests had been retrieved. The walk must instead wait
          // for `return next(null, compact(out))` in the callback above.
          return
        }
      }
      return next()
    }, function (err, obj) {
      if (err) return fn(err)
      fn(null, obj, $)
    })
  }
}
于 2016-07-19T10:06:53.210 回答