1

I'm just starting out with JavaScript and I need help figuring out how to make this code run synchronously (one request at a time) while looping through a for loop. Basically, I'm making multiple POST requests inside nested for loops, then scraping the returned data with the X-Ray library, and finally saving the result to a Mongo database. The output is OK, but it arrives in an unordered way, and then the program suddenly hangs and I have to force-close it with Ctrl+C. This is my function:

  /**
   * Sends one POST request per month (December only for the first year, then
   * every month of the following years), scrapes the returned HTML with X-Ray
   * and hands the scraped row to upsertEarthquake.
   *
   * Requests are started without waiting for earlier ones to finish, so the
   * callbacks (and therefore the saved results) may complete in any order.
   */
  function getdata() {
    const startYear = 1996;
    const currentYear = 1998; // new Date().getFullYear()

    for (let year = startYear; year <= currentYear; year++) {
      // Only December is requested for the first year.
      const firstMonth = year === startYear ? 12 : 1;

      for (let month = firstMonth; month <= 12; month++) {
        // Form to be sent
        const formData = querystring.stringify({
          year: `${year}`,
          month: `${month}`,
          day: '01',
        });

        // Make HTTP Request
        request({
          headers: {
            // Content-Length is a byte count, not a character count.
            'Content-Length': Buffer.byteLength(formData),
            'Content-Type': 'application/x-www-form-urlencoded',
          },
          uri: 'https://www.ipma.pt/pt/geofisica/sismologia/',
          body: formData,
          method: 'POST',
        }, (err, res, html) => {
          // Failed requests are skipped.
          if (err || res.statusCode !== 200) {
            return;
          }

          // Scraping data with X-Ray
          x(html, '#divID0 > table > tr', {
            date: '.block90w',
            lat: 'td:nth-child(2)',
            lon: 'td:nth-child(3)',
            prof: 'td:nth-child(4)',
            mag: 'td:nth-child(5)',
            local: 'td:nth-child(6)',
            degree: 'td:nth-child(7)',
          })((error, obj) => {
            // Without this guard a scraping failure would crash on obj.date.
            if (error || !obj) {
              return;
            }

            // Decimal commas are converted to decimal points; '-' means no depth.
            const result = {
              date: obj.date,
              lat: obj.lat.replace(',', '.'),
              lon: obj.lon.replace(',', '.'),
              prof: obj.prof === '-' ? null : obj.prof.replace(',', '.'),
              mag: obj.mag.replace(',', '.'),
              local: obj.local,
              degree: obj.degree,
            };

            upsertEarthquake(result); // save to DB
          });
        });
      }
    }
  }

I guess I have to use promises or callbacks but I can't understand how to do this, and I already tried using async await but with no success. If any additional info needs to be provided please tell me, thanks.

4

3 回答 3

1

您正在循环内调用请求。

异步函数是在主线程逻辑结束后获取结果(AKA,在回调函数中接收响应)的函数。

这样,如果我们有这个:

// Starts 12 requests one after another; the loop never waits for a callback
// before issuing the next request.
var i = 0;
while (i < 12) {
    request({ data: i }, (error, data) => {
        // This is the request result, inside a callback function
    });
    i += 1;
}

实际的执行顺序是:在任何一个回调被调用之前,循环已经连续发出了 12 次 `request`,因此这些回调会被堆积起来,等主循环全部运行完之后才被调用。

这里不引入 ES6 生成器之类的东西(我认为那只会让事情更复杂,而且从底层弄清楚到底发生了什么对你更有好处)。你要做的就是调用 `request`,等它的回调函数被调用,然后在回调里再发起下一次 `request`。怎么做?方法有很多,我通常这样写:

// Index of the next request to send; shared by successive callNext() calls.
let i = 0;

/** Runs once, after all 12 requests have completed. */
function requestEnded() {
    console.log("Yay");
}

/**
 * Sends the next request, or calls requestEnded() when all 12 have been sent.
 * The following request is only started from inside the callback of the
 * current one, so requests run strictly one after another.
 */
function callNext() {
    if (i >= 12) {
        requestEnded();
        return;
    }
    request({
        data: i++ // Increment the counter as we are not inside a for loop that increments it
    }, function(error, data) {
        // Do something with the data, and also check if an error was received and act accordingly, which is very much possible when talking about internet requests
        console.log(error, data);
        // Call the next request inside the callback, so we are sure that the next request is ran just after this request has ended
        callNext();
    });
}
callNext();

在这里您可以看到其中的逻辑。您有一个名为 `callNext` 的函数,它要么发起下一次调用,要么在不再需要发起调用时调用 `requestEnded`。

当 `request` 在 `callNext` 内部被调用时,它会等待回调被触发(这将在未来的某个时刻异步发生),在回调中处理接收到的数据,然后由您在回调里再次调用 `callNext`。

于 2017-12-26T19:12:15.390 回答
-1

您可以使用 start 和 end year 创建一个数组,而不是循环,然后将其映射到您的请求的配置,然后将结果映射到 x-ray 返回的内容(x-ray 返回一个不需要回调的承诺)。然后使用返回承诺的函数将抓取的结果放入 mongodb。

如果有东西拒绝,则创建一个Fail类型对象并使用该对象解析。

使用 Promise.all 并行启动所有请求、x-ray 和 mongo,但使用 throttle 限制活动请求的数量

这是代码中的样子:

//you can get library containing throttle here:
//  https://github.com/amsterdamharu/lib/blob/master/src/index.js
const lib = require('lib');
/**
 * Marker type for a failed item: wraps whatever details describe the failure.
 * Intended to be created with `new Fail(details)`.
 */
const Fail = function (details) {
  this.details = details;
};

/** Returns true only when the value's `constructor` is exactly Fail. */
const isFail = (candidate) => {
  const ctor = candidate && candidate.constructor;
  return ctor === Fail;
};
// Range helper taken from lib; used to enumerate years and months.
const { range } = lib;
// Throttle created with a limit of 10; used to cap the number of active requests.
const max10 = lib.throttle(10);
/**
 * Builds a flat list of { year, month } objects: for every year produced by
 * range(startYear, endYear), one entry per month produced by range(1, 12),
 * in year-then-month order.
 */
const createYearMonth = (startYear, endYear) => {
  const pairs = [];
  for (const year of range(startYear, endYear)) {
    for (const month of range(1, 12)) {
      pairs.push({ year, month });
    }
  }
  return pairs;
};
/**
 * Maps each year/month entry to the options object for one POST request.
 *
 * The form body always carries a `day` field: the entry's own `day` when it
 * has one, otherwise '01'.
 *
 * @param {Array<object>} yearMonths - entries with `year` and `month` (optionally `day`)
 * @returns {Array<object>} one request configuration per entry, in the same order
 */
const toRequestConfigs = yearMonths =>
  yearMonths.map(
    yearMonth => {
      // Copy so the caller's object is not mutated when the default day is added.
      const form = Object.assign({}, yearMonth);
      if (form.day === undefined) {
        form.day = '01';
      }
      const formData = querystring.stringify(form);
      return {
        headers: {
          // Content-Length is a byte count, not a character count.
          'Content-Length': Buffer.byteLength(formData),
          'Content-Type': 'application/x-www-form-urlencoded',
        },
        uri: 'https://www.ipma.pt/pt/geofisica/sismologia/',
        body: formData,
        method: 'POST',
      };
    }
  );
/**
 * Passes the HTML to X-Ray together with the row scope and the per-column
 * selectors, and returns whatever `x` returns.
 */
const scrape = (html) => {
  const rowScope = '#divID0 > table > tr';
  const columns = {
    date: '.block90w',
    lat: 'td:nth-child(2)',
    lon: 'td:nth-child(3)',
    prof: 'td:nth-child(4)',
    mag: 'td:nth-child(5)',
    local: 'td:nth-child(6)',
    degree: 'td:nth-child(7)',
  };
  return x(html, rowScope, columns);
};
/**
 * Promise wrapper around the callback-style `request` function.
 *
 * @param {object} config - options passed straight to `request`
 * @returns {Promise<string>} resolves with the response body on HTTP 200;
 *   rejects with the transport error, or with an Error naming the status code
 *   when the response is not 200 (so the rejection reason is never null)
 */
const requestAsPromise = config =>
  new Promise((resolve, reject) => {
    request(config, (err, res, html) => {
      if (err) {
        reject(err);
        return;
      }
      if (res.statusCode !== 200) {
        reject(new Error(`Unexpected status code ${res.statusCode}`));
        return;
      }
      resolve(html);
    });
  });
/**
 * Placeholder for the database step: currently hands the scrape result back
 * unchanged. The real implementation should do the mongo work and return a promise.
 */
const someMongoStuff = (scrapeResult) => {
  //do mongo stuff and return promise
  return scrapeResult;
};
/**
 * Runs the whole pipeline for every year/month between startYear and endYear:
 * request -> scrape -> database step. Each item's failure is converted into a
 * Fail object holding [error, request body], so the combined promise resolves
 * with one entry (result or Fail) per request configuration.
 */
const getData = (startYear, endYear) => {
  const configs = toRequestConfigs(createYearMonth(startYear, endYear));

  const processOne = (config) => {
    //maximum 10 active requests
    const throttledRequest = max10(requestAsPromise);
    return throttledRequest(config)
      .then(scrape)
      .then(someMongoStuff)
      //if something goes wrong create a Fail type object
      .catch((err) => new Fail([err, config.body]));
  };

  return Promise.all(configs.map((config) => processOne(config)));
};
//how to use:
//the returned promise will always resolve unless toRequestConfigs or createYearMonth throws
getData(1980, 1982).then((outcomes) => {
  //items that failed
  const failed = outcomes.filter((item) => isFail(item));
  //items that were successful
  const successes = outcomes.filter((item) => !isFail(item));
});

抓取时经常遇到的情况是:目标站点不允许您在某个时间段内发出超过 x 个请求,一旦超出这个限制,它就会将您的 IP 列入黑名单并拒绝服务。

假设您想限制每 5 秒 10 个请求,那么您可以将上面的代码更改为:

// NOTE(review): presumably allows at most 10 calls per 5000 ms window, as the accompanying text describes — confirm against lib.throttlePeriod
const max10 = lib.throttlePeriod(10,5000);

其余代码相同

于 2017-12-26T19:41:11.880 回答
-1

你的问题在于:在同步的 `for` 循环里调用了异步方法。

解决此问题的一种干净方法是

使用 ES2017 的 `async/await` 语法。

假设你希望每次迭代都在 `upsertEarthquake(result)` 执行之后才进入下一次迭代,那么你应该这样修改代码:

/**
 * Sequential version: sends one POST request per month (December only for the
 * first year), scrapes the response with X-Ray and passes the row to
 * upsertEarthquake. Each iteration awaits a promise that is settled from the
 * callbacks, so the next request is not started until the current one is done.
 *
 * @returns {Promise<void>} resolves when every month has been processed;
 *   rejects if building or saving a scraped row throws
 */
async function getdata() {
    const startYear = 1996;
    const currentYear = 1998; // new Date().getFullYear()

    for (let i = startYear; i <= currentYear; i++) {
        // Only December is requested for the first year.
        const firstMonth = i === startYear ? 12 : 1;

        for (let j = firstMonth; j <= 12; j++) {
            // Form to be sent
            const formData = querystring.stringify({
                year: `${i}`,
                month: `${j}`,
                day: '01',
            });

            // Make HTTP Request; `next` resumes the loop, `reject` aborts it.
            await new Promise((next, reject) => {
                request({
                    headers: {
                        // Content-Length is a byte count, not a character count.
                        'Content-Length': Buffer.byteLength(formData),
                        'Content-Type': 'application/x-www-form-urlencoded',
                    },
                    uri: 'https://www.ipma.pt/pt/geofisica/sismologia/',
                    body: formData,
                    method: 'POST',
                }, (err, res, html) => {
                    if (err || res.statusCode !== 200) {
                        return next(); // If there is an error jump to the next
                    }

                    // Scraping data with X-Ray
                    x(html, '#divID0 > table > tr', {
                        date: '.block90w',
                        lat: 'td:nth-child(2)',
                        lon: 'td:nth-child(3)',
                        prof: 'td:nth-child(4)',
                        mag: 'td:nth-child(5)',
                        local: 'td:nth-child(6)',
                        degree: 'td:nth-child(7)',
                    })((error, obj) => {
                        // A scraping failure skips this month instead of crashing on obj.date.
                        if (error || !obj) {
                            return next();
                        }

                        try {
                            const result = {
                                date: obj.date,
                                lat: obj.lat.replace(',', '.'),
                                lon: obj.lon.replace(',', '.'),
                                prof: obj.prof === '-' ? null : obj.prof.replace(',', '.'),
                                mag: obj.mag.replace(',', '.'),
                                local: obj.local,
                                degree: obj.degree,
                            };
                            upsertEarthquake(result); // save to DB
                        } catch (failure) {
                            // A throw inside this callback would otherwise leave the
                            // awaited promise pending forever.
                            return reject(failure);
                        }
                        return next(); // This makes jump to the next for... iteration
                    });
                });
            });
        }
    }
}

我假设 `upsertEarthquake` 是一个异步函数,或者属于“fire and forget”(调用之后不关心结果)的类型。

如果出现错误后仍想继续下一次迭代,可以调用 `next()`;但如果您想中断循环,请改用 `reject()`:

// Variant of the error check that rejects the promise instead of calling next().
// NOTE: when err is falsy and only the status code check failed, the rejection reason is that falsy err value.
if (err || res.statusCode !== 200)
    return reject(err)
于 2018-01-25T19:08:29.327 回答