0

我正在尝试创建一个价格抓取应用程序,它将获取一组 URL,执行请求,提取所需的数据(价格),并在完成所有 URL 后,执行 res.send 以使用抓取的数据填充浏览器.

下面的代码抱怨无法访问 url、list 和 count。这怎么可能?

    var express = require('express');
    var request = require('request');
    var cheerio = require('cheerio');
    var app     = express();

    // Initiate Express GET Handler

    app.use('/', function(req, res){

        //URL to Crawl
        var urls = [
            "http://www.ecommerce.com/products/test",
            "http://www.ecommerce.com/products/test1",
            "http://www.ecommerce.com/products/test2",
            "http://www.ecommerce.com/products/test3",
            "http://www.ecommerce.com/products/test4",
            "http://www.ecommerce.com/products/test5"
            ];    
        var list = [];
        var count = 0;

        scrape(function(){

            //check if the loop has completed and all request data returned.
            if(count < urls.length){
                res.send(list);
            }

        });

    });

    function scrape(callback){
            for(var i = 0; i < urls.length; i++){

                request(urls[i], function(error, response, html){

                    if(!error && response.statusCode == 200){
                        // LOAD Cherio (jQuery) on the webpage
                        var $ = cheerio.load(html);
                        var name = $(".name").text();
                        var mpn = $(".specs.block").contents().get(6).nodeValue.trim();
                        var jsontemp = {"MPN": "", "Name": "", "PriceList": {}};

                        jsontemp.MPN = mpn;
                        jsontemp.Name = name;

                        // Traverse the DOM to get tr tags and extract info

                        $(".wide-table tbody tr").each(function (i, row) {
                            var $row = $(row),
                                merchant = $row. attr("class").trim(),
                                total = $row.children(".total").text();
                                jsontemp.PriceList[merchant] = merchant;
                                jsontemp.PriceList[merchant] = total;
                                list.push(jsontemp);
                                count++;
                                callback();
                        });
                    }
                    else{
                        console.log(error);
                    }
                    //callback();
                });
            }
    }


    app.listen('8000')
    console.log('Server is running on Port 8000');
    exports = module.exports = app;     
4

0 回答 0