0

嗨,我最近一直在尝试制作一个 Web 应用程序来抓取网站以获取各种信息。为了将数据从实际执行抓取的页面传输到结果页面,我使用 connect-flash 将其存储在客户端会话中。

我的 app.js 如下。

var express = require('express')
  , flash = require('connect-flash')
  , util = require('util')
  , bodyParser = require('body-parser')
  , request = require('request')
  , cheerio = require("cheerio")
  , search = require('./search');

var app = express();
app.use(express.static('public'));
// configure Express
app.configure(function() {
    app.set('views', __dirname + '/views');
    app.set('view engine', 'ejs');
    app.use(express.logger());
    app.use(express.cookieParser('keyboard cat'));
    app.use(express.session({ key: 'sid', cookie: { maxAge: 60000 }}));
    app.use(flash());
    app.use(app.router);
});

app.get('/', function(req, res){
    res.render('index');
});

// set up routes
app.get('/scrape', function(req, res){
    let strLink = req.query.link;
    makeRequest(strLink, function (objData) {
        req.flash('objData', objData)
        res.redirect('/result');
    });
});

app.get('/result', function(req, res){
    let objData = req.flash('objData');

    // Stops server crashing on refresh
    if (!objData[0]) {
        res.redirect('/');
    }

    let arrEmails    = objData[0].EmailAddresses,
        arrPhones    = objData[0].PhoneNumbers,
        arrPostcodes = objData[0].Postcodes,
        arrLinkedIn  = objData[0].Social.linkedin,
        arrFacebook  = objData[0].Social.facebook,
        arrTwitter   = objData[0].Social.twitter;

    res.render(
        'result',
        {
            emails    : arrEmails,
            phones    : arrPhones,
            postcodes : arrPostcodes,
            linkedIn  : arrLinkedIn,
            facebook  : arrFacebook,
            twitter   : arrTwitter,

        }
    );
});

function makeRequest(url, callback) {
    console.log("Request to " + url + " performed");

    let objData = {
        EmailAddresses : [],
        PhoneNumbers   : [],
        Postcodes      : []
    };

    request.get(url, function (err, res, html) {
        if (!err && res.statusCode == 200) {
            const $ = cheerio.load(html);
            const htmlBody = $("body").html();
            const textBodyTrimmed = $("body").text().replace(/\s\s+/g, ' ');

            let arrWebPageHrefs = [];
            $("a").each(function (i, el) {
                const link = $(el).attr("href");
                arrWebPageHrefs.push(link);
            });

            objData.Social          = search.social(arrWebPageHrefs);
            objData.EmailAddresses  = search.email(htmlBody);
            objData.PhoneNumbers    = search.phone(textBodyTrimmed);
            objData.Postcodes       = search.postcode(textBodyTrimmed);
            callback(objData);
        } else {
            console.log("error:" + err);
        }
    });
}

app.listen(process.env.PORT || 3000);

这在本地工作得很好,但是当我尝试部署到 Heroku 时,它无法工作(https://pure-inlet-39487.herokuapp.com/),如您所见。

我检查了日志并打印出以下错误。

2019-11-13T18:36:23.610090+00:00 app[web.1]: ::ffff:10.63.30.156 - - [13/Nov/2019:18:36:23 +0000] "GET /scrape?link=http%3A%2F%2Falban-financial.co.uk%2F HTTP/1.1" 302 58 "https://pure-inlet-39487.herokuapp.com/" "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0"
2019-11-13T18:36:23.704092+00:00 app[web.1]: ::ffff:10.63.30.156 - - [13/Nov/2019:18:36:23 +0000] "GET /result HTTP/1.1" 302 46 "https://pure-inlet-39487.herokuapp.com/" "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0"
2019-11-13T18:36:23.704603+00:00 app[web.1]: TypeError: Cannot read property 'EmailAddresses' of undefined
2019-11-13T18:36:23.704605+00:00 app[web.1]: at /app/app.js:43:35
2019-11-13T18:36:23.704607+00:00 app[web.1]: at callbacks (/app/node_modules/express/lib/router/index.js:164:37)
2019-11-13T18:36:23.704609+00:00 app[web.1]: at param (/app/node_modules/express/lib/router/index.js:138:11)
2019-11-13T18:36:23.704610+00:00 app[web.1]: at pass (/app/node_modules/express/lib/router/index.js:145:5)
2019-11-13T18:36:23.704612+00:00 app[web.1]: at Router._dispatch (/app/node_modules/express/lib/router/index.js:173:5)
2019-11-13T18:36:23.704614+00:00 app[web.1]: at Object.router [as handle] (/app/node_modules/express/lib/router/index.js:33:10)
2019-11-13T18:36:23.704615+00:00 app[web.1]: at next (/app/node_modules/connect/lib/proto.js:174:15)
2019-11-13T18:36:23.704617+00:00 app[web.1]: at Object.handle (/app/node_modules/connect-flash/lib/flash.js:21:5)
2019-11-13T18:36:23.704618+00:00 app[web.1]: at next (/app/node_modules/connect/lib/proto.js:174:15)
2019-11-13T18:36:23.704620+00:00 app[web.1]: at Object.session [as handle] (/app/node_modules/express-session/index.js:398:7)

这让我相信这可能是会话没有正确存储在 Heroku 上的问题。

我需要做一些配置才能让它在heroku上工作吗?

非常感谢任何帮助

4

0 回答 0