嗨,我最近一直在尝试制作一个 Web 应用程序来抓取网站以获取各种信息。为了将数据从实际执行抓取的页面传输到结果页面,我使用 connect-flash 将其存储在客户端会话中。
我的 app.js 如下。
var express = require('express')
, flash = require('connect-flash')
, util = require('util')
, bodyParser = require('body-parser')
, request = require('request')
, cheerio = require("cheerio")
, search = require('./search');
// Create the Express application; files under ./public are served first.
const app = express();
app.use(express.static('public'));

// Configure the view engine and the middleware chain. The registration order
// is kept exactly as before: logger, cookie parser, session, flash, router.
app.configure(() => {
  // Session cookie is named `sid` and expires after 60000 ms.
  const sessionOptions = { key: 'sid', cookie: { maxAge: 60000 } };

  app.set('views', `${__dirname}/views`);
  app.set('view engine', 'ejs');

  app.use(express.logger());
  app.use(express.cookieParser('keyboard cat'));
  app.use(express.session(sessionOptions));
  app.use(flash());
  app.use(app.router);
});
// GET / — renders the `index` view.
app.get('/', (req, res) => {
  res.render('index');
});
// set up routes
// GET /scrape?link=<url>
// Scrapes the page named by the `link` query parameter, stores the extracted
// data as the `objData` flash message and redirects to /result.
app.get('/scrape', function (req, res) {
  const strLink = req.query.link;

  // A missing, empty, or non-string `link` (e.g. `?link[]=a`) cannot be
  // requested; go back to the index page instead of leaving the client
  // waiting for a response that never comes.
  if (typeof strLink !== 'string' || strLink.trim() === '') {
    return res.redirect('/');
  }

  makeRequest(strLink, function (objData) {
    req.flash('objData', objData);
    res.redirect('/result');
  });
});
// GET /result
// Reads the `objData` flash message written by /scrape and renders it with
// the `result` view. Reading a flash message consumes it, so a page refresh
// (or a direct visit) finds nothing and is sent back to the index page.
app.get('/result', function (req, res) {
  const [objData] = req.flash('objData');

  // `return` is essential here: without it the handler kept running after the
  // redirect and crashed with "Cannot read property 'EmailAddresses' of
  // undefined" when dereferencing the missing data below.
  if (!objData) {
    return res.redirect('/');
  }

  const { EmailAddresses, PhoneNumbers, Postcodes, Social } = objData;

  res.render('result', {
    emails: EmailAddresses,
    phones: PhoneNumbers,
    postcodes: Postcodes,
    linkedIn: Social.linkedin,
    facebook: Social.facebook,
    twitter: Social.twitter,
  });
});
/**
 * Fetches `url` and extracts contact details from the returned HTML using the
 * helpers in the `search` module.
 *
 * @param {string} url - Address of the page to scrape.
 * @param {function(Object, Error=): void} callback - Invoked exactly once with
 *   an object holding `EmailAddresses`, `PhoneNumbers`, `Postcodes` and
 *   `Social`. If the request fails or the status is not 200, the object holds
 *   the empty defaults and the failure is passed as a second argument, so the
 *   caller can always finish its own HTTP response.
 */
function makeRequest(url, callback) {
  console.log("Request to " + url + " performed");

  // Defaults returned when nothing could be scraped. `Social` carries the keys
  // the /result route reads; empty arrays are assumed to match what
  // `search.social` returns — NOTE(review): confirm against ./search.
  const objData = {
    EmailAddresses: [],
    PhoneNumbers: [],
    Postcodes: [],
    Social: { linkedin: [], facebook: [], twitter: [] },
  };

  request.get(url, function (err, res, html) {
    if (err || !res || res.statusCode !== 200) {
      const failure = err ||
        new Error("Unexpected response status: " + (res && res.statusCode));
      console.log("error:" + failure);
      // Previously the callback was skipped here, leaving the caller's HTTP
      // request hanging forever.
      callback(objData, failure);
      return;
    }

    const $ = cheerio.load(html);
    const htmlBody = $("body").html();
    // Collapse runs of whitespace so the text searches see single spaces.
    const textBodyTrimmed = $("body").text().replace(/\s\s+/g, ' ');

    // Collect the href of every anchor on the page for the social search.
    const arrWebPageHrefs = [];
    $("a").each(function (i, el) {
      arrWebPageHrefs.push($(el).attr("href"));
    });

    objData.Social = search.social(arrWebPageHrefs);
    objData.EmailAddresses = search.email(htmlBody);
    objData.PhoneNumbers = search.phone(textBodyTrimmed);
    objData.Postcodes = search.postcode(textBodyTrimmed);
    callback(objData);
  });
}
// Start the HTTP server on the port given by the PORT environment variable,
// falling back to 3000 when it is not set.
app.listen(process.env.PORT || 3000);
这在本地运行良好,但部署到 Heroku 之后却无法正常工作(可以访问 https://pure-inlet-39487.herokuapp.com/ 查看)。
我查看了日志,其中输出了以下错误。
2019-11-13T18:36:23.610090+00:00 app[web.1]: ::ffff:10.63.30.156 - - [13/Nov/2019:18:36:23 +0000] "GET /scrape?link=http%3A%2F%2Falban-financial.co.uk%2F HTTP/1.1" 302 58 "https://pure-inlet-39487.herokuapp.com/" "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0"
2019-11-13T18:36:23.704092+00:00 app[web.1]: ::ffff:10.63.30.156 - - [13/Nov/2019:18:36:23 +0000] "GET /result HTTP/1.1" 302 46 "https://pure-inlet-39487.herokuapp.com/" "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0"
2019-11-13T18:36:23.704603+00:00 app[web.1]: TypeError: Cannot read property 'EmailAddresses' of undefined
2019-11-13T18:36:23.704605+00:00 app[web.1]: at /app/app.js:43:35
2019-11-13T18:36:23.704607+00:00 app[web.1]: at callbacks (/app/node_modules/express/lib/router/index.js:164:37)
2019-11-13T18:36:23.704609+00:00 app[web.1]: at param (/app/node_modules/express/lib/router/index.js:138:11)
2019-11-13T18:36:23.704610+00:00 app[web.1]: at pass (/app/node_modules/express/lib/router/index.js:145:5)
2019-11-13T18:36:23.704612+00:00 app[web.1]: at Router._dispatch (/app/node_modules/express/lib/router/index.js:173:5)
2019-11-13T18:36:23.704614+00:00 app[web.1]: at Object.router [as handle] (/app/node_modules/express/lib/router/index.js:33:10)
2019-11-13T18:36:23.704615+00:00 app[web.1]: at next (/app/node_modules/connect/lib/proto.js:174:15)
2019-11-13T18:36:23.704617+00:00 app[web.1]: at Object.handle (/app/node_modules/connect-flash/lib/flash.js:21:5)
2019-11-13T18:36:23.704618+00:00 app[web.1]: at next (/app/node_modules/connect/lib/proto.js:174:15)
2019-11-13T18:36:23.704620+00:00 app[web.1]: at Object.session [as handle] (/app/node_modules/express-session/index.js:398:7)
这让我怀疑问题可能出在会话没有在 Heroku 上被正确存储。
我是否需要做一些额外的配置才能让它在 Heroku 上正常工作?
非常感谢任何帮助。