我正在创建一个项目：在 Netlify 函数中抓取日文维基百科（Wikipedia JP）的数据，供 TimelineJS 使用。
TimelineJS 需要符合其固定模式（schema）的 JSON，因此我从 Wikipedia JP 中抓取了多个元素来生成 JSON 数据。
我需要将这些元素设置为数组,并在 HTTP 请求时将正文作为文本返回。
我写的代码在这里。
const axios = require('axios')
const cheerio = require('cheerio')
const moment = require('moment')
const ogs = require('open-graph-scraper')
// Base URL of Japanese Wikipedia articles; the handler appends the requested date string to it.
const url = 'https://ja.wikipedia.org/wiki/'
// Use the Japanese locale for moment globally (the handler parses dates written as 'M月D日').
moment.locale('ja')
/**
 * Looks up the Open Graph image URL of a page via `ogs`.
 *
 * Best-effort: any failure (rejected lookup, or a result without
 * `data.ogImage.url`) is logged and the promise resolves to `undefined`
 * instead of rejecting.
 *
 * @param {object} options - Options object passed straight through to `ogs`.
 * @returns {Promise<string|undefined>} The `data.ogImage.url` value, or
 *   `undefined` when the lookup failed.
 */
const getOGPImagePath = async (options) => {
  // Start the lookup outside the try block so only asynchronous failures are swallowed.
  const pending = ogs(options)
  try {
    const { data } = await pending
    return data.ogImage.url
  } catch (error) {
    console.log(error)
    return undefined
  }
}
exports.handler = async (event, context) => {
const date = event.queryStringParameters.date
const endpoint = url + date
const momentDate = moment(date, 'M月D日')
const day = momentDate.format('D')
const month = momentDate.format('M')
const html = await axios.get(encodeURI(endpoint),{ headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100'}})
const $ = await cheerio.load(html.data, { decodeEntities: false })
let data = []
$('.mw-parser-output > ul').first().find('li').not('.mw-empty-elt').each(async (i,elem) => {
const year = String($(elem).text().match(/\d{1,4}年/)).replace(/年/, '')
const content = String($(elem).text().match(/\-\s(.*)/))
const contentHtml = $(elem).html().replace(/((^<a(.*)>\d{1,4}年<\/a>(.*)\s\-\s)|(^\d{1,4}年(.*)\s\-\s))/,'')
const $c = await cheerio.load(contentHtml, { decodeEntities: false })
const mainUrl = await 'https://ja.wikipedia.org' + $c('a').first().attr('href')
const options = {
url: mainUrl
}
// console.log(mainUrl) <- works fine
const ogImagePath = await getOGPImagePath(options)
// console.log(ogImagePath) <- works fine
data.push({
media: {
url: ogImagePath,
},
start_date: {
year: year,
month: month,
day: day
},
text: {
text: contentHtml
},
background: ogImagePath
})
// console.log(data) <- works fine
})
console.log(data) // <- Does not works fine. The result is '[]' empty array.
return {
statusCode: 200,
body: JSON.stringify({ events: data }),
headers: {
'Content-Type': 'application/json; charset=utf8',
'Access-Control-Allow-Origin': "*"
}
};
}
有没有人有这种情况的解决方案?