0

我尝试了很多之前已在此处发布的建议修复,但我真的不知道我还能做什么。

这是项目结构

在刮刀中,我有 3 个功能。一个主,两个帮手。我在路由中导入 index.js。

const { scrape, scrapeChannel, iterateProducts } = require('../../services/scrapers.js');

然后我打电话给scrapeChannel('url'),但我一直没有定义scrape。当我 console.log(scrape) 它显示它存在于 index.js

scrapers.js 所有函数都以这种方式导出

module.exports.iterateProducts = function()  {logic}

这是整个导入的样子 进口

@Marc index.js

   const express = require('express');
const router = express.Router();

const { scrape, scrapeChannel, iterateProducts } = require('../../services/scrapers.js');

let content = scrapeChannel('Placeholder cuz private, its working');
console.log(content)
router.get('/', async (req,res) => {   
    const creators = [
        {name: "Test test", img: 'https://'},
        {name: "bRAH BDSF", img: 'https://'},
        {name: "123 123", img: 'https://'},
    ]

    res.render('site/show')
})

router.post('/', async (req,res) => {
    console.log(req.body)
    res.send('succes')
})


module.exports = router;

刮板.js

const puppeteer = require('puppeteer');


module.exports.scrape = function () {
    let list = Array.from(document.querySelectorAll(".selenium-price-normal"), e => e.innerText);
    let name = Array.from(document.querySelectorAll(" div.product-header > h2 > a"), e => e.innerText);
    return [list, name];
}

module.exports.iterateProducts = function (productName, productPrice){
    let titles = [];
    let prices = [];
    for (let i = 0; i < productName.length - 4; i++) {
        titles.push(productName[i]);
    }
    for (let y = 1; y < productPrice.length - 3; y = y + 2) {
        prices.push(productPrice[y].split(',')[0].replace(/\D/g,''))
    }
    let result = Object.fromEntries(titles.map((_, i) => [ titles[i], prices[i]]));
    result = Object.entries(result)
    console.log(result)
    return result
}

module.exports.scrapeChannel = async function (url) {
    console.log(scrape)
    let allProducts = [];
    const browser = await puppeteer.launch({
        headless: true,
    });

    const page = await browser.newPage();
    page.setDefaultNavigationTimeout(90000);
    await page.addStyleTag({ content: "{scroll-behavior: auto !important;}" });
    await page.setUserAgent('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36');
    await page.setDefaultNavigationTimeout(0);
    await page.goto(url);
    while (await page.$("#product-list > div.paging > a.paging-next.selenium-WC-paging-next-button")) {
    await page.evaluate(scrape).then(async el => {
        try {
                if (await page.$('#onetrust-accept-btn-handler')) {
                    await page.click('#onetrust-accept-btn-handler')
                    await page.waitFor(1500)
                }
                let products = await iterateProducts(el[1], el[0])
                allProducts = products.concat(allProducts)
                await page.waitFor(1000)
                await page.click('#product-list > div.paging > a.paging-next.selenium-WC-paging-next-button')
                await page.waitForNavigation();
        } catch (error) {
            console.log('errrrr!!!!!!!!!!', error)
        }
    })}
    allProducts = allProducts.sort((a, b) => a[1] - b[1])
    return allProducts
}
4

2 回答 2

2

您会收到该错误,因为在声明scrapeChannel时 ,scrape未定义。所以,当你调用时scrapeChannelscrape函数闭包内部仍然没有定义。您可以在此处阅读有关 javascript 闭包的更多信息https://developer.mozilla.org/en-US/docs/Web/JavaScript/Closures

您不应该依赖在其他文件上声明的变量而不导入它们。

如果你想引用scrape里面的函数scrapeChannel,我会推荐以下任一选项:

const scrape = module.exports.scrape = () => {}
module.exports.scrapeChannel = () => {scrape();}
module.exports.scrape = () => {}
module.exports.scrapeChannel = () => {module.exports.scrape();}
于 2021-08-17T02:23:46.177 回答
0

Adding this to scrape and iterateProducts fixed the problem. But still if someone can explain why this is needed

await page.evaluate(this.scrape)
let products = await this.iterateProducts(el[1], el[0])
于 2021-08-16T19:51:09.637 回答