0

我是 puppeteer 的新手,我正在尝试从此页面上的表格中抓取数据:https://www.ewrc-results.com/season/1995/wrc-1/ 。

这是 DOM 中表格的屏幕截图:

这是 DOM 中表格的屏幕截图。

我正在使用的代码如下:

const puppeteer = require('puppeteer');

/**
 * Loads the ewrc-results season page for `year` in a headless browser,
 * collects position / name / points from every `tr.table_sude` row and
 * prints the result as pretty-printed JSON.
 *
 * NOTE(review): the browser instance launched here is never closed.
 *
 * @param {number|string} year - Season year interpolated into the page URL.
 * @returns {Promise<void>} Nothing is returned; errors are caught and logged.
 */
async function getChampTable(year) {
    try {

        const browser = await puppeteer.launch();

        const page = await browser.newPage();

        const url = `https://www.ewrc-results.com/season/${year}/1-wrc/`;

        await page.goto(url, {waitUntil: 'domcontentloaded'});

        // The callback below is executed inside the page, not in Node.
        const driverTable = await page.evaluate(() => {
            // Always prefixes the argument with `td.`, so 'a' becomes the
            // selector `td.a` (a <td> with class "a"), not an anchor element.
            // querySelector returns null when nothing matches, and reading
            // `.innerText` on null throws the TypeError.
            const grabFromRow = (row, classname) => row
                .querySelector(`td.${classname}`)
                .innerText
                .trim()

            // Matches every such row anywhere in the document.
            const DRIVER_ROW_SELECTOR = 'tr.table_sude'

            const data = []

            const driverRows = document.querySelectorAll(DRIVER_ROW_SELECTOR)

            for (const tr of driverRows) {
                data.push({
                    position: grabFromRow(tr, 'points-pos'),
                    name: grabFromRow(tr, 'a'),
                    pointsTotal: grabFromRow(tr, 'points-total')
                })
            }
            return data
        })

        console.log(JSON.stringify(driverTable, null, 2))

    } catch (error) {
        // Any failure (launch, navigation, in-page evaluation) ends up here.
        console.log(error)
    }
}

getChampTable(1995);

这是我收到的错误:

Error: Evaluation failed: TypeError: Cannot read property 'innerText' of null
    at grabFromRow (__puppeteer_evaluation_script__:4:5)
    at __puppeteer_evaluation_script__:16:12
    at ExecutionContext._evaluateInternal (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/ExecutionContext.js:102:19)
    at processTicksAndRejections (internal/process/task_queues.js:97:5)
    at async ExecutionContext.evaluate (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/ExecutionContext.js:33:16)
    at async getChampTable (/Users/jamescowell/Desktop/Projects/Bobble/scraper/index.js:14:23)
  -- ASYNC --
    at ExecutionContext.<anonymous> (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/helper.js:94:19)
    at DOMWorld.evaluate (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/DOMWorld.js:89:24)
    at processTicksAndRejections (internal/process/task_queues.js:97:5)
  -- ASYNC --
    at Frame.<anonymous> (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/helper.js:94:19)
    at Page.evaluate (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/Page.js:591:14)
    at Page.<anonymous> (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/helper.js:95:27)
    at getChampTable (/Users/jamescowell/Desktop/Projects/Bobble/scraper/index.js:14:34)
    at processTicksAndRejections (internal/process/task_queues.js:97:5)

任何帮助将不胜感激!

4

1 回答 1

0

页面上有多个表格都包含 `tr.table_sude` 行,但并非所有这些行都含有所需的子元素(例如 `td.points-pos`),因此 `querySelector` 会返回 null 并导致上述错误。您需要使行选择器更具体。(另外,提取车手姓名的选择器也需要做一些修正。)

'use strict';

const puppeteer = require('puppeteer');

/**
 * Scrapes the drivers' championship table for a season from ewrc-results.com.
 *
 * The season page is opened in a headless browser, the rows of the table that
 * directly follows `div#points` are read, and the result is printed as
 * pretty-printed JSON and returned.
 *
 * @param {number|string} year - Season year interpolated into the page URL.
 * @returns {Promise<Array<{position: ?string, name: ?string, pointsTotal: ?string}>|undefined>}
 *     One entry per table row; a field is `null` when the row has no matching
 *     element. Resolves to `undefined` when scraping fails (the error is logged).
 */
async function getChampTable(year) {
    // Declared outside `try` so `finally` can close it on every path.
    let browser;
    try {
        browser = await puppeteer.launch();
        const page = await browser.newPage();
        const url = `https://www.ewrc-results.com/season/${year}/1-wrc/`;
        await page.goto(url, {waitUntil: 'domcontentloaded'});

        // This callback runs inside the page, so it can only use what it
        // defines itself plus the page's own globals (e.g. `document`).
        const driverTable = await page.evaluate(() => {
            // Returns the trimmed text of the first match inside `row`, or
            // null when the row has no such element, so one incomplete row
            // does not abort the whole scrape.
            const grabFromRow = (row, selector) => {
                const element = row.querySelector(selector);
                return element ? element.innerText.trim() : null;
            };

            // Several tables on the page use `tr.table_sude`; restrict the
            // match to the table immediately after `div#points`.
            const DRIVER_ROW_SELECTOR = 'div#points + table tr.table_sude';
            const driverRows = document.querySelectorAll(DRIVER_ROW_SELECTOR);

            return Array.from(driverRows, (tr) => ({
                position: grabFromRow(tr, 'td.points-pos'),
                name: grabFromRow(tr, 'a'),
                pointsTotal: grabFromRow(tr, 'td.points-total'),
            }));
        });

        console.log(JSON.stringify(driverTable, null, 2));
        return driverTable;
    } catch (error) {
        console.log(error);
    } finally {
        // Without this the Chromium process keeps running and Node never exits.
        if (browser) {
            await browser.close();
        }
    }
}

getChampTable(1995);
于 2020-05-19T18:05:59.507 回答