尝试为我的刮刀获取“名称”元素的文本时。我尝试使用完整的 Xpath 获取它并得到错误“TypeError:无法读取未定义的属性“getProperty”” 它仅在尝试获取频道标题时发生,在获取个人资料图片时有效。
scaper.js
const puppeteer = require('puppeteer');
async function scrapeChannel(url) {
const browser = await puppeteer.launch()
const page = await browser.newPage();
await page.goto(url);
// const xpath_expression = '/html/body/ytd-app/div/ytd-page-manager/ytd-browse[2]/div[3]/ytd-c4-tabbed-header-renderer/tp-yt-app-header-layout/div/tp-yt-app-header/div[2]/div[2]/div/div[1]/div/div[1]/ytd-channel-name/div/div/yt-formatted-string';
// await page.waitForXPath(xpath_expression);
const [el] = await page.$x('/html/body/ytd-app/div/ytd-page-manager/ytd-browse[2]/div[3]/ytd-c4-tabbed-header-renderer/tp-yt-app-header-layout/div/tp-yt-app-header/div[2]/div[2]/div/div[1]/div/div[1]/ytd-channel-name/div/div/yt-formatted-string');
const text = await el.getProperty('textContent');
const name = await text.jsonValue();
const [el2] = await page.$x('//*[@id="img"]');
const src = await el2.getProperty('src');
const avatarURL = await src.jsonValue();
browser.close();
console.log({name, avatarURL});
return { name, avatarURL}
}
}
scrapeChannel('https://www.youtube.com/channel/UC8butISFwT-Wl7EV0hUK0BQ')
索引.html
function newEl(type, attrs = {}) {
const el = document.createElement(type);
for (let attr in attrs) {
const value = attrs[attr];
if (attr == "innerText") el.innerText = value;
else el.setAttribute(attr, value);
}
return el;
}