我对木偶师很陌生。我昨天开始,我正在尝试制作一个程序,它可以遍历一个 url,一个接一个地增量存储玩家 ID,并使用 neDB 保存玩家统计信息。有数千个链接需要翻阅,我发现如果我使用 for 循环,我的计算机基本上会崩溃,因为 1,000 个 Chromium 试图同时打开所有这些。有没有更好的方法或正确的方法来做到这一点?任何意见,将不胜感激。
const puppeteer = require('puppeteer');
const Datastore = require('nedb');
const database = new Datastore('database.db');
database.loadDatabase();
async function scrapeProduct(url){
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(url);
let attributes = [];
//Getting player's name
const [name] = await page.$x('//*[@id="ctl00_ctl00_ctl00_Main_Main_name"]');
const txt = await name.getProperty('innerText');
const playerName = await txt.jsonValue();
attributes.push(playerName);
//Getting all 12 individual stats of the player
for(let i = 1; i < 13; i++){
let vLink = '//*[@id="ctl00_ctl00_ctl00_Main_Main_SectionTabBox"]/div/div/div/div[1]/table/tbody/tr['+i+']/td[2]';
const [e1] = await page.$x(vLink);
const val = await e1.getProperty('innerText');
const skillVal = await val.jsonValue();
attributes.push(skillVal);
}
//creating a player object to store the data how i want (i know this is probably ugly code and could be done in a much better way)
let player = {
Name: attributes[0],
Athleticism: attributes[1],
Speed: attributes[2],
Durability: attributes[3],
Work_Ethic: attributes[4],
Stamina: attributes[5],
Strength: attributes[6],
Blocking: attributes[7],
Tackling: attributes[8],
Hands: attributes[9],
Game_Instinct: attributes[10],
Elusiveness: attributes[11],
Technique: attributes[12],
};
database.insert(player);
await browser.close();
}
//For loop to loop through 1000 player links... Url.com is swapped in here because the actual url is ridiculously long and not important.
for(let i = 0; i <= 1000; i++){
let link = 'https://url.com/?id='+i+'§ion=Ratings';
scrapeProduct(link);
console.log("Player #" + i + " scrapped");
}