在一个需要:
- 使用登录凭据开始每个会话 + 通知/OTP
- 异步处理多个帐户
- 远程调试或监控会话
- 可以在打开的会话上以任意顺序请求 5+ 种不同的操作
我希望它是一个 puppeteer 集群浏览器,有一个远程调试端口来监控它,但无法集成 WsEndpoints
const puppeteer = require('puppeteer-extra');
const { Cluster } = require('puppeteer-cluster');
class SingleBrowserCluster {
browserInstance
options
constructor() {
if (SingleBrowserCluster._instance) {
//throw new Error("Singleton classes can't be instantiated more than once.")
}
else{
SingleBrowserCluster._instance = this;
// ... Your rest of the constructor code goes after this
console.log("pre optArgs");
const optArgs = [
'--remote-debugging-port=3002',//works if dockerised
'--remote-debugging-address=0.0.0.0',// at localhost.3002
'--window-size=1920,1080',
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-gpu', '--no-zygote', //'--single-process',
];
console.log("pre options");
this.options = {
headless: true,//for dockerization
args: optArgs,
defaultViewport: null,
waitUntil: 'networkidle2'
};
console.log("Do launch now");
return this;
}
}
async screenshotMethod({ page, data: url }) {
await page.goto(url);
console.log(`%c worker X is running on ${url} `, `color:green;`);
console.log("will wait 20 second");
await page.waitForTimeout(20000)
const path = url.replace(/[^a-zA-Z]/g, '_') + '.png';
await page.screenshot({ path });
};
async launchCluster (){
try {
this.browserInstance = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_CONTEXT,
maxConcurrency: 3,
puppeteerOptions: this.options
})
console.log(this.browserInstance);
return this.browserInstance;
} catch (error) {
console.log(`%c ERRORR`,`color:red;`);
console.log(error);
}
}
}
const StartScraper = async (Url, useProxy) => new Promise((resolve, reject) => {
(async () => {
// get proxy url from environment files
const proxyUrl = process.env.PROXY_URL;
//--disable-dev-shm-usage
// By default, Docker runs a container with a /dev/shm shared memory space 64MB. This is typically too small for Chrome and will cause Chrome to crash when rendering large pages.
//his will write shared memory files into /tmp instead of /dev/shm. See crbug.com/736452 for more details.
var instanceOne1= new SingleBrowserCluster()//.launchCluster()
var browser= await instanceOne1.launchCluster();
browser.queue('https://www.google.com/', instanceOne1.screenshotMethod);
//THE PROBLEM LINE
const wsEndpoint = browser.wsEndpoint();
try {
const page = (await browser.pages())[0];
await page.goto(Url, { waitUntil: 'load' });
return resolve(wsEndpoint);
} catch (e) {
browser.close();
return reject(false);
}
})();
});
如何在 puppeteer-cluster 中拥有任何会话的 WSendpoint
(更多信息:我会将它们放在会话文件中,以便为我的下一个选择的后续操作提供其会话上的连接点
- localhost/StartScraper 创建 WSendpoint
- localhost/login==WSendpoint==>连接到现有会话做登录的东西
- localhost/listItems==WSendpoint==>连接到现有会话做 listItems 的东西...)