1

在一个需要:

  • 使用登录凭据开始每个会话 + 通知/OTP
  • 异步处理多个帐户
  • 远程调试或监控会话
  • 可以在打开的会话上以任意顺序请求 5+ 种不同的操作

我希望它是一个 puppeteer 集群浏览器,有一个远程调试端口来监控它,但无法集成 WsEndpoints

const puppeteer = require('puppeteer-extra');

const { Cluster } = require('puppeteer-cluster');

class SingleBrowserCluster {

  browserInstance
  options

  constructor() {
   
      if (SingleBrowserCluster._instance) {
          
        //throw new Error("Singleton classes can't be instantiated more than once.")
      }
else{
      SingleBrowserCluster._instance = this;
      // ... Your rest of the constructor code goes after this

      console.log("pre optArgs");
      const optArgs = [
          '--remote-debugging-port=3002',//works if dockerised 
          '--remote-debugging-address=0.0.0.0',// at localhost.3002
          '--window-size=1920,1080',
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-gpu', '--no-zygote',    //'--single-process',     
      ];

      console.log("pre options");
      this.options = {
          headless: true,//for dockerization
          args: optArgs,
          defaultViewport: null,
          waitUntil: 'networkidle2'
      };

      console.log("Do launch now");


      return this; 
    }
  }
  
  
  async screenshotMethod({ page, data: url }) {
      
      await page.goto(url);
      
      console.log(`%c worker X is running on ${url} `, `color:green;`);
      console.log("will wait 20 second");
      await page.waitForTimeout(20000)
      
      const path = url.replace(/[^a-zA-Z]/g, '_') + '.png';
      await page.screenshot({ path });
  };
  
  
      
  async launchCluster (){
    try {
      
            this.browserInstance =  await Cluster.launch({
                concurrency: Cluster.CONCURRENCY_CONTEXT,
          maxConcurrency: 3,
          puppeteerOptions: this.options
      })

      console.log(this.browserInstance);
      return this.browserInstance;
  
  } catch (error) {
      console.log(`%c ERRORR`,`color:red;`);
      console.log(error);
  }
  }
  
}


const StartScraper = async (Url, useProxy) => new Promise((resolve, reject) => {
  (async () => {
    // get proxy url from environment files
    const proxyUrl = process.env.PROXY_URL;

//--disable-dev-shm-usage
    // By default, Docker runs a container with a /dev/shm shared memory space 64MB. This is typically too small for Chrome and will cause Chrome to crash when rendering large pages. 
    //his will write shared memory files into /tmp instead of /dev/shm. See crbug.com/736452 for more details.



    var instanceOne1= new SingleBrowserCluster()//.launchCluster()

    var browser= await instanceOne1.launchCluster();
    browser.queue('https://www.google.com/', instanceOne1.screenshotMethod);

//THE PROBLEM LINE
    const wsEndpoint = browser.wsEndpoint();



    try {
      const page = (await browser.pages())[0];
      await page.goto(Url, { waitUntil: 'load' });
      return resolve(wsEndpoint);
    } catch (e) {
      browser.close();
      return reject(false);
    }
  })();
});

如何在 puppeteer-cluster 中拥有任何会话的 WSendpoint

(更多信息:我会将它们放在会话文件中,以便为我的下一个选择的后续操作提供其会话上的连接点

  • localhost/StartScraper 创建 WSendpoint
  • localhost/login==WSendpoint==>连接到现有会话做登录的东西
  • localhost/listItems==WSendpoint==>连接到现有会话做 listItems 的东西...)
4

0 回答 0