0

长话短说,我制作了一个用于网络抓取的应用程序,为了让它一次能够同时运行超过 1 个进程(打开超过 1 个 Chromium),我使用了 puppeteer-cluster。我让它一次运行多个进程,但集群之后不会停止,它会永久运行。一路走来,遇到如下错误(一)

await cluster.close(); // Gives the following error -> cluster.close is not a function (1)

如果我这样使用它(2)

(await cluster).close(); // This returns no error (2)

Anywho,主要问题是当代码到达集群关闭的行时(在代码的末尾,检查下面的代码),它会冻结在那里。不会返回任何错误,因为我试图捕捉它,但同时它也不会崩溃。

const puppeteer = require('puppeteer');
const { Cluster }  = require('puppeteer-cluster/');

function delay(time) {
 return new Promise(function(resolve) { 
   setTimeout(resolve, time)
 });
}


(async () => {

  const cluster = Cluster.launch({
    concurrency: Cluster.CONCURRENCY_BROWSER,
    maxConcurrency: 2,
  });

  const test = async () => {

    const user = process.argv[2];
    const pass = process.argv[3];
    const smis = process.argv[4];
    const nrinreg = process.argv[5];



    const browser = await puppeteer.launch({headless: false, defaultViewport: null, args:['--start-fullscreen']}); // you can also use '--start-fullscreen']});
    const page = await browser.newPage();
    //await page.setViewport({width: 1200, height: 900});
    await page._client.send('Page.setDownloadBehavior', {behavior: 'allow', downloadPath: './'+smis+''}).catch(e => {});
    await page.goto('https://aplicatii2014.smis.fonduri-ue.ro/smis2014app/').catch(e => {});
    await delay(2000);
    // await page.waitFor(3000);

    //scriere credentiale pt elementele de tip HTML user&parola
    await page.type('#j_idt38 > .step-content > .step-pane > .col-md-12 > .form-group > input[name="j_idt38:utilizator"]', user).catch(e => {});
    await delay(1000);
    await page.type('#j_idt38 > .step-content > .step-pane > .col-md-12 > .form-group > input[name="j_idt38:pass"]', pass).catch(e => {});
    await delay(1000);
    page.click('#j_idt38 > .actions > a').catch(e => {});

    await delay(2000);

    page.click('#idPanelGroup > #headerPanel > div > #j_idt18 > tbody > tr > #topMenuCell > #j_idt22').catch(e => {});
    await delay(2000);
    await page.type('#dialogCereriFinantare > .ui-dialog-content > #formCereriFinantare > table > tbody > tr > td > input[id="formCereriFinantare:idSmisAll"]', smis).catch(e => {});
    await delay(1000);
    page.click('#dialogCereriFinantare > .ui-dialog-content > #formCereriFinantare > table > tbody > tr > td > a[id="formCereriFinantare:commandBtnSearch"]').catch(e => {});
    await delay(1000);
    page.click('span.ui-chkbox-icon.ui-icon.ui-icon-blank.ui-c').catch(e => {});
    await delay(1000);
    await page.type('#dialogAcordConfidentialitate > .ui-dialog-content > #formAcordConfidentialitate > div[id="formAcordConfidentialitate:j_idt167"] > .ui-scrollpanel-container > .ui-scrollpanel-content > .col-md-12 > .row > .col-md-3 > input[id="formAcordConfidentialitate:CNP"]', pass).catch(e => {});
    await delay(1000);
    page.click('#dialogAcordConfidentialitate > .ui-dialog-content > #formAcordConfidentialitate > div[id="formAcordConfidentialitate:j_idt167"] > .ui-scrollpanel-container > .ui-scrollpanel-content > .col-md-12 > .row > a[id="formAcordConfidentialitate:btnConfirmContent"]').catch(e => {});
    await delay(1000);
    page.click('#dialogCereriFinantare > .ui-dialog-content > #formCereriFinantare > div > div > a > .ui-icon-seek-end').catch(e => {});
    await delay(2000);

    const doc_details = await page.evaluate(() => {
    //Extract each doc's basic details
    let table = document.querySelector('#dialogCereriFinantare > .ui-dialog-content > #formCereriFinantare > div[id="formCereriFinantare:tableCereriFinantare"] > .ui-datatable-tablewrapper > table > tbody');
    let doc_panels = Array.from(table.children); 
    // Loop through each doc and get their details 
    let doc_info = doc_panels.map(doc_panel => {
      let codsmis = doc_panel.querySelector("tr > td:nth-child(1)").textContent;
      let titlu = doc_panel.querySelector("tr > td:nth-child(2)").textContent;
      let versiune = doc_panel.querySelector("tr > td:nth-child(3)").textContent;
      let contractare = doc_panel.querySelector("tr > td:nth-child(4)").textContent;

      return { versiune, contractare };
    });


    return doc_info;
    });


    doc_details.sort((a, b) => (parseInt(a.versiune) < parseInt(b.versiune) ? 1 : -1));
    let res = new Array();
    res = doc_details.filter(a => a.contractare.length > 0);


  /**
   * Get first elem from a array
   * // [...res].shift()
   */
   const [first] = res;
  /**
   * If no element exist 
   */

   if (first === null) {
    page.click('#dialogCereriFinantare > .ui-dialog-content > #formCereriFinantare > div > div > a > .ui-icon-seek-prev').catch(e => { });
  }
  else {

    let version = first["versiune"];

    await delay(1000);

    const example = await page.$('#dialogCereriFinantare > .ui-dialog-titlebar');
    const bounding_box = await example.boundingBox();

    await page.mouse.move(bounding_box.x + bounding_box.width / 2, bounding_box.y + bounding_box.height / 2);
    await page.mouse.down();
    await page.mouse.move(126, 19);
    await page.mouse.up();
    
    await delay(1000);
    await page.waitForXPath("//tr/td[3][contains(., '"+ version +"')]");  
    const [projects] = await page.$x("//tr/td[3][contains(., '"+ version +"')]");
    projects.click().catch(e => {});

    await delay(2000);
    await page.goto("https://aplicatii2014.smis.fonduri-ue.ro/smis2014app/faces/pages/comunicare.xhtml").catch(e => {});
    await delay(2000);

    await page.evaluate(() => {
      document.querySelector('#j_idt68 > div > #idPanelContent > #j_idt140 > #j_idt140_content > #j_idt142 > div > .ui-datatable-tablewrapper > table > tbody').scrollIntoView();
    }).catch(e => {});


    await delay(2000).catch(e => {});
    
    const [com] = await page.$x("//tr/td[1][contains(., '37114')]").catch(e => {});

    if (com){
      com.click().catch(e => {});
    }
    else
    {
      let [com2] = await page.$x("//tr/td[1][contains(., '"+ nrinreg +"')]");
      do{
        page.click('#j_idt68 > div > #idPanelContent > #j_idt140 > #j_idt140_content > #j_idt142 > div > div[id="j_idt142:idComunicareTable_paginator_bottom"] > .ui-paginator-next').catch(e => {});
        await delay(2000).catch(e => {});
        let [com2] = await page.$x("//tr/td[1][contains(., '"+ nrinreg +"')]").catch(e => {});
        if (com2){
          break;
        }
      }
      while(!com2);

      
      let [com3] = await page.$x("//tr/td[1][contains(., '"+ nrinreg +"')]").catch(e => {});
      com3.click().catch(e => {});

      await delay(2000).catch(e => {});

      await page.evaluate(() => {
        document.querySelector('#j_idt68 > div > #idPanelContent > #j_idt140 > div > #idDetaliicomunicare').scrollIntoView();
      }).catch(e => {});

      await delay(2000).catch(e => {});
    }

    await delay(2000);

    const listadownload = await page.$$('#j_idt68 > div > #idPanelContent > div > div > #idDetaliicomunicare > div > div > div > ul > li > .ui-treenode-children  > li > span');

    for (let iteminlistadownload of listadownload){
      await iteminlistadownload.click({button: 'right',}).catch(e => {});
      await delay(2000);
      let [viz] = await page.$x('//*[@id="idDetaliicomunicare:j_idt163"]/ul/li/a').catch(e => {});
      viz.click().catch(e => {});
      await delay(2000);
    }


  }

 
  await delay(3000);
  await browser.close();
  
};


  
  (await cluster).queue(test);
  
  
  (await cluster).idle();

  
  (await cluster).close();

  


})();


我一直在网上寻找解决方案,在 GitHub 上寻找修复程序,但似乎没有任何效果。我做错了什么,根本不会终止进程?PS:我添加了整个代码,希望它是相关的。

4

1 回答 1

0

Cluster.launch返回一个承诺。如果你只是调用const cluster = Cluster.launch,现在cluster是 Promise,当你调用时(await cluster).close();(await cluster)将返回一个Cluster实例 -> 它工作!

让我们cluster用作Cluster实例而不是 Promise 对象:

  const cluster = await Cluster.launch({ // wait until it "launch" finish
    concurrency: Cluster.CONCURRENCY_BROWSER,
    maxConcurrency: 2,
  });
于 2021-03-02T01:27:59.690 回答