1

我想通过循环打开多个选项卡并分别从每个选项卡下载文件来下载多个文件,但它只是在不同的下载路径中下载最后一个选项卡的文件。

这是一个示例代码,它无法通过 puppeteer 在单独的下载路径中下载多个文件。

const puppeteer = require('puppeteer');
const path = require('path');

/**
 * Opens a new tab, loads the test page and clicks the i-th `.pdf` element
 * after pointing the tab's download directory at `/home/vanni/download/<i>`.
 *
 * @param {number} i - Zero-based index of the `.pdf` element to click; also
 *   used as the name of the download sub-directory.
 * @param {object} browser - Puppeteer browser instance the tab is opened in.
 * @returns {Promise<void>} Resolves 5 seconds after the click.
 * @throws {Error} If the page has no `.pdf` element at index `i`.
 */
async function download(i, browser) {
  const page = await browser.newPage();
  await page.goto('https://vinay-jtc.github.io/test-pdf', {
    waitUntil: 'networkidle2',
  });
  const billsData = await page.$$('.pdf');
  const link = billsData[i];
  if (link === undefined) {
    // Fail with a descriptive message instead of a TypeError on `.click`.
    throw new Error(
      `No .pdf element at index ${i} (page has ${billsData.length})`,
    );
  }
  const downloadPath = path.resolve(`/home/vanni/download/${i}`);
  // `page._client` is a private Puppeteer field; obtain the CDP session
  // through the public API instead.
  const client = await page.target().createCDPSession();
  await client.send('Page.setDownloadBehavior', {
    behavior: 'allow',
    downloadPath,
  });
  await link.click();
  // Fixed 5 s pause after the click; presumably gives the download time to
  // finish before the caller closes the browser.
  await new Promise((resolve) => setTimeout(resolve, 5000));
}

/**
 * Launches a visible (non-headless) browser, starts four `download` calls
 * concurrently (indices 0-3) and closes the browser afterwards.
 *
 * The browser is closed once every download has fulfilled, or as soon as the
 * first one rejects; in the latter case the rejection is propagated.
 *
 * @returns {Promise<void>} Resolves after the browser has been closed.
 */
async function simplefileDownload() {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const promises = [];
    for (let i = 0; i < 4; i++) {
      promises.push(download(i, browser));
    }
    await Promise.all(promises);
  } finally {
    // Close the browser even when a download rejects, and wait for the close
    // to finish so the returned promise covers the whole lifecycle.
    await browser.close();
  }
}

// Script entry point: report a failure and set a non-zero exit code instead
// of leaving the returned promise without a rejection handler.
simplefileDownload().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

有人可以帮助解决这个问题吗?

4

1 回答 1

0

Puppeteer 是为运行端到端(e2e)测试而设计的。它也可以用于网页抓取等其他用途,但端到端测试才是它的设计初衷。想一想您平时是如何使用 Chrome 的:您无法真正同时开始下载 3 个文件,而是需要打开 1 个选项卡,点击“下载”,然后切换到下一个选项卡再点击“下载”,依此类推。

即使切换到新选项卡,下载本身也会继续。

所以在脚本中也必须这样做:逐个启动下载;每个下载一旦启动,就会在后台自行完成。

例如:

const puppeteer = require('puppeteer');
const path = require('path');

/**
 * Opens a new tab, loads the test page and clicks the i-th `.pdf` element
 * after pointing the tab's download directory at `/home/vanni/download/<i>`.
 * Returns as soon as the click has been dispatched; it does not wait for the
 * file to finish downloading.
 *
 * @param {number} i - Zero-based index of the `.pdf` element to click; also
 *   used as the name of the download sub-directory.
 * @param {object} browser - Puppeteer browser instance the tab is opened in.
 * @returns {Promise<void>} Resolves once the click has completed.
 * @throws {Error} If the page has no `.pdf` element at index `i`.
 */
async function startDownload(i, browser) {
  const page = await browser.newPage();
  await page.goto('https://vinay-jtc.github.io/test-pdf', {
    waitUntil: 'networkidle2',
  });
  const billsData = await page.$$('.pdf');
  const link = billsData[i];
  if (link === undefined) {
    // Fail with a descriptive message instead of a TypeError on `.click`.
    throw new Error(
      `No .pdf element at index ${i} (page has ${billsData.length})`,
    );
  }
  const downloadPath = path.resolve(`/home/vanni/download/${i}`);
  // `page._client` is a private Puppeteer field; obtain the CDP session
  // through the public API instead.
  const client = await page.target().createCDPSession();
  await client.send('Page.setDownloadBehavior', {
    behavior: 'allow',
    downloadPath,
  });
  await link.click();
}

/**
 * Pauses for a fixed number of milliseconds.
 *
 * Ideally you would watch the filesystem instead, so that you can return at
 * the moment the file has been downloaded. You can achieve that using the
 * `chokidar` module, for example.
 *
 * @param {number} ms - How long to wait, in milliseconds.
 * @returns {Promise<void>} Resolves once the timer has fired.
 */
async function waitForDownload(ms) {
  const delay = new Promise((done) => {
    setTimeout(done, ms);
  });
  await delay;
}

/**
 * Launches a visible (non-headless) browser and starts four downloads one
 * after another (indices 0-3). Each download gets its own 5 second wait,
 * which runs in the background while the next download is started. The
 * browser is closed once all waits are over.
 *
 * @returns {Promise<void>} Resolves after the browser has been closed;
 *   rejects (after closing the browser) if starting a download fails.
 */
async function simplefileDownload() {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const promises = [];
    for (let i = 0; i < 4; i++) {
      // Start the download and wait until it has actually been started...
      await startDownload(i, browser);

      // ...then move on: only the wait for its completion is queued.
      promises.push(waitForDownload(5 * 1000));
    }
    await Promise.all(promises);
  } finally {
    // Close the browser even when starting a download fails, and wait for
    // the close to finish before returning.
    await browser.close();
  }
}

// Script entry point: report a failure and set a non-zero exit code instead
// of leaving the returned promise without a rejection handler.
simplefileDownload().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});


如果您想完全并行执行,则需要启动多个puppeteer实例,如下所示:

const puppeteer = require('puppeteer');
const path = require('path');

/**
 * Launches a dedicated browser, loads the test page, clicks the i-th `.pdf`
 * element with the download directory set to `/home/vanni/download/<i>`,
 * waits 5 seconds and closes the browser again.
 *
 * @param {number} i - Zero-based index of the `.pdf` element to click; also
 *   used as the name of the download sub-directory.
 * @returns {Promise<void>} Resolves after the browser has been closed.
 * @throws {Error} If the page has no `.pdf` element at index `i`.
 */
async function startDownload(i) {
  // Run a new Puppeteer instance for each download.
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://vinay-jtc.github.io/test-pdf', {
      waitUntil: 'networkidle2',
    });
    const billsData = await page.$$('.pdf');
    const link = billsData[i];
    if (link === undefined) {
      // Fail with a descriptive message instead of a TypeError on `.click`.
      throw new Error(
        `No .pdf element at index ${i} (page has ${billsData.length})`,
      );
    }
    const downloadPath = path.resolve(`/home/vanni/download/${i}`);
    // `page._client` is a private Puppeteer field; obtain the CDP session
    // through the public API instead.
    const client = await page.target().createCDPSession();
    await client.send('Page.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath,
    });
    await link.click();
    await waitForDownload(5 * 1000);
  } finally {
    // Each call owns its browser, so it must be closed on every path;
    // otherwise a failed step leaves the browser process running.
    await browser.close();
  }
}

/**
 * Sleeps for the given number of milliseconds.
 *
 * Ideally you would watch the filesystem instead, so that you can return at
 * the moment the file has been downloaded. You can achieve that using the
 * `chokidar` module, for example.
 *
 * @param {number} ms - How long to sleep, in milliseconds.
 * @returns {Promise<void>} Resolves once the timer has fired.
 */
async function waitForDownload(ms) {
  await new Promise((wake) => {
    setTimeout(() => wake(), ms);
  });
}

/**
 * Kicks off four `startDownload` calls at once (indices 0-3), waits for all
 * of them to finish and then logs `done.`.
 *
 * @returns {Promise<void>} Resolves after the completion message was logged.
 */
async function simplefileDownload() {
  // Same as before: fire every download without awaiting it individually.
  const pending = Array.from({ length: 4 }, (_, index) => startDownload(index));
  await Promise.all(pending);
  console.log('done.');
}

// Script entry point: report a failure and set a non-zero exit code instead
// of leaving the returned promise without a rejection handler.
simplefileDownload().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
于 2022-02-24T14:01:59.753 回答