5

我正在尝试接受此页面上生成的弹出窗口中的 cookie 同意选项。我尝试使用 waitForSelector,但我使用的选择器似乎对无头浏览器不可见。我想把选项实际切换为“是”,然后提交表单。我猜这个弹窗是在 window.onload 时才显示的,所以也许需要用 JavaScript 来完成?

import asyncio
import time

from pyppeteer import launch
from pyppeteer.errors import TimeoutError
from urllib.parse import urlparse

# Addresses handed to fetch(), one call per entry.
URLS = ['https://www.trustarc.com/']

# Timestamp taken at start-up; the final print subtracts it from the current time.
start = time.time()

async def fetch(url, browser):
    """Open *url* in a new page of *browser* and save a full-page screenshot.

    ``callback`` is registered as the page's ``'request'`` listener before
    navigation. Navigation uses the ``networkidle0`` wait condition, and the
    screenshot is written to ``img/<network location of url>.png``. The
    ``img`` directory is created when missing. The page is closed whether or
    not an error occurred.

    Args:
        url: Address to load.
        browser: Object providing ``newPage()``; the script passes the
            browser returned by ``launch``.

    Returns:
        None. A ``TimeoutError`` raised inside the ``try`` is reported on
        stdout instead of being propagated; other exceptions propagate.
    """
    import os  # local import: only needed to prepare the screenshot directory

    page = await browser.newPage()
    try:
        # Request interception is not enabled here; the listener only prints.
        page.on('request', callback)
        await page.goto(url, {'waitUntil': 'networkidle0'})
        # Without this the screenshot fails with FileNotFoundError, which the
        # handler below does not catch, when ``img`` does not exist yet.
        os.makedirs('img', exist_ok=True)
        await page.screenshot({'path': f'img/{urlparse(url)[1]}.png', 'fullPage': True})
    except TimeoutError:
        # In this script TimeoutError is the class imported from
        # pyppeteer.errors, which shadows the builtin of the same name.
        print(f'Timeout for: {url}')
    finally:
        await page.close()


async def callback(req):
    """Print the ``url`` attribute of *req* prefixed with ``Request: ``."""
    line = f'Request: {req.url}'
    print(line)

async def run():
    """Launch a headless browser, run ``fetch`` for every entry of ``URLS``, then close it.

    All fetches are scheduled up front and awaited together. The browser is
    closed in a ``finally`` clause so that an exception escaping any fetch
    (``fetch`` only handles ``TimeoutError``) does not leave the browser
    process running.

    Returns:
        None.
    """
    browser = await launch(headless=True, args=['--no-sandbox'])
    try:
        tasks = [asyncio.ensure_future(fetch(url, browser)) for url in URLS]
        await asyncio.gather(*tasks)
    finally:
        await browser.close()

# Schedule run() on the event loop and block until it has finished.
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run(), loop=loop)
loop.run_until_complete(future)

# Report the seconds elapsed since `start` was recorded.
print(f'It took {time.time()-start} seconds.')
4

2 回答 2

8

如果有人觉得有用,这是我基于已采纳答案写的 Python 实现:

import asyncio
import time

from pyppeteer import launch
from pyppeteer.errors import TimeoutError
from urllib.parse import urlparse

# Pages to process; run() calls fetch() once for each entry.
URLS = ['https://www.trustarc.com/']

# Start-up timestamp, subtracted from the current time in the final print.
start = time.time()

async def fetch(url, browser):
    """Submit the cookie-preferences form shown on *url*, then screenshot the page.

    Steps, in order: load the page with the ``networkidle0`` wait condition;
    click ``#teconsent > a`` when no ``.truste_overlay`` element is found;
    wait for a visible ``.active`` element inside the preferences frame; run
    a script in that frame that clicks the first two ``off`` elements and the
    first ``submit`` element; click the ``#gwt-debug-close_id`` element; save
    a full-page screenshot to ``img/<network location of url>.png``. The
    ``img`` directory is created when missing. The page is always closed.

    Args:
        url: Address to load.
        browser: Object providing ``newPage()``; the script passes the
            browser returned by ``launch``.

    Returns:
        None. A ``TimeoutError`` raised inside the ``try`` is reported on
        stdout instead of being propagated; other exceptions propagate.
    """
    import os  # local import: only needed to prepare the screenshot directory

    page = await browser.newPage()
    try:
        await page.goto(url, {'waitUntil': 'networkidle0'})
        # page.J yields a falsy result when the overlay is absent; in that
        # case the pop-up has to be opened by clicking the consent link.
        if not await page.J('.truste_overlay'):
            await page.click('#teconsent > a')
        # NOTE(review): assumes the preferences form is in the frame at
        # index 1; raises IndexError if the page has fewer frames - confirm.
        cookies_frame = page.frames[1]
        await cookies_frame.waitForSelector('.active', {'visible': True})
        # The script below executes inside the frame's document.
        await cookies_frame.evaluate('''() =>
            {
               const yes_buttons   = document.getElementsByClassName( 'off' );
               const submit_button = document.getElementsByClassName( 'submit' )[0];

               yes_buttons[0].click();
               yes_buttons[1].click();

               submit_button.click();
            }''')
        # After submitting, a close control has to be clicked before continuing.
        close_button = await cookies_frame.waitForSelector('#gwt-debug-close_id')
        await close_button.click()
        # Without this the screenshot fails with FileNotFoundError, which the
        # handler below does not catch, when ``img`` does not exist yet.
        os.makedirs('img', exist_ok=True)
        await page.screenshot({'path': f'img/{urlparse(url)[1]}.png', 'fullPage': True})
    except TimeoutError:
        # In this script TimeoutError is the class imported from
        # pyppeteer.errors, which shadows the builtin of the same name.
        print(f'Timeout for: {url}')
    finally:
        await page.close()


async def callback(req):
    """Write ``Request: <req.url>`` to stdout for the given request object."""
    text = f'Request: {req.url}'
    print(text)

async def run():
    """Start a headless browser, process every entry of ``URLS`` with ``fetch``, then close it.

    One task per URL is created and all of them are awaited together. The
    browser is closed in a ``finally`` clause: ``fetch`` only handles
    ``TimeoutError``, so any other exception would otherwise skip
    ``browser.close()`` and leave the browser process running.

    Returns:
        None.
    """
    browser = await launch(headless=True, args=['--no-sandbox'])
    try:
        pending = [asyncio.ensure_future(fetch(url, browser)) for url in URLS]
        await asyncio.gather(*pending)
    finally:
        await browser.close()

# Wrap run() in a task on the event loop and block until it completes.
loop = asyncio.get_event_loop()
future = loop.create_task(run())
loop.run_until_complete(future)

# Report the seconds elapsed since `start` was recorded.
print(f'It took {time.time()-start} seconds.')
于 2018-09-11T08:09:17.227 回答
3

如果 Cookie Preferences 弹出框没有自动打开,您可以通过单击网页右下角的按钮手动打开弹出框。

在此处输入图像描述

cookie 选项位于 iframe 中,因此您必须等到框架内容加载后,才能为功能性 Cookie 和广告 Cookie 选择“是”。

提交首选项后,您需要等待并关闭确认消息才能继续使用该网站。

完整示例:

// Load the site and wait for the 'networkidle0' condition

await page.goto( 'https://www.trustarc.com/', { waitUntil : 'networkidle0' } );

// When no overlay element is found, open the Cookie Preferences pop-up by hand

if ( !( await page.$( '.truste_overlay' ) ) )
{
    await page.click( '#teconsent > a' );
}

// The preferences form is inside a frame: take the frame at index 1
// and wait until an '.active' element in it is visible

const cookies_frame = page.frames()[1];

await cookies_frame.waitForSelector( '.active', { visible : true } );

// Inside the frame: choose "Yes" twice, then submit

await cookies_frame.evaluate( function ()
{
    // getElementsByClassName returns a live collection, so each index is
    // read only at the moment of its click, in the same order as before.
    const off_toggles = document.getElementsByClassName( 'off' );
    const submit      = document.getElementsByClassName( 'submit' )[0];

    off_toggles[0].click();
    off_toggles[1].click();

    submit.click();
} );

// A confirmation follows the submit: wait for its close control and click it

const close_button = await cookies_frame.waitForSelector( '#gwt-debug-close_id' );

await close_button.click();
于 2018-09-10T19:59:51.230 回答