0

我正在尝试使用 selenium 登录网站,然后将经过身份验证的会话传递给 scrapy 以提取内容。

问题是,在我将会话传递给 scrapy 之后,我仍然没有登录。

# Spider that logs in through a Selenium-driven Firefox window and then hands
# the browser cookies to a Scrapy request.
# NOTE(review): in this paste the class body is not indented under the class
# statement, so the snippet will not run as shown.
class LoginSpider(scrapy.Spider):
name = 'login'
allowed_domains = ['*****']

start_urls = ['*****']
# Starts the Firefox browser that start_requests() drives.
# NOTE(review): scrapy.Spider.__init__ is not called here — confirm whether
# that is intended.
def __init__(self):
    self.driver = webdriver.Firefox()
# Performs the login in the browser, then yields one Request carrying cookies.
def start_requests(self):
    # driver = webdriver.Firefox()
    self.driver.get('*****')
    # Fixed pause; presumably waiting for the page to finish loading.
    time.sleep(5)
    portalButton = self.driver.find_element_by_xpath('//*[@id="fb_submit"]')
    portalButton.click()
    time.sleep(2)


    # Fill in the credentials and submit the login form.
    self.driver.find_element_by_xpath('//*[@id="email"]').send_keys('******')
    self.driver.find_element_by_xpath('//*[@id="password"]').send_keys('******')
    self.driver.find_element_by_xpath('//*[@id="btn-login"]').click()
    time.sleep(5)
    # NOTE: c is rebound to a new one-entry dict on every iteration, so after
    # the loop it holds only the last cookie returned by the browser; all
    # earlier cookies are dropped. If get_cookies() returns nothing, c is
    # never assigned and the yield below fails.
    for cookie in self.driver.get_cookies():
        c = {cookie['name']: cookie['value']}
    yield Request(url="****",cookies=c,callback=self.parse)




# Callback for the request above: shows the response in a browser and logs a
# marker line.
def parse(self,response):
    # self.log("->>>>>>>>>>>>")
    open_in_browser(response)
    # view(response)
    self.log("->>>>>>>>>>>>")
4

1 回答 1

3

我建议稍微改变一下这一步:

# Each pass rebinds c to a new one-entry dict, discarding the previous cookie.
for cookie in self.driver.get_cookies():
        c = {cookie['name']: cookie['value']}

改成类似下面这样:

# Collect every browser cookie into one dict, then attach it to the request.
_cookies = {cookie['name']: cookie['value'] for cookie in self.driver.get_cookies()}
yield Request(url="****",cookies=_cookies,callback=self.parse)

在原来的循环中,每次迭代都会用一个新的字典 {cookie['name']: cookie['value']} 重新创建 c,因此循环结束后 c 中只剩下最后一个 cookie,其余的 cookie 都丢失了。

我的代码示例:

import time

import scrapy
from scrapy import Request
from scrapy.utils.response import open_in_browser
from selenium import webdriver
from selenium.webdriver.common.by import By


class LoginSpider(scrapy.Spider):
    """Log in with a Selenium-driven Chrome browser, then crawl with Scrapy.

    The browser performs the interactive login; its cookies are then copied
    onto the first Scrapy request so that request carries the same cookies.
    """

    name = 'login'

    start_urls = ['URL']

    def __init__(self, *args, **kwargs):
        """Create the spider and start the Chrome browser used for the login.

        All positional and keyword arguments are forwarded to
        ``scrapy.Spider`` so that spider arguments keep working.
        """
        super().__init__(*args, **kwargs)
        self.driver = webdriver.Chrome()

    def start_requests(self):
        """Log in through the browser and yield one request with its cookies.

        Yields:
            A single ``Request`` for the target URL carrying every cookie the
            browser holds after the login form has been submitted.
        """
        try:
            self.driver.get('URL')
            # Fixed pause; presumably waiting for the login page to load.
            time.sleep(5)

            self.driver.find_element(By.ID, 'email').send_keys('EMAIL')
            self.driver.find_element(By.ID, 'passwd').send_keys('PASSWORD')
            self.driver.find_element(By.ID, 'SubmitLogin').click()
            # NOTE(review): cookies are read right after the click; if the
            # site completes the login asynchronously, a wait may be needed
            # here — confirm against the target site.
            session_cookies = {
                cookie['name']: cookie['value']
                for cookie in self.driver.get_cookies()
            }
        finally:
            # The cookies are plain data by now, so the browser can be closed
            # before the request is yielded; this also closes it when any
            # step of the login raises.
            self.driver.quit()

        yield Request(url='URL',
                      cookies=session_cookies,
                      callback=self.parse)

    def parse(self, response, **kwargs):
        """Open the fetched page in a local browser and log the response."""
        open_in_browser(response)
        self.log(response)
于 2021-03-12T14:09:31.807 回答