我正在尝试使用 Selenium 和 Python 来抓取 DuckDuckGo 搜索结果,但我只能在代码中断之前进入第二页。下面是最小的例子:
import scrapy
from scrapy_selenium import SeleniumRequest
from selenium import webdriver
class PrototypeSpider(scrapy.Spider):
name = 'prototype1'
def start_requests(self):
query = "test"
url = "https://duckduckgo.com/html?q="+query
yield SeleniumRequest(
url = url,
wait_time = 3,
screenshot = False,
callback = self.parse
)
def parse(self, response):
driver = response.meta['driver']
next_page_button = driver.find_element_by_xpath('//input[@value="Next"]')
while next_page_button:
results = response.xpath('//div[@class = "links_main links_deep result__body"]/h2')
for result in results:
yield{
'Title': result.xpath('.//a[@class = "result__a"]/text()').get(),
'Link': result.xpath('.//a[@class = "result__a"]/@href').get()
}
next_page_button.click()
我该如何改进它?非常感谢大家!