我在这里面临一个问题。我正在尝试使用 scrapy-selenium 执行此代码,但这并没有刮什么。实际上我在这里缺少什么。如果是因为“page_source”,那么如何正确应用“page_source”?这是我的代码_
import scrapy
from scrapy_selenium import SeleniumRequest
from register.settings import *
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
class DataSpider(scrapy.Spider):
name = 'data'
def start_requests(self):
yield SeleniumRequest(
url='https://registers.maryland.gov/RowNetWeb/Estates/frmEstateSearch2.aspx/',
wait_time=3,
callback=self.parse
)
def parse(self, response):
chrome_options = Options()
chrome_options.add_argument('__headless')
chrome_path = SELENIUM_DRIVER_EXECUTABLE_PATH
driver = webdriver.Chrome(executable_path=chrome_path, options= chrome_options)
driver.get("https://registers.maryland.gov/RowNetWeb/Estates/frmEstateSearch2.aspx/")
driver.set_window_size(1920, 1080)
time.sleep(3)
search_btn = driver.find_element_by_id('cmdSearch')
search_btn.click()
time.sleep(10)
results = response.xpath("//table[@id='dgSearchResults']/tbody/tr[position() = 1 < position() = 21]/td[2]/a")
for result in results:
result.click()
for info in result:
yield {
'Estate Number:': info.xpath("//span[@id='lblEstateNumber']/text()").get()
}
driver.quit()