2

我在这里面临一个问题。我正在尝试使用 scrapy-selenium 执行此代码,但这并没有刮什么。实际上我在这里缺少什么。如果是因为“page_source”,那么如何正确应用“page_source”?这是我的代码_

import scrapy 
from scrapy_selenium import SeleniumRequest 
from register.settings import * 
from selenium import webdriver 
from selenium.webdriver.chrome.options import Options 
import time


class DataSpider(scrapy.Spider):
    name = 'data'

def start_requests(self):
    yield SeleniumRequest(
        url='https://registers.maryland.gov/RowNetWeb/Estates/frmEstateSearch2.aspx/',
        wait_time=3,
        callback=self.parse
    )

def parse(self, response):
    chrome_options = Options()
    chrome_options.add_argument('__headless')

    chrome_path = SELENIUM_DRIVER_EXECUTABLE_PATH
    driver = webdriver.Chrome(executable_path=chrome_path, options= chrome_options)
    driver.get("https://registers.maryland.gov/RowNetWeb/Estates/frmEstateSearch2.aspx/")
    driver.set_window_size(1920, 1080)
    time.sleep(3)

    search_btn = driver.find_element_by_id('cmdSearch')
    search_btn.click()
    time.sleep(10)

    results = response.xpath("//table[@id='dgSearchResults']/tbody/tr[position() = 1 < position() = 21]/td[2]/a")

    for result in results:
        result.click()
        for info in result:
            yield {
                'Estate Number:': info.xpath("//span[@id='lblEstateNumber']/text()").get()
            }

    driver.quit()
4

0 回答 0