0

您好,我正在尝试从以下页面中删除一些信息:http: //verify.sos.ga.gov/verification/

我的代码如下:

import sys
reload(sys)
sys.setdefaultencoding('utf8')
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select  
import time
import csv

url = 'http://verify.sos.ga.gov/verification/'

def init_Selenium():
    global driver
    driver = webdriver.Chrome("/Users/rodrigopeniche/Downloads/chromedriver")
    driver.get(url)

def select_profession():
    select = Select(driver.find_element_by_name('t_web_lookup__profession_name'))
    options = select.options

    for index in range(1, len(options) - 1):
        select = Select(driver.find_element_by_name('t_web_lookup__profession_name'))
        select.select_by_index(index)
        select_license_type()

def select_license_type():
    select = Select(driver.find_element_by_name('t_web_lookup__license_type_name'))
    options = select.options

    for index in range(1, len(options) - 1):
        select = Select(driver.find_element_by_name('t_web_lookup__license_type_name'))
        select.select_by_index(index)
        search_button = driver.find_element_by_id('sch_button')
        driver.execute_script('arguments[0].click();', search_button)
        scrap_licenses_results()

def scrap_licenses_results():
    table_rows = driver.find_elements_by_tag_name('tr')

    for index, row in enumerate(table_rows):
        if index < 9:
            continue
        else:

            attributes = row.find_elements_by_xpath('td')

            try:
                name = attributes[0].text
                license_number = attributes[1].text
                profession = attributes[2].text
                license_type = attributes[3].text
                status = attributes[4].text
                address = attributes[5].text

                license_details_page_link = attributes[0].find_element_by_id('datagrid_results__ctl3_name').get_attribute('href')
                driver.get(license_details_page_link)

                data_rows = driver.find_elements_by_class_name('rdata')

                issued_date = data_rows[len(data_rows) - 3].text
                expiration_date = data_rows[len(data_rows) - 2].text
                last_renewal_day = data_rows[len(data_rows) - 1].text

                print name, license_number, profession, license_type, status, address, issued_date, expiration_date, last_renewal_day

                driver.back()

            except:
                pass

init_Selenium()
select_profession()

当我执行脚本时,它适用于第一次迭代,但在第二次迭代中失败。引发错误的确切位置在该attributes = row.find_elements_by_xpath('td')行中的 scrap_licenses_results() 函数中。

任何帮助将不胜感激

4

1 回答 1

1

staleElementReferenceException 是由于在循环迭代之前收集的行列表造成的。最初,您创建了一个名为 table_rows 的所有行的列表。

 table_rows = driver.find_elements_by_tag_name('tr')

现在在循环中,在第一次迭代期间,您的第一行元素是新鲜的并且可以由驱动程序找到。在第一次迭代结束时,您正在做的是driver.back(),您的页面更改/刷新 HTML DOM。所有以前收集的参考现在都丢失了。table_rows 列表中的所有行现在都已过时。因此,在第二次迭代中,您将面临这样的异常。

您必须在循环中移动查找行操作,以便每次在目标应用程序上找到新的引用。伪代码应该做这样的事情。

total_rows = driver.find_elements_by_tag_name('tr').length()

for i in total_rows
    driver.find_element_by_xpath('//tr[i]')
    .. further code..
于 2018-03-30T04:19:17.077 回答