0

我正在尝试以递归方式访问一系列网页。访问一个页面,然后返回然后移动到下一页,依此类推。使用我的代码,我可以访问第一页,然后返回上一页,但我无法访问下一页并得到一个过时的元素引用错误。我通读了有关此错误的答案,但仍然不知道我的代码在哪里更改了状态...

下面是我的代码

"""Get the browser (a "driver")."""
# find the path with 'which chromedriver'
path_to_chromedriver = ('/Users/xxxx/Desktop/chromedriver')
browser = webdriver.Chrome(executable_path=path_to_chromedriver)

url = "http://www.presidency.ucsb.edu/index_docs.php"
browser.get(url)
#time.sleep(600) 

# 2008 Presidential Election
pe_2008 = browser.find_element_by_xpath(
"/html/body/table/tbody/tr[2]/td/table/tbody/tr/td[2]/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/ul/li[22]/ul/li[5]/a")

# 2012 Presidential Election
pe_2012 = browser.find_element_by_xpath(
 "/html/body/table/tbody/tr[2]/td/table/tbody/tr/td[2]/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/ul/li[22]/ul/li[6]/a")

# 2016 Presidential Election
pe_2016 = browser.find_element_by_xpath("/html/body/table/tbody/tr[2]/td/table/tbody/tr/td[2]/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/ul/li[22]/ul/li[7]/a")

pe_2016.click()

# Collect every anchor whose link text contains 'campaign'. The list is built
# once here and never refreshed, so these are references into the page as it
# is loaded at this moment.
links = browser.find_elements_by_partial_link_text('campaign')

for i in range(len(links)):
    # NOTE(review): the recorded traceback points at this line as the one
    # raising StaleElementReferenceException. Nothing in the loop body
    # re-locates `links`, and there is no browser.back() after the inner loop
    # to return to the page on which `links` was found.
    links[i].click()
    # Fixed pause, presumably to let the clicked page finish loading.
    time.sleep(5)
    # Gather all anchors carrying an href on the page now displayed.
    elems = browser.find_elements_by_xpath("//a[@href]")
    linkls = []
    for elem in elems:
        address = elem.get_attribute("href")
        # Keep only anchors whose href contains the /ws/ document prefix.
        if 'http://www.presidency.ucsb.edu/ws/' in address:
            linkls.append(elem)
    try:
        k = len(linkls)
        for j in range(k):
            # `linkls` holds element references captured before any click in
            # this inner loop; they are reused unchanged after browser.back().
            linkls[j].click()
            time.sleep(5)
            browser.back()
            time.sleep(5)
    except Exception as e:
        # Any failure is printed and the rest of the inner loop is skipped;
        # the outer loop then carries on with the next index.
        print(e)


Video Transcript: Presidential Exploratory Committee Announcement
Message: stale element reference: element is not attached to the page document
  (Session info: chrome=63.0.3239.132)
  (Driver info: chromedriver=2.35.528157     (4429ca2590d6988c0745c24c8858745aaaec01ef),platform=Mac OS X 10.12.6 x86_64)

---------------------------------------------------------------------------
StaleElementReferenceException            Traceback (most recent call last)
<ipython-input-255-edb48028a541> in <module>()
      1 for i in range(len(links)):
----> 2     links[i].click()
      3     time.sleep(5)
      4     elems = browser.find_elements_by_xpath("//a[@href]")
      5     linkls = []

/Users/misun/anaconda/lib/python3.5/site-packages/selenium/webdriver/remote/webelement.py in click(self)
     78     def click(self):
     79         """Clicks the element."""
---> 80         self._execute(Command.CLICK_ELEMENT)
     81 
     82     def submit(self):

/Users/misun/anaconda/lib/python3.5/site-packages/selenium/webdriver/remote/webelement.py in _execute(self, command, params)
    626             params = {}
    627         params['id'] = self._id
--> 628         return self._parent.execute(command, params)
    629 
    630     def find_element(self, by=By.ID, value=None):

/Users/misun/anaconda/lib/python3.5/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
    310         response = self.command_executor.execute(driver_command, params)
    311         if response:
--> 312             self.error_handler.check_response(response)
    313             response['value'] = self._unwrap_value(
    314                 response.get('value', None))

/Users/misun/anaconda/lib/python3.5/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
    235         elif exception_class == UnexpectedAlertPresentException and 'alert' in value:
    236             raise exception_class(message, screen, stacktrace, value['alert'].get('text'))
--> 237         raise exception_class(message, screen, stacktrace)
    238 
    239     def _value_or_default(self, obj, key, default):

StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
  (Session info: chrome=63.0.3239.132)
  (Driver info: chromedriver=2.35.528157     (4429ca2590d6988c0745c24c8858745aaaec01ef),platform=Mac OS X 10.12.6 x86_64)
4

2 回答 2

0

当您转到新页面时,前一页中的所有元素都将被破坏/陈旧。每次返回起始页面时,您都必须重新获取元素列表。

最好的解决方案可能是找到所有元素,然后对它们进行迭代以提取 URL。然后,您可以遍历 url 列表而不是元素列表。

于 2018-01-20T23:59:33.350 回答
-1

1)查看我在另一篇文章中的回答,以帮助您很好地理解 StaleElementReferenceException 的原因。

2)我对你的代码做了一些修改,你可以试试:

# Walk every candidate's "campaign speeches" page and open each speech in
# turn, re-locating elements after every navigation so that no reference to a
# previous page load is ever reused (reusing one raises
# StaleElementReferenceException).

# 2012 Presidential Election
pe_2012 = browser.find_element_by_link_text("2012 Election")

# 2016 Presidential Election
pe_2016 = browser.find_element_by_link_text("2016 Election")

pe_2016.click()

# The plural find_elements_* returns a list; the singular form returns one
# WebElement, which supports neither len() nor indexing.
candidates = browser.find_elements_by_link_text("campaign speeches")

# Iterate by index rather than over the list itself: the list is rebuilt at
# the end of every pass, so only the position carries over between passes.
for i in range(len(candidates)):
    candidates[i].click()
    time.sleep(5)

    speeches = browser.find_elements_by_css_selector("td.listdate > a")
    # Count the speech links just located on this candidate's page.
    speech_count = len(speeches)

    # Begin: click each speech link of one candidate
    try:
        for j in range(speech_count):
            # Open one speech.
            speeches[j].click()
            time.sleep(5)
            # Return to this candidate's speech list.
            browser.back()
            time.sleep(5)

            # Locate the speech links again: the references found before
            # navigating away are stale now.
            speeches = browser.find_elements_by_css_selector("td.listdate > a")
    except Exception as e:
        # Best effort: report the failure and move on to the next candidate.
        print(e)
    # End: click each speech link of one candidate

    # Back to the candidates list page, ready for the next candidate.
    browser.back()
    time.sleep(5)

    # Locate the candidate links again for the same reason as above.
    candidates = browser.find_elements_by_link_text("campaign speeches")
于 2018-01-21T13:20:23.553 回答