-1

在我的 python selenium 项目中随机出现一个错误,我使用树莓派从网站上抓取数据。它获取日期、温度、风和降雨量。该脚本有时会正常运行,但有时会弹出错误:

selenium.common.exceptions.StaleElementReferenceException:消息:过时的元素引用:元素未附加到页面文档(会话信息:chrome=84.0.4147.141)

是否有任何包装器可以实现以避免此类错误消息?如果你能分享一个解决方案,我会很高兴。

完整代码:

from selenium import webdriver
import pandas as pd
from datetime import datetime
import time
import schedule

def job():
    """Scrape the forecast page and write the values to an Excel file.

    Opens the pent.no forecast page in Chrome, clicks every date element,
    collects the hour labels and the temperature / wind / precipitation
    widgets, and saves them as columns of ``weather<day-of-month>.xlsx``.

    NOTE(review): this is the version that intermittently raises
    ``StaleElementReferenceException``: the ``date`` elements are clicked
    first and their ``.text`` is only read afterwards (see the "Date" loop).
    """
    driver = webdriver.Chrome()
    driver.get("https://pent.no/60.19401,11.09936")

    date = driver.find_elements_by_class_name("forecast-day-view-date-bar__date")
    i = 0

    # Click every date element. The manual index and `break` amount to a
    # single pass over the list: `i` reaches len(date) on the last iteration.
    for klikk in date:
            date[i].click()
            i = i+1
            if i==len(date):
                break
    # NOTE: this local name hides the imported `time` module inside job().
    time = driver.find_elements_by_class_name("forecast-hour-view-hour-label")
        
    # Number of blank "Date" cells that follow the first date.
    # Presumably 193 = 8 * 24 + 1, i.e. 9 dates of which the last 8 have
    # 24 hourly rows each -- TODO confirm against the page.
    count = len(time)-193

    # Each widget list is split by position: even indexes feed the "Yr"
    # columns, odd indexes feed the "Storm" columns.
    temp = driver.find_elements_by_class_name("forecast-hour-view-weather-widget__temperature")
    temp2 = temp[::2]
    temp3 = temp[1::2]

    wind = driver.find_elements_by_class_name("forecast-hour-view-weather-widget__wind-speed")
    wind2 = wind[::2]
    wind3 = wind[1::2]

    rainfall = driver.find_elements_by_class_name("forecast-hour-view-weather-widget__precipitation")
    rainfall2 = rainfall[::2]
    rainfall3 = rainfall[1::2]

    # One list per output column (a = Date, b = Time, c/d = Temp,
    # e/f = Wind, g/h = Rainfall); k counts the dates processed so far.
    a = []
    b = []
    c = []
    d = []
    e = []
    f = []
    g = []
    h = []
    k = 0

    # --- Date ---
    # Each date text is followed by blank cells: `count` for the first date,
    # 23 for every later one.
    # `datoer.text` is read here, after the clicks above; this is the line
    # shown in the traceback.
    for datoer in date:
        print("Dato:"+datoer.text)
        a.append(datoer.text)
        if k==0:
            a.extend([""]*count) 
        else:
            a.extend([""]*23)
        k = k+1
        

    df1 = pd.DataFrame(a, columns= ["Date"])
        
    # --- Time ---
    for tider in time:
        print("Tid:"+tider.text)
        b.append(tider.text)
        
    df2 = pd.DataFrame(b, columns= ["Time"])
    # --- Temperature ---
    for tempyr in temp2:
        print("Temp yr:"+tempyr.text)
        c.append(tempyr.text)
        
    df3 = pd.DataFrame(c, columns= ["Temp Yr"])

    for tempstorm in temp3:
        print("Temp storm:"+tempstorm.text)
        d.append(tempstorm.text)
        
    df4 = pd.DataFrame(d, columns= ["Temp Storm"])
    # --- Wind ---
    for windyr in wind2:
        print("Vind yr:"+windyr.text)
        e.append(windyr.text)
        
    df5 = pd.DataFrame(e, columns= ["Wind Yr"])

    for windstorm in wind3:
        print("Vind storm:"+windstorm.text)
        f.append(windstorm.text)
        
    df6 = pd.DataFrame(f, columns= ["Wind Storm"])
    # --- Rainfall ---
    # Empty texts are printed as "0.0 mm"; the stored values are patched
    # afterwards with DataFrame.replace().
    for rainfallyr in rainfall2:
        g.append(rainfallyr.text)
        if rainfallyr.text == "":
            print("Rein yr:"+"0.0 mm")
        else:
            print("Rein yr:"+rainfallyr.text)
        
    df7 = pd.DataFrame(g, columns= ["Rainfall Yr"])
    # Replace empty / whitespace-only cells with "0.0 mm".
    df7 = df7.replace(r'^\s*$', "0.0 mm", regex=True)
      
    for rainfallstorm in rainfall3:
        h.append(rainfallstorm.text)
        if rainfallstorm.text == "":
            print("Rein storm:"+"0.0 mm")
        else:
            print("Rein storm:"+rainfallstorm.text)
        
    df8 = pd.DataFrame(h, columns= ["Rainfall Storm"])
    # Replace empty / whitespace-only cells with "0.0 mm".
    df8 = df8.replace(r'^\s*$', "0.0 mm", regex=True)
    # --- Combine the single-column frames side by side and save ---
    tabell = [df1, df2, df3, df4, df5, df6, df7, df8]
    result = pd.concat(tabell, axis=1)

    # The file name carries the current day of the month.
    result.to_excel("weather" + str(int(datetime.now().day)) + ".xlsx")

            
    driver.quit()
    
# Register job() to run every day at 00:00, then check for pending runs
# once a minute, forever.
schedule.every().day.at("00:00").do(job)
while 1:
    schedule.run_pending()
    time.sleep(60)

编辑:

Traceback (most recent call last):
  File "/home/pi/Desktop/Data Scraper/test.py", line 108, in <module>
    schedule.run_pending()
  File "/home/pi/.local/lib/python3.7/site-packages/schedule/__init__.py", line 563, in run_pending
    default_scheduler.run_pending()
  File "/home/pi/.local/lib/python3.7/site-packages/schedule/__init__.py", line 94, in run_pending
    self._run_job(job)
  File "/home/pi/.local/lib/python3.7/site-packages/schedule/__init__.py", line 147, in _run_job
    ret = job.run()
  File "/home/pi/.local/lib/python3.7/site-packages/schedule/__init__.py", line 466, in run
    ret = self.job_func()
  File "/home/pi/Desktop/Data Scraper/test.py", line 47, in job
    a.append(datoer.text)
  File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/webelement.py", line 76, in text
    return self._execute(Command.GET_ELEMENT_TEXT)['value']
  File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/webelement.py", line 633, in _execute
    return self._parent.execute(command, params)
  File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
  (Session info: chrome=84.0.4147.141)
4

1 回答 1

0

Selenium 提供的是对浏览器内存中当前页面对象的引用。当您调用 `click()`,或者页面运行了会添加元素的 JavaScript 代码时,浏览器内存中的对象可能会改变位置,原有的引用就会指向浏览器内存中错误的元素——这就会产生错误 `stale element reference: element is not attached to the page document`。

您必须在调用 `click()` 之后重新获取 `date`。

或者,您应该在调用 `click()` 之前先取出 `date` 中各元素的文本:

date = driver.find_elements_by_class_name("forecast-day-view-date-bar__date")

# Read the text of every date first, i.e. before any `click()`, so that
# plain strings are kept instead of element references.
date_text = [item.text for item in date]

# Only now click the dates; from here on use `date_text`, not `item.text`.
for item in date:
    item.click()

之后应当使用这个文本列表(而不是元素本身):

# Build the "Date" column from the saved strings: each date is followed by
# blank cells (`count` after the first date, 23 after every later one).
for position, label in enumerate(date_text):
    print("Dato:", label)
    a.append(label)
    a.extend([""] * (count if position == 0 else 23))

编辑:

我的版本还有其他改动——例如,我使用了更少的 `DataFrame`。

我尽量把各段代码写得非常相似,以便之后把它们提取成一个函数,让代码更短。

在 Linux 上,我会使用 `cron` 服务,而不是 Python 模块 `schedule`。

当代码由某个调度程序或 `cron` 运行时,我不需要显示文本,因此我会用一个变量来关闭显示:`if display: print(...)`。不显示文本时,程序也应该运行得更快。

from selenium import webdriver
import pandas as pd
from datetime import datetime
import time
import schedule

def job():
    """Scrape the pent.no forecast and save it as ``weather<day>.xlsx``.

    The date texts are read *before* the date elements are clicked, so only
    plain strings are used afterwards; reading ``.text`` from the date
    elements after the clicks is what raised
    ``StaleElementReferenceException``.

    All columns are collected in one dictionary and turned into a single
    ``DataFrame``. The browser is closed even when scraping fails part-way,
    so a failed run does not leave a Chrome process behind.

    Raises:
        selenium.common.exceptions.WebDriverException: if the browser cannot
            be started or the page cannot be read.
        ValueError: from pandas, if the collected columns differ in length.
    """
    # Local import: `find_elements(By.CLASS_NAME, ...)` replaces the
    # `find_elements_by_class_name` shortcut, which newer Selenium releases
    # no longer provide.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get("https://pent.no/60.19401,11.09936")

        date = driver.find_elements(By.CLASS_NAME, "forecast-day-view-date-bar__date")

        # get all dates as text (before `click()`)
        date_text = [item.text for item in date]

        for item in date:
            item.click()

        # Named `hours` (not `time`) so the imported `time` module is not shadowed.
        hours = driver.find_elements(By.CLASS_NAME, "forecast-hour-view-hour-label")

        # Number of blank "Date" cells after the first date.
        # Presumably 193 = 8 * 24 + 1 (9 dates, the last 8 with 24 rows each)
        # -- TODO confirm against the page.
        count = len(hours) - 193

        # Even indexes feed the "Yr" columns, odd indexes the "Storm" columns.
        temp = driver.find_elements(By.CLASS_NAME, "forecast-hour-view-weather-widget__temperature")
        temp2 = temp[::2]
        temp3 = temp[1::2]

        wind = driver.find_elements(By.CLASS_NAME, "forecast-hour-view-weather-widget__wind-speed")
        wind2 = wind[::2]
        wind3 = wind[1::2]

        rainfall = driver.find_elements(By.CLASS_NAME, "forecast-hour-view-weather-widget__precipitation")
        rainfall2 = rainfall[::2]
        rainfall3 = rainfall[1::2]

        # --- dictionary for all columns ---

        all_columns = dict()

        # --- Date ---

        rows = []

        for k, text in enumerate(date_text):
            print("Dato:", text)
            rows.append(text)
            if k == 0:
                rows.extend([""] * count)
            else:
                rows.extend([""] * 23)

        all_columns["Date"] = rows

        # --- Time ---

        rows = []

        for item in hours:
            text = item.text.strip()
            print("Tid:", text)
            rows.append(text)

        all_columns["Time"] = rows

        # --- Temp Yr ---

        rows = []

        for item in temp2:
            text = item.text.strip()
            print("Temp yr:", text)
            rows.append(text)

        all_columns["Temp Yr"] = rows

        # --- Temp Storm ---

        rows = []

        for item in temp3:
            text = item.text.strip()
            print("Temp storm:", text)
            rows.append(text)

        all_columns["Temp Storm"] = rows

        # --- Wind Yr ---

        rows = []

        for item in wind2:
            text = item.text.strip()
            print("Vind yr:", text)
            rows.append(text)

        all_columns["Wind Yr"] = rows

        # --- Wind Storm ---

        rows = []

        for item in wind3:
            text = item.text.strip()
            print("Vind storm:", text)
            rows.append(text)

        all_columns["Wind Storm"] = rows

        # --- Rainfall Yr ---

        rows = []

        for item in rainfall2:
            text = item.text.strip()
            # An empty cell is stored as "0.0 mm" right away, so no
            # DataFrame.replace() is needed afterwards.
            if text == "":
                text = "0.0 mm"
            print("Rein yr:", text)
            rows.append(text)

        all_columns["Rainfall Yr"] = rows

        # --- Rainfall Storm ---

        rows = []

        for item in rainfall3:
            text = item.text.strip()
            # Same substitution as for "Rainfall Yr".
            if text == "":
                text = "0.0 mm"
            print("Rein storm:", text)
            rows.append(text)

        all_columns["Rainfall Storm"] = rows

        # --- build the table and save it ---

        result = pd.DataFrame(all_columns)

        # The file name carries the current day of the month.
        result.to_excel("weather{}.xlsx".format(datetime.now().day))
    finally:
        # Always close the browser, also when one of the steps above raised.
        driver.quit()
    
# The daily scheduling is commented out here; job() is called once directly
# instead.
#schedule.every().day.at("00:00").do(job)
#while True:   # `True` instead of `1` is more readable, besides Python will run `while bool(1):`
#    schedule.run_pending()
#    time.sleep(60)

job()

编辑:

使用函数的版本

def get_rows(items, description=None, replace=None, display=None):
    """Return the stripped text of every element in *items* as a list.

    Args:
        items: Iterable of objects that expose a ``text`` string attribute
            (here: Selenium web elements).
        description: Optional label printed in front of each value.
        replace: Optional value stored instead of a text that is empty
            after stripping.
        display: Whether to print each value. ``None`` (the default) falls
            back to the module-level ``DISPLAY`` flag, so existing calls
            behave exactly as before.

    Returns:
        list: One entry per element, in iteration order.
    """
    if display is None:
        display = DISPLAY

    rows = []

    for item in items:
        text = item.text.strip()

        if replace and not text:
            text = replace

        rows.append(text)

        # Nothing is printed without a label, even when display is on.
        if display and description:
            print(description, text)

    return rows

现在代码更短了

from selenium import webdriver
import pandas as pd
from datetime import datetime
import time
import schedule

# --- constants --- (PEP8: UPPER_CASE_NAMES)

# When true, get_rows() and job() print every scraped value.
DISPLAY = True

# --- classes --- (PEP8: CamelCaseNames)

# empty

# --- functions --- (PEP8: lower_case_names)

def get_rows(items, description=None, replace=None, display=None):
    """Return the stripped text of every element in *items* as a list.

    Args:
        items: Iterable of objects that expose a ``text`` string attribute
            (here: Selenium web elements).
        description: Optional label printed in front of each value.
        replace: Optional value stored instead of a text that is empty
            after stripping.
        display: Whether to print each value. ``None`` (the default) falls
            back to the module-level ``DISPLAY`` flag, so existing calls
            behave exactly as before.

    Returns:
        list: One entry per element, in iteration order.
    """
    if display is None:
        display = DISPLAY

    rows = []

    for item in items:
        text = item.text.strip()

        if replace and not text:
            text = replace

        rows.append(text)

        # Nothing is printed without a label, even when display is on.
        if display and description:
            print(description, text)

    return rows


def job():
    """Scrape the pent.no forecast and save it as ``weather--<day>.xlsx``.

    The date texts are read *before* the date elements are clicked, so only
    plain strings are used afterwards; reading ``.text`` from the date
    elements after the clicks is what raised
    ``StaleElementReferenceException``.

    Every other column is produced by ``get_rows()``. The browser is closed
    even when scraping fails part-way, so a failed run does not leave a
    Chrome process behind.

    Raises:
        selenium.common.exceptions.WebDriverException: if the browser cannot
            be started or the page cannot be read.
        ValueError: from pandas, if the collected columns differ in length.
    """
    # Local import: `find_elements(By.CLASS_NAME, ...)` replaces the
    # `find_elements_by_class_name` shortcut, which newer Selenium releases
    # no longer provide.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get("https://pent.no/60.19401,11.09936")

        date = driver.find_elements(By.CLASS_NAME, "forecast-day-view-date-bar__date")

        # get all dates as text (before `click()`)
        date_text = [item.text for item in date]

        for item in date:
            item.click()

        # Named `hours` (not `time`) so the imported `time` module is not shadowed.
        hours = driver.find_elements(By.CLASS_NAME, "forecast-hour-view-hour-label")

        # Number of blank "Date" cells after the first date.
        # Presumably 193 = 8 * 24 + 1 (9 dates, the last 8 with 24 rows each)
        # -- TODO confirm against the page.
        count = len(hours) - 193

        # Even indexes feed the "Yr" columns, odd indexes the "Storm" columns.
        temp = driver.find_elements(By.CLASS_NAME, "forecast-hour-view-weather-widget__temperature")
        temp2 = temp[::2]
        temp3 = temp[1::2]

        wind = driver.find_elements(By.CLASS_NAME, "forecast-hour-view-weather-widget__wind-speed")
        wind2 = wind[::2]
        wind3 = wind[1::2]

        rainfall = driver.find_elements(By.CLASS_NAME, "forecast-hour-view-weather-widget__precipitation")
        rainfall2 = rainfall[::2]
        rainfall3 = rainfall[1::2]

        # - Date -

        rows_date = []

        for k, text in enumerate(date_text):
            if DISPLAY:
                print("Dato:", text)
            rows_date.append(text)
            # Blank cells after each date: `count` for the first, 23 for the rest.
            rows_date.extend([""] * (count if k == 0 else 23))

        # - other -

        result = pd.DataFrame({
            "Date": rows_date,
            "Time": get_rows(hours, "Tid:"),
            "Temp Yr": get_rows(temp2, "Temp yr:"),
            "Temp Storm": get_rows(temp3, "Temp storm:"),
            "Wind Yr": get_rows(wind2, "Vind yr:"),
            "Wind Storm": get_rows(wind3, "Vind storm:"),
            # Empty precipitation cells are stored as "0.0 mm".
            "Rainfall Yr": get_rows(rainfall2, "Rein yr:", "0.0 mm"),
            "Rainfall Storm": get_rows(rainfall3, "Rein storm:", "0.0 mm"),
        })

        # - save -

        # The file name carries the current day of the month.
        result.to_excel("weather--{}.xlsx".format(datetime.now().day))
    finally:
        # Always close the browser, also when one of the steps above raised.
        driver.quit()
    
# --- main --- (PEP8: lower_case_names)

# The daily scheduling is commented out here; job() is called once directly
# instead.
#schedule.every().day.at("00:00").do(job)
#while True:   # `True` instead of `1` is more readable, besides Python will run `while bool(1):`
#    schedule.run_pending()
#    time.sleep(60)

job()

PEP 8——Python 代码风格指南

于 2021-02-07T16:03:24.077 回答