1
## RJ: import libraries ##
## RJ: END OF COMMENT ##
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv
import time
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


## RJ: URL of the product page to scrape
quote_page = 'https://www.onlineprinters.fr/p/autocollants-offset-a8'

## RJ: Download the page and parse it with Beautiful Soup into `soup`.
## RJ: The response is used as a context manager so the HTTP connection is
## RJ: closed as soon as parsing is done instead of staying open for the
## RJ: whole run. `page` stays bound afterwards, but is closed.
with urlopen(quote_page) as page:
    soup = BeautifulSoup(page, 'html.parser')


## RJ: Start Chrome and open the product page ##
## NOTE(review): recent Selenium 4 releases no longer accept the chromedriver
## path as a positional argument -- confirm against the installed version.
driver = webdriver.Chrome('C:\\Users\\rashm\\Desktop\\WebScraper\\chromedriver.exe')
# Reuse the URL defined above rather than repeating the literal.
driver.get(quote_page)
# find_element(By..., ...) replaces the find_element_by_* helpers, which were
# deprecated and later removed in Selenium 4; this form works on both.
inputPapier = driver.find_element(By.CLASS_NAME, 'wsmds_input')
cookiepopup = driver.find_element(By.ID, 'ws_cookie_layer_button')

## RJ: Click the cookie-layer button, then the first 'wsmds_input' element, ##
## RJ: as one chained series of actions ##
actions = ActionChains(driver)
actions.click(cookiepopup)
actions.click(inputPapier)
actions.perform()

    

## RJ: Collect the option labels offered by each of the four drop-downs ##

def _revealed_options(select_name):
    """Return the <option> elements of the <select> named *select_name*.

    The <select> is located first and then forced to ``display: block`` with
    JavaScript, in the same order as the original inline code. Presumably the
    reveal is needed so that the option labels can be read afterwards --
    confirm against the page.
    """
    found = Select(driver.find_element(By.NAME, select_name)).options
    driver.execute_script(
        "document.getElementsByName('{}')[0].style.display = 'block';".format(select_name))
    return found


selectPaper = _revealed_options('input_var_PAKA840_1_1')
optionsPaper = [option.text for option in selectPaper]

selectTirage = _revealed_options('input_var_PAKA840_2_1')
optionsTirage = [option.text for option in selectTirage]

selectDelai = _revealed_options('input_var_ZAKA834Y_1_3')
optionsDelai = [option.text for option in selectDelai]

selectVerification = _revealed_options('input_var_ZAKXXXXD_1_2')
optionsVerification = [option.text for option in selectVerification]

# Element that displays the base price; the loops below look it up again
# for every combination instead of reusing this reference.
bacsicPriceBox = driver.find_element(By.ID, 'pr_basispreis')

## RJ: Walk every paper x tirage x delai x verification combination and
## RJ: append one CSV row per combination: the four option labels followed
## RJ: by the text of the price element.

def _retry_if_stale(action, attempts=3):
    """Call *action* and return its result, retrying on a stale element.

    *action* must locate its element itself so that each retry works on a
    fresh reference. The last StaleElementReferenceException is re-raised
    once *attempts* calls have failed.
    """
    from selenium.common.exceptions import StaleElementReferenceException

    for attempt in range(attempts):
        try:
            return action()
        except StaleElementReferenceException:
            if attempt == attempts - 1:
                raise


def _choose_option(select_name, index):
    """Un-hide the <select> named *select_name* and select option *index*."""
    def attempt():
        driver.execute_script(
            "document.getElementsByName('{}')[0].style.display = 'block';".format(select_name))
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.NAME, select_name)))
        Select(element).select_by_index(index)

    # Presumably the page re-renders the form after a selection, which is
    # what detaches previously located elements -- hence the retry.
    _retry_if_stale(attempt)


def _read_price():
    """Return the text currently shown in the 'pr_basispreis' element."""
    return _retry_if_stale(
        lambda: driver.find_element(By.ID, 'pr_basispreis').text)


try:
    # Opened once for the whole run. newline='' is what the csv module
    # requires; without it every row is followed by a blank line on Windows.
    with open('goodJobV2.csv', 'a', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for indexPaper, paperLabel in enumerate(optionsPaper):
            print('---------------------TESTING FOR--------------------')
            _choose_option('input_var_PAKA840_1_1', indexPaper)
            for indexTirage, tirageLabel in enumerate(optionsTirage):
                _choose_option('input_var_PAKA840_2_1', indexTirage)
                print('---------------------TESTING TIRAGE SELECTED--------------------'+str(indexTirage))
                for indexDelai, delaiLabel in enumerate(optionsDelai):
                    _choose_option('input_var_ZAKA834Y_1_3', indexDelai)
                    print('---------------------TESTING DELAI SELECTED--------------------'+str(indexDelai))
                    for indexVerification, verificationLabel in enumerate(optionsVerification):
                        # Fixed: this used to wait on the delai <select>
                        # ('input_var_ZAKA834Y_1_3'), so the verification
                        # option was never actually selected.
                        _choose_option('input_var_ZAKXXXXD_1_2', indexVerification)
                        print('---------------------TESTING VERIFICATION SELECTED--------------------'+str(indexVerification))
                        # Fixed: write the displayed price text, not the
                        # WebElement object itself.
                        # NOTE(review): the price is read right after the
                        # selection; if the page updates it asynchronously a
                        # page-specific wait may be needed here -- confirm.
                        writer.writerow([paperLabel, tirageLabel, delaiLabel, verificationLabel, _read_price()])
                        # Keep finished rows on disk even if a later
                        # combination fails.
                        csv_file.flush()

            print('---------------------CLOSING FOR AND PAPER--------------------')

    print('---------------------CONGRATULATIONS!!!!!--------------------')
finally:
    # Always close the browser, including when a combination raises.
    driver.quit()

你好朋友,

我是 Python 和 Selenium 的新手。在一个自由职业项目中,我需要编写一个网络爬虫,把从网站上手动获取信息的工作自动化。我在这里附上了代码。它应该打开一个产品页面,遍历下拉列表中所有可能的选项组合,并获取每种组合对应的产品价格,最后将结果写入 Excel 文件。一个产品可能有数千种组合。每次在下拉列表中选择一个选项时,价格都会更新。我尝试过使用 Selenium 的 WebDriverWait 和 expected_conditions,但仍然不断收到异常 “Message: stale element reference: element is not attached to the page document”(过时的元素引用:元素未附加到页面文档)。

我已经在这方面花费了足够的时间,并希望 Stack Overflow 社区能够帮助我找到解决方案!

提前致谢!

4

0 回答 0