我正在使用 selenium 和 BeautifulSoup 从表单中抓取数据。第一步是在搜索字段中提交条目。第二步是从新加载的表单中抓取数据。这两个步骤都是可行的。
编辑:当脚本输入搜索词(send_keys())并单击提交按钮(submit.click())时,网页会被加载。我希望网页在后台加载,这样我就不会看到浏览器窗口。
这是代码:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
# Selenium-only helpers needed by this script section: the Firefox options
# object (for headless mode) and the timeout exception raised by WebDriverWait.
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import Options

searchterm = "DE431311903710"
url = 'http://eagri.cz/public/web/mze/zemedelstvi/zivocisna-vyroba/zivocisne-komodity/kone/centralni-pristupove-misto-pro-evidenci.html'

# Upper bound, in seconds, for every explicit wait below (the original used
# the same value for its implicit wait).
WAIT_SECONDS = 50

# Run Firefox without a visible window so the page loads in the background.
options = Options()
options.add_argument("-headless")
driver = webdriver.Firefox(options=options)

try:
    driver.get(url)
    wait = WebDriverWait(driver, WAIT_SECONDS)

    # The search form is rendered inside an iframe, so the driver has to
    # switch into that frame before the form elements can be located.
    wait.until(EC.frame_to_be_available_and_switch_to_it((By.TAG_NAME, "iframe")))

    # Type the search term into the input field.
    # NOTE(review): this absolute XPath is brittle; prefer an id/name locator
    # if the input element exposes one.
    elem = wait.until(EC.presence_of_element_located((
        By.XPATH,
        "/html/body/div/form/div[3]/div/div[2]/table/tbody/tr[2]/td/table/tbody/tr[2]/td[2]/input",
    )))
    elem.send_keys(searchterm)

    submit = wait.until(EC.element_to_be_clickable((By.ID, "btnVyhledat")))
    submit.click()

    # Give the result markup a chance to appear before snapshotting the page.
    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "span.editprvek")))
    except TimeoutException:
        # No matching span showed up in time; parse the page as it is, which
        # yields empty lists below (same outcome as the original script).
        pass

    p = BeautifulSoup(driver.page_source, features="html.parser")
finally:
    # Always shut the browser down, even if a lookup or wait failed.
    driver.quit()

# Collect the text of every span with class "editprvek" into l and every span
# with class "editpopis" into k.
# NOTE(review): presumably "editpopis" holds the field labels and "editprvek"
# the field values -- confirm against the page markup.
inputs = p.find_all('span', {"class": "editprvek"})
inputs2 = p.find_all("span", {"class": "editpopis"})
l = [i.text for i in inputs]
k = [j.text for j in inputs2]
def merge(list1, list2):
    """Pair up the items of two sequences position by position.

    Args:
        list1: First iterable; its items become the first tuple members.
        list2: Second iterable; its items become the second tuple members.

    Returns:
        A list of ``(list1_item, list2_item)`` tuples. If the inputs differ
        in length, pairing stops at the end of the shorter one instead of
        raising ``IndexError``.
    """
    # zip() walks both inputs in lockstep and stops at the shorter one, so a
    # mismatch in length cannot index past the end of list2.
    return list(zip(list1, list2))
# Pair each "editpopis" span text (k) with the "editprvek" span text (l) at
# the same position and print the resulting list of tuples.
paired_spans = merge(k, l)
print(paired_spans)