我想从以下链接中提取整篇新闻文章:https://www.reuters.com/world/europe/navalny-allies-accuse-telegram-censorship-russian-election-2021-09-18/ 。下面的代码用于获取这些文章的链接,现在我想针对每个链接提取文章正文。我写不出能够完成这一操作的 XPath:正文被拆分在多个 <p> 标签中,我不知道该如何处理。
!pip install selenium
!apt-get update
!apt install chromium-chromedriver
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
# Collect article links from the Reuters AAPL.O company page.
#
# Walks the "item" news cards one at a time, scrolling each into view, and
# stores the href of its first <a> in `links`. Stops at the first card whose
# <time> text is "a year ago", or when no further cards are available.

# Headless Chrome flags used for a sandboxed/containerised notebook runtime.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')

# `options=` replaces the deprecated `chrome_options=` keyword; the driver
# binary is looked up on PATH, which is what passing 'chromedriver' did.
driver = webdriver.Chrome(options=chrome_options)

links = []
i = 0
try:
    driver.maximize_window()
    driver.implicitly_wait(10)
    driver.get("https://www.reuters.com/companies/AAPL.O")

    try:
        while True:
            # Re-query on every pass: the list is expected to grow as the
            # page is scrolled (presumably lazy-loaded -- confirm on site).
            news = driver.find_elements(By.XPATH, "//div[@class='item']")
            if i >= len(news):
                # No more cards; previously this surfaced as a swallowed
                # IndexError.
                break

            item = news[i]
            driver.execute_script("arguments[0].scrollIntoView(true);", item)

            timestamp = item.find_element(By.TAG_NAME, "time")
            if timestamp.get_attribute("innerText") == "a year ago":
                break

            links.append(
                item.find_element(By.TAG_NAME, "a").get_attribute("href")
            )
            i += 1
            time.sleep(.5)  # give the page time to append further cards
    except Exception as exc:
        # Best-effort scrape: keep the links gathered so far, but report why
        # collection stopped instead of hiding the error.
        print(f"Stopped after collecting {len(links)} links: {exc!r}")
finally:
    # Always release the browser process, even on setup/navigation failure
    # or a manual interrupt.
    driver.quit()
#links