目前,Selenium 并没有提供获取 HTTP 状态码的正规途径。您可以用 Selenium 抓取页面中的 URL,再借助其他库(例如 requests)来检查各个链接的状态(或者采用 @MrTi 提出的检查页面标题的方案):
import requests
def find_broken_links(root, driver, *, timeout=None):
    """Crawl the pages reachable from ``root`` and collect broken URLs.

    Each URL is first fetched with ``requests`` to read its HTTP status
    code. URLs that answer normally are then loaded in the Selenium
    ``driver`` so that the ``href`` of every ``<a>`` element on the page can
    be queued for checking.

    NOTE: every http(s) link found is followed, including links to other
    sites; the crawl is not restricted to the domain of ``root``.

    Args:
        root: URL at which the crawl starts.
        driver: Selenium WebDriver used to load pages and find anchors.
        timeout: Optional timeout passed to ``session.get`` for each
            request. ``None`` (the default) means no timeout.

    Returns:
        The set of URLs that responded with 404 or 500, or that could not
        be requested at all.
    """
    visited = set()
    broken = set()
    # Popping from the end of a list gives depth-first order; switch to a
    # collections.deque with popleft() for breadth-first traversal.
    pending = [root]
    # The context manager closes the session when the crawl finishes.
    with requests.Session() as session:
        while pending:
            url = pending.pop()
            if url in visited:
                continue
            visited.add(url)
            try:
                resp = session.get(url, timeout=timeout)
            except requests.RequestException:
                # The request itself failed (bad URL, unreachable host,
                # timeout, ...): record the link and keep crawling.
                broken.add(url)
                continue
            if resp.status_code in (404, 500):
                broken.add(url)
                continue
            driver.get(url)
            # find_elements (plural) returns a list of every match; the
            # singular form returns a single element that cannot be iterated.
            anchors = driver.find_elements("tag name", "a")
            # Read all hrefs before the driver navigates away from this page.
            hrefs = [anchor.get_attribute("href") for anchor in anchors]
            # Skip anchors without an href and non-HTTP schemes such as
            # mailto: or javascript:, which requests cannot fetch.
            pending.extend(
                href
                for href in hrefs
                if href and href.startswith(("http://", "https://"))
            )
    return broken