这段代码抓取 fitgirl 网站上所有可用的游戏,并生成一个字典,记录每个游戏页面的评论数量。获取评论数的部分需要用到 selenium,因为评论依赖于该网站使用的 Disqus 服务。有什么办法可以优化这个流程吗?
import bs4
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.common import exceptions
import requests
# Walk index pages 1-30 of the FitGirl "all my repacks A-Z" listing, open every
# linked repack page in headless Chrome, and store the text of the link inside
# the page's `comments-link` span in `info_dict`, keyed by the page's <h1>
# title. One browser instance is reused for all pages and is always shut down
# at the end, instead of launching (and leaking) a new Chrome per repack.
path = 'C:\\ChromeDriver\\chromedriver.exe'
options = ChromeOptions()
options.add_argument('--headless')
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
options.add_argument('log-level=3')
options.add_experimental_option('excludeSwitches', ['enable-logging'])
info_dict = {}


def _iter_repack_urls(session):
    """Yield the href of every entry listed on index pages 1 through 30.

    Args:
        session: a `requests.Session` used for all index requests so the
            underlying connection can be reused between pages.

    Yields:
        The `href` of the first link in each direct `<li>` child of the
        `lcp_catlist` list. Index pages without that list, and list items
        without a link, are skipped.
    """
    for page_number in range(1, 31):
        url = f'https://fitgirl-repacks.site/all-my-repacks-a-z/?lcp_page0={page_number}#lcp_instance_0'
        response1 = session.get(url)
        index_site = bs4.BeautifulSoup(response1.content, 'lxml')
        ul = index_site.find('ul', class_='lcp_catlist')
        if ul is None:
            # Nothing to iterate on this index page.
            continue
        # Only real <li> tags: iterating `.children` can also yield
        # whitespace text nodes, which have no `.a` attribute.
        for li in ul.find_all('li', recursive=False):
            link = li.a
            if link is not None and link.get('href'):
                yield link['href']


def _read_title_and_comment(driver, site_url):
    """Load `site_url` in `driver` and extract its title and comment text.

    Args:
        driver: a live Selenium WebDriver.
        site_url: address of the repack page to load.

    Returns:
        A `(title, comment)` tuple of plain strings, or `None` when the page
        lacks the `entry-header` <h1> or the `comments-link` anchor.
    """
    driver.get(site_url)
    # NOTE(review): the page source is read right after `get()` returns; if the
    # comment text is filled in asynchronously it may not be final -- confirm.
    site = bs4.BeautifulSoup(driver.page_source, 'lxml')
    comment_span = site.find('span', class_='comments-link')
    header = site.find('header', class_='entry-header')
    if comment_span is None or comment_span.a is None:
        return None
    if header is None or header.h1 is None:
        return None
    # get_text() returns plain `str` objects (never None), so the stored
    # values do not keep the whole parse tree of every page alive.
    return header.h1.get_text(strip=True), comment_span.a.get_text(strip=True)


driver = None
try:
    with requests.Session() as session:
        for site_url in _iter_repack_urls(session):
            if driver is None:
                # Start the browser lazily; if that fails, report it and try
                # again on the next entry, as the original per-page code did.
                try:
                    # NOTE(review): `executable_path` is kept from the original
                    # call; newer Selenium releases expect a Service object
                    # instead -- confirm against the installed version.
                    driver = webdriver.Chrome(executable_path=path, options=options)
                except exceptions.SessionNotCreatedException:
                    print('pass')
                    continue
            found = _read_title_and_comment(driver, site_url)
            if found is None:
                print(f'skipped {site_url}: title or comment link not found')
                continue
            title, comment = found
            print(title + ' ' + comment)
            info_dict[title] = comment
finally:
    # quit() ends the whole browser session, not just the current window.
    if driver is not None:
        driver.quit()
print(info_dict)