!pip install git+https://github.com/niklasb/webkit-server.git@refs/pull/35/head
!pip install dryscrape
from xvfbwrapper import Xvfb
from datetime import datetime
import dryscrape
from bs4 import BeautifulSoup
import requests
from xvfbwrapper import Xvfb
# Scrape the efsyn.gr search results for "covid" (2021-01-01 .. 2021-09-13)
# and collect one dict per article teaser into `stories_list`.
#
# NOTE(review): dryscrape.start_xvfb() launches the Xvfb *system* binary.
# The pip package `xvfbwrapper` is only a Python wrapper around it, so
# "OSError: Can not find Xvfb" means the binary itself is missing from the
# machine (on Debian/Ubuntu/Colab it comes from the `xvfb` apt package) --
# confirm it is installed before running this cell.

# Search URL without the page index; the index is appended per request.
SEARCH_URL = "https://www.efsyn.gr/search?created%5B0%5D=2021-01-01&created%5B1%5D=2021-09-13&field_anonymous_author=&field_author=All&field_category=All&keywords=covid&sort_by=created&page="
# Upper bound on the number of result pages to request.
MAX_PAGES = 450
# Articles dated before this day end the scrape.
CUTOFF_DATE = datetime(2021, 1, 1).date()

olderNews = False
stories_list = []

# Start the virtual X server and the browser session ONCE. Doing this inside
# the page loop spawned a new Xvfb/session for every page and never released
# the previous ones.
dryscrape.start_xvfb()
session = dryscrape.Session()

for i in range(MAX_PAGES):
    url = SEARCH_URL + str(i)
    session.visit(url)
    response = session.body()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    doc = BeautifulSoup(response, "html.parser")
    stories = doc.find_all('article', {'class': 'default-teaser__article'})
    print(stories)

    # An empty page means we ran past the last page of results; requesting
    # the remaining pages would only return more empty pages.
    if not stories:
        print("END OF SCRAPING")
        break

    for story in stories:
        dt = story.find('time', {'class': "default-teaser__date default-date"})
        if dt is None:
            # Teaser without a date element: cannot be classified, skip it.
            continue
        print(dt.text)
        # Strip surrounding whitespace so strptime does not reject the value.
        parsed_dt = datetime.strptime(dt.text.strip(), "%d.%m.%Y, %H:%M")
        print(parsed_dt)

        if parsed_dt.date() < CUTOFF_DATE:
            # Article is older than the cutoff: stop the whole scrape.
            print("END OF SCRAPING")
            olderNews = True
            break

        headline = story.find('div', {'class': 'default-teaser__title'})
        link = story.find('a', {'class': 'full-link'})
        abstract = story.find('div', {'class': 'default-teaser__summary'})
        if headline is None or link is None or abstract is None:
            # Incomplete teaser markup: skip instead of crashing the run.
            continue

        story_dict = {
            "headline": headline.text.strip(),
            "url": link['href'].replace('\r', ''),
            "summary": abstract.text.replace('\r', '').replace('\n', ''),
            "date": parsed_dt.strftime('%d.%m.%Y').replace('\n', ''),
        }
        stories_list.append(story_dict)
        print(story_dict)
        print("===")

    if olderNews:
        break
我正在做 Web Scraping,使用 dryscrape 和 BeautifulSoup。但是 Colab 找不到 Xvfb,尽管我已经安装了它。为什么?调用 dryscrape.start_xvfb() 时出现的错误是:OSError: Can not find Xvfb. Please install it and try again. 我已经安装了……问题出在哪里?