0
!pip install git+https://github.com/niklasb/webkit-server.git@refs/pull/35/head

!pip install dryscrape
from xvfbwrapper import Xvfb

from datetime import datetime
import dryscrape 
from bs4 import BeautifulSoup
import requests
from xvfbwrapper import Xvfb

# Scrape efsyn.gr search results for "covid" (2021-01-01 .. 2021-09-13), page
# by page, collecting one dict per teaser into `stories_list`. Scraping stops
# at the first teaser dated before CUTOFF_DATE (`olderNews` is then True).
#
# NOTE: dryscrape.start_xvfb() relies on xvfbwrapper, which only wraps the
# system `Xvfb` executable; `pip install` does not provide that binary. On a
# Debian-based host such as Colab it presumably has to be installed separately
# (e.g. `!apt-get install -y xvfb`) before this runs -- confirm for your image.

SEARCH_URL = (
    "https://www.efsyn.gr/search?created%5B0%5D=2021-01-01"
    "&created%5B1%5D=2021-09-13&field_anonymous_author=&field_author=All"
    "&field_category=All&keywords=covid&sort_by=created&page="
)
CUTOFF_DATE = datetime(2021, 1, 1).date()

olderNews = False
stories_list = []

# Start the virtual X server and the WebKit session once; the original did
# this on every iteration, spawning up to 450 Xvfb processes and sessions.
dryscrape.start_xvfb()
session = dryscrape.Session()

for i in range(0, 450):
    session.visit(SEARCH_URL + str(i))
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    doc = BeautifulSoup(session.body(), "html.parser")

    stories = doc.find_all('article', {'class': 'default-teaser__article'})
    print(stories)
    for story in stories:
        dt = story.find('time', {'class': "default-teaser__date default-date"})
        if dt is None:
            # A teaser without a date cannot be compared to the cutoff; skip
            # it instead of crashing with AttributeError on `dt.text`.
            continue
        print(dt.text)
        # Strip surrounding whitespace so strptime does not reject the value.
        parsed_dt = datetime.strptime(dt.text.strip(), "%d.%m.%Y, %H:%M")
        print(parsed_dt)

        if parsed_dt.date() < CUTOFF_DATE:
            print("END OF SCRAPING")
            olderNews = True
            break

        headline = story.find('div', {'class': 'default-teaser__title'})
        link = story.find('a', {'class': 'full-link'})
        abstract = story.find('div', {'class': 'default-teaser__summary'})

        story_dict = {
            "headline": headline.text.strip(),
            "url": link['href'].replace('\r', ''),
            "summary": abstract.text.replace('\r', '').replace('\n', ''),
            "date": parsed_dt.strftime('%d.%m.%Y'),
        }
        stories_list.append(story_dict)
        print(story_dict)

        print("===")

    # Propagate the inner-loop stop to the page loop. In the original this
    # check was mis-indented (5 spaces), which is an IndentationError.
    if olderNews:
        break

我正在使用 dryscrape 和 BeautifulSoup 做网页抓取(Web Scraping)。但是 Colab 找不到 Xvfb,尽管我已经安装了它。为什么?调用 dryscrape.start_xvfb() 时出现如下错误:`OSError: Can not find Xvfb. Please install it and try again.`(找不到 Xvfb,请安装后重试。)我已经安装过了……问题出在哪里?

4

0 回答 0