from scrapy import project, signals
from scrapy.crawler import Settings
from scrapy.crawler import Crawler, CrawlerProcess
from scrapy.xlib.pydispatch import dispatcher
from multiprocessing.queues import Queue
import multiprocessing

class CrawlerWorker(multiprocessing.Process):

    def __init__(self, spider, result_queue):
        multiprocessing.Process.__init__(self)
        self.result_queue = result_queue

        self.crawler = Crawler(Settings())
        if not hasattr(project, 'crawler'):
            self.crawler.install()
        self.crawler.configure()

        self.items = []
        self.spider = spider
        dispatcher.connect(self._item_passed, signals.item_passed)

    def _item_passed(self, item):
        self.items.append(item)

    def run(self):
        self.crawler.crawl(self.spider)
        self.crawler.start()
        self.crawler.stop()
        self.result_queue.put(self.items)
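
For context, I drive the worker roughly like this (a sketch; MySpider stands in for my actual spider class):

result_queue = multiprocessing.Queue()
worker = CrawlerWorker(MySpider(), result_queue)
worker.start()                 # run the crawl in a child process
items = result_queue.get()     # block until the scraped items arrive
worker.join()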

I get an error when I try to use just CrawlerProcess(settings) with settings from scrapy.conf; there seems to be a discrepancy with what the Scrapy docs say here: http://doc.scrapy.org/en/latest/topics/practices.html
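
Roughly, that attempt looked like this (a sketch, assuming the older global-settings style):

from scrapy.conf import settings      # pre-0.16 style global settings object
from scrapy.crawler import CrawlerProcess

crawler = CrawlerProcess(settings)    # this is the construction that was giving me an error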

The code above comes from an older Scrapy version; I am trying to get it to work with Scrapy 0.16.

This is the error I get when I run my Python script:

Traceback (most recent call last):
  File "server.py", line 5, in <module>
    from scraper import Scraper
  File "/home/me/spider/spider/scraper.py", line 6, in <module>
    from crawlerworker import CrawlerWorker
  File "/home/me/spider/spider/crawlerworker.py", line 2, in <module>
    from scrapy.crawler import Settings
ImportError: cannot import name Settings

1 Answer


Try:

from scrapy.settings import Settings
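
In Scrapy 0.16, Settings is importable from scrapy.settings rather than scrapy.crawler, while Crawler stays in scrapy.crawler. The top of crawlerworker.py would then look roughly like this (a sketch of the adjusted imports):

from scrapy import project, signals
from scrapy.settings import Settings       # Settings comes from scrapy.settings in 0.16
from scrapy.crawler import Crawler
from scrapy.xlib.pydispatch import dispatcher
import multiprocessing

# the crawler is then created from a Settings instance, as in your class:
# crawler = Crawler(Settings())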
answered 2013-04-09T18:41:33.217