经过几个小时的修补,并尝试了在 Stack Overflow 上找到的代码片段之后,我终于设法让 Scrapy 定期运行:
timeout = 60.0 # seconds
class UrlCrawlerScript(Process):
def __init__(self, spider):
Process.__init__(self)
settings = get_project_settings()
self.crawler = Crawler(settings)
if not hasattr(project, 'crawler'):
self.crawler.install()
self.crawler.configure()
self.crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
self.spider = spider
def run(self):
self.crawler.crawl(self.spider)
self.crawler.start()
reactor.run()
def run_spider():
spider = MarketSpider()
crawler = UrlCrawlerScript(spider)
crawler.start()
crawler.join()
print 'finished'
l = task.LoopingCall(run_spider)
l.start(timeout) # call every sixty seconds
reactor.run()
我的问题是:第二次运行之后,我仍然会遇到 ReactorAlreadyRunning 错误。
我该如何解决这个问题?