由于此时日志观察器已经启动,因此蜘蛛__init__()
还不足以自行调用;log.start()
因此,您需要重新初始化日志记录状态以欺骗 Scrapy (重新)启动它。
在您的蜘蛛类文件中:
from datetime import datetime
from scrapy import log
from scrapy.spider import BaseSpider
class ExampleSpider(BaseSpider):
name = "example"
allowed_domains = ["example.com"]
start_urls = ["http://www.example.com/"]
def __init__(self, name=None, **kwargs):
LOG_FILE = "scrapy_%s_%s.log" % (self.name, datetime.now())
# remove the current log
# log.log.removeObserver(log.log.theLogPublisher.observers[0])
# re-create the default Twisted observer which Scrapy checks
log.log.defaultObserver = log.log.DefaultObserver()
# start the default observer so it can be stopped
log.log.defaultObserver.start()
# trick Scrapy into thinking logging has not started
log.started = False
# start the new log file observer
log.start(LOG_FILE)
# continue with the normal spider init
super(ExampleSpider, self).__init__(name, **kwargs)
def parse(self, response):
...
输出文件可能如下所示:
scrapy_example_2012-08-25 12:34:48.823896.log