我正在使用 scrapy 框架并通过创建两个蜘蛛文件从两个 url 获取数据。
现在,例如,当我运行 spider1 时,从 url1 抓取的数据会保存到 csv1 文件中;当我运行第二个蜘蛛 spider2 时,数据会保存到 csv2 文件中。
实际上,我要做的是将来自不同蜘蛛的所有数据保存到不同工作表中的单个 csv 文件中(工作表名称应为蜘蛛名称)
In short, my question is: how can I write data into multiple sheets of a single CSV file from Python?
pipeline.py
from w3c_browser.items import WCBrowserItem
import csv
from csv import DictWriter
from cStringIO import StringIO
from datetime import datetime
from scrapy.xlib.pydispatch import dispatcher
from scrapy import signals
from scrapy import log
class W3CBrowserPipeline(object):
    """Scrapy item pipeline that writes scraped browser data to CSV files.

    The pipeline hooks itself to the ``spider_opened`` and ``spider_closed``
    signals.  When one of the supported spiders starts, a fresh output file
    named ``csv/<spider name>-<ddmmyy>.csv`` is opened; rows are appended in
    ``process_item`` and the file is closed when the spider finishes.
    """

    # Spiders for which a dedicated per-run CSV file is opened.
    _CSV_SPIDERS = ('browser_statistics', 'browser_os', 'browser_display')

    # Item fields written for each spider, in column order.  Spiders that
    # are not listed here have their items passed through without a row.
    _ROW_FIELDS = {
        'browser_statistics': (
            'year',
            'internet_explorer',
            'firefox',
            'chrome',
            'safari',
            'opera',
        ),
        'browser_os': (
            'year',
            'vista',
            'nt',
            'winxp',
            'linux',
            'mac',
            'mobile',
        ),
    }

    def __init__(self):
        """Connect the signal handlers and open the default output file."""
        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)
        # Keep the file object so it can be closed later; csv.writer does
        # not expose (or close) the file it wraps.
        self._csv_file = open('wcbbrowser.csv', 'wb')
        self.brandCategoryCsv = csv.writer(self._csv_file)

    def _close_csv_file(self):
        """Close the currently open output file, if there is one."""
        csv_file = getattr(self, '_csv_file', None)
        if csv_file is not None:
            csv_file.close()
            self._csv_file = None

    def spider_opened(self, spider):
        """Record the spider's start time and open its CSV output file.

        Spiders whose name is not in ``_CSV_SPIDERS`` only get their
        ``started_on`` attribute set; the current writer is left untouched.
        """
        now = datetime.now()
        spider.started_on = now
        if spider.name not in self._CSV_SPIDERS:
            return
        log.msg("opened spider %s at time %s" % (spider.name, now.strftime('%H-%M-%S')))
        path = "csv/%s-%s.csv" % (spider.name, now.strftime('%d%m%y'))
        # Open the new file first so a failure leaves the old writer usable.
        new_file = open(path, "wb")
        self._close_csv_file()
        self._csv_file = new_file
        self.brandCategoryCsv = csv.writer(
            new_file, delimiter=',', quoting=csv.QUOTE_MINIMAL)

    def process_item(self, item, spider):
        """Write one CSV row for *item* and return the item unchanged.

        The columns are chosen by ``spider.name`` from ``_ROW_FIELDS``.
        The item is always returned, including for spiders without a
        column layout, so later pipeline stages still receive it.

        Raises:
            KeyError: if the item lacks one of the expected fields.
        """
        fields = self._ROW_FIELDS.get(spider.name)
        if fields is not None:
            self.brandCategoryCsv.writerow([item[field] for field in fields])
        return item

    def spider_closed(self, spider):
        """Log the shutdown, close the output file and print the run time."""
        log.msg("closed spider %s at %s" % (spider.name, datetime.now().strftime('%H-%M-%S')))
        # Closing flushes buffered rows to disk and releases the handle.
        self._close_csv_file()
        work_time = datetime.now() - spider.started_on
        # Single-argument form prints the same text on Python 2 and 3.
        print("%s Total Time taken by the spider to run>>>>>>>>>>>" % work_time)