我正在尝试使用 scrapy + MongoDB (PyMongo) 抓取 Spider,但收到错误消息:name must be an instance of basestring。
由于我的 Spider 正在工作,因为它正在将数据抓取到 json 中,我猜错误在我的新管道中,这里是源代码:
import pymongo
from scrapy import log
from scrapy.conf import settings
from scrapy.exceptions import DropItem
class MongoDBPipeline(object):
def __init__(self):
self.server = settings['localhost']
self.port = settings['27017']
self.db = settings['IngressoRapido']
self.col = settings['Shows']
connection = pymongo.Connection(self.server, self.port)
db = connection[self.db]
self.collection = db[self.col]
def process_item(self, item, spider):
err_msg = ''
for banda, local in item.items():
if not local :
err_msg += 'Faltando local %s da banda %s\n' % (banda, item['banda'])
if err_msg:
raise DropItem(err_msg)
self.collection.insert(dict(item))
log.msg('Item written to MongoDB database %s/%s' % (self.db, self.col),
level=log.DEBUG, spider=spider)
return item