When I call the spider from a Python script, I get an ImportError:
ImportError: No module named app.models
My items.py looks like this:
from scrapy.item import Item, Field
from scrapy.contrib.djangoitem import DjangoItem
from app.models import Person

class aqaqItem(DjangoItem):
    django_model = Person
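As far as I understand, that from app.models import Person line can only resolve if the directory containing the app package (myweb) is already on sys.path and DJANGO_SETTINGS_MODULE is set when items.py gets imported, which is what my settings.py below tries to arrange. A minimal check of that assumption, run from the scrapy project directory (the ../../myweb path is taken from the directory tree further down), looks like:

import os
import sys

# Assumed layout: run from aqaq/aqaq, with the Django project in ../../myweb.
sys.path.insert(0, os.path.abspath('../../myweb'))
os.environ['DJANGO_SETTINGS_MODULE'] = 'myweb.settings'

from app.models import Person   # should now import without the ImportError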
My settings.py looks like this:
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
# http://doc.scrapy.org/topics/settings.html
#
BOT_NAME = 'aqaq'
BOT_VERSION = '1.0'
SPIDER_MODULES = ['aqaq.spiders']
NEWSPIDER_MODULE = 'aqaq.spiders'
USER_AGENT = '%s/%s' % (BOT_NAME, BOT_VERSION)
ITEM_PIPELINES = [
'aqaq.pipelines.JsonWithEncodingPipeline']
import sys
import os

# Add the Django project directory (../../myweb, relative to where Scrapy is
# started) to sys.path so that 'app.models' can be imported.
c = os.getcwd()
os.chdir("../../myweb")
d = os.getcwd()
os.chdir(c)
sys.path.insert(0, d)

# Setting up django's settings module name.
# This module is located at /home/rolando/projects/myweb/myweb/settings.py.
os.environ['DJANGO_SETTINGS_MODULE'] = 'myweb.settings'
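As far as I can tell, the os.getcwd()/os.chdir() lines above resolve "../../myweb" against whatever directory the process was started from, so they only work when Scrapy is launched from the project directory. A working-directory-independent sketch of the same path setup (assuming this settings.py sits at aqaq/aqaq/settings.py, as in the tree below) would be:

import os
import sys

# Resolve the Django project relative to this file rather than the cwd.
DJANGO_PROJECT_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'myweb'))
sys.path.insert(0, DJANGO_PROJECT_DIR)

os.environ['DJANGO_SETTINGS_MODULE'] = 'myweb.settings'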
The Python script I use to call the spider looks like this:
from twisted.internet import reactor
from scrapy.crawler import Crawler
from scrapy import log, signals
from final.aqaq.aqaq.spiders.spider import aqaqspider
from scrapy.utils.project import get_project_settings
def stop_reactor():
    reactor.stop()
spider = aqaqspider(domain='aqaq.com')
settings = get_project_settings()
crawler = Crawler(settings)
crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
crawler.configure()
crawler.crawl(spider)
crawler.start()
log.start()
reactor.run()
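My guess is that when the spider is started from this script rather than with scrapy crawl, the sys.path and environment tweaks inside settings.py have not run yet by the time items.py is imported, so app.models cannot be found. A sketch of the script with the environment prepared up front (the paths, the aqaq.spiders.spider import, and the file's location next to scrapy.cfg are assumptions based on the tree below; SCRAPY_SETTINGS_MODULE is the environment variable Scrapy consults for the project settings):

import os
import sys

HERE = os.path.dirname(os.path.abspath(__file__))        # assumed: the directory holding scrapy.cfg
sys.path.insert(0, HERE)                                  # makes the 'aqaq' package importable
sys.path.insert(0, os.path.join(HERE, '..', 'myweb'))     # makes 'app.models' importable
os.environ['DJANGO_SETTINGS_MODULE'] = 'myweb.settings'
os.environ['SCRAPY_SETTINGS_MODULE'] = 'aqaq.settings'

from twisted.internet import reactor
from scrapy.crawler import Crawler
from scrapy import log, signals
from scrapy.utils.project import get_project_settings
from aqaq.spiders.spider import aqaqspider

spider = aqaqspider(domain='aqaq.com')
crawler = Crawler(get_project_settings())
crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
crawler.configure()
crawler.crawl(spider)
crawler.start()
log.start()
reactor.run()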
My directory structure looks like this:
.
|-- aqaq
| |-- aqaq
| | |-- call.py
| | |-- __init__.py
| | |-- __init__.pyc
| | |-- items.py
| | |-- items.pyc
| | |-- pipelines.py
| | |-- pipelines.pyc
| | |-- settings.py
| | |-- settings.pyc
| | `-- spiders
| | |-- aqaq.json
| | |-- __init__.py
| | |-- __init__.pyc
| | |-- item.json
| | |-- spider.py
| | |-- spider.pyc
| | `-- url
| |-- call.py
| |-- call_spider.py
| `-- scrapy.cfg
|-- mybot
| |-- mybot
| | |-- __init__.py
| | |-- items.py
| | |-- pipelines.py
| | |-- settings.py
| | `-- spiders
| | |-- example.py
| | `-- __init__.py
| `-- scrapy.cfg
`-- myweb
|-- app
| |-- admin.py
| |-- admin.pyc
| |-- __init__.py
| |-- __init__.pyc
| |-- models.py
| |-- models.pyc
| |-- tests.py
| `-- views.py
|-- manage.py
`-- myweb
|-- file
|-- __init__.py
|-- __init__.pyc
|-- settings.py
|-- settings.pyc
|-- urls.py
|-- urls.pyc
|-- wsgi.py
`-- wsgi.pyc
Please help me, as I am new to Scrapy and really confused.

I tried adding
import os
os.environ['DJANGO_SETTINGS_MODULE'] = 'myweb.settings'
at the top of my script, but that gave a new error:
get_project_settings is invalid
My Scrapy version is 0.18.
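If get_project_settings really is missing from this release, one fallback I am considering is to build the settings object directly from the project settings module (a sketch against what I believe is the 0.18 API; the CrawlerSettings class and the aqaq.settings import path are assumptions):

from scrapy.crawler import Crawler
from scrapy.settings import CrawlerSettings

import aqaq.settings as settings_module

# Wrap the project's settings module instead of calling get_project_settings().
crawler = Crawler(CrawlerSettings(settings_module))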
Thanks everyone, I found the solution.