嗨,我试图在我的代码中实现这一点。但我收到以下错误:exceptions.NameError: global name 'Request' is not defined
。
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from bs4 import BeautifulSoup
class spider_aicte(BaseSpider):
name = "Indian_Colleges"
allowed_domains = ["http://www.domain.org"]
start_urls = [
"http://www.domain.org/appwebsite.html",
]
def parse(self, response):
filename = response.url.split("/")[-2]
soup = BeautifulSoup(response.body)
for link in soup.find_all('a'):
download_link = link.get('href')
if '.pdf' in download_link:
pdf_link = "http://www.domain.org" + download_link
print pdf_link
class FileSpider(BaseSpider):
name = "fspider"
allowed_domains = ["www.domain.org"]
start_urls = [
pdf_link
]
for url in pdf_link:
yield Request(url, callback=self.save_pdf)
def save_pdf(self, response):
path = self.get_path(response.url)
with open(path, "wb") as f:
f.write(response.body)