我从一个站点设置了一个代理抓取器,但我什么也没得到。
import scrapy
from scrapy.item import Field, Item
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.loader import ItemLoader
from scrapy.loader.processors import MapCompose
class ProxyServersPro(Item):
ip = scrapy.Field()
port = scrapy.Field()
country = scrapy.Field()
speed = scrapy.Field()
protocol = scrapy.Field()
anon = scrapy.Field()
class ProxyServersPro(CrawlSpider):
name = "ProxyServersProCrawler"
start_urls = ["https://es.proxyservers.pro/proxy/list/speed/2/anonymity/elite/order/duration/order_dir/asc/page/1"]
allowed_domains = ['proxyservers.pro']
rules = {
Rule(LinkExtractor(allow=r'page'), callback = 'parse_item')
}
def parse_item(self, response):
item = ItemLoader (ProxyServersPro(), response=response)
item.add_xpath('ip', '//*[@id="content-content"]/div/div/div[1]/table/tbody/tr[1]/td[2]/a/text()')
item.add_xpath('port', '//html/body/div[1]/div/div[2]/div/div/div/div[1]/table/tbody/tr[1]/td[3]/span/text()')
item.add_xpath('country', '//html/body/div[1]/div/div[2]/div/div/div/div[1]/table/tbody/tr[1]/td[4]/text()')
item.add_xpath('speed', '//html/body/div[1]/div/div[2]/div/div/div/div[1]/table/tbody/tr[1]/td[5]/div[1]/div/div/text()')
item.add_xpath('protocol', '//html/body/div[1]/div/div[2]/div/div/div/div[1]/table/tbody/tr[1]/td[7]/text()')
item.add_xpath('anon', '//html/body/div[1]/div/div[2]/div/div/div/div[1]/table/tbody/tr[1]/td[8]/text()')
return item.load_item()
这就是控制台所说的。
2019-03-24 04:53:27 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
有人能弄清楚发生了什么吗?谢谢