我正在尝试抓取网站 https://tonaton.com/en/ads/ghana/electronics 的内容。页面上有一个“下一页”(Next)按钮,我想点击它并继续抓取后面页面的内容。问题是,无论是在 scrapy shell 还是在 Splash 中,该按钮的 XPath 或 CSS 选择器都不返回任何结果,所以我卡住了,无法进入后续页面抓取我需要的数据。能否给我一些帮助?下面是我目前写出的代码,但它没有得到正确的结果。
# -*- coding: utf-8 -*-
import scrapy
import scrapy_selenium
from scrapy_selenium import SeleniumRequest
class VisionSpider(scrapy.Spider):
    """Collect seller contact details from tonaton.com.

    Callback chain:
        start_requests -> parse            home page, one link per category
                       -> parse_business   category listing (paginated)
                       -> next_parse       single ad, link to the seller's shop
                       -> another_parse    shop page, reveals the phone number
                       -> new_parse        yields the final item

    Scrapy selectors only read the downloaded HTML; they cannot click.
    Anything that needs a click is therefore delegated to the browser through
    ``SeleniumRequest(script=...)``.
    """

    name = 'vision'

    # JavaScript passed to SeleniumRequest(script=...). The attribute selectors
    # mirror the exact-match XPath class tests used by the callbacks below.
    # NOTE(review): the page source is presumably captured right after the
    # script runs, so content that appears asynchronously after the click may
    # be missed - confirm against the live site.
    _CLICK_NEXT_PAGE_JS = (
        "var el = document.querySelector("
        "\"div[class='action-button--1O8tU']\");"
        " if (el) { el.click(); }"
    )
    _CLICK_SHOW_NUMBER_JS = (
        "var el = document.querySelector("
        "\"button[class='contact-section--1qlvP gtm-show-number']\");"
        " if (el) { el.click(); }"
    )

    def start_requests(self):
        """Open the home page in the Selenium-driven browser."""
        # NOTE(review): scrapy-selenium appears to honour wait_time only when
        # wait_until is also given - confirm if an explicit wait is needed.
        yield SeleniumRequest(
            url='https://tonaton.com',
            wait_time=3,
            screenshot=True,
            callback=self.parse
        )

    def parse(self, response):
        """Follow every category link found on the home page."""
        businesses = response.xpath(
            "//a[@class='link--1t8hM gtm-home-category-link-click']")
        for business in businesses:
            link = business.xpath(".//@href").get()
            if not link:
                # response.follow() rejects a missing URL; skip such anchors.
                continue
            category = business.xpath(".//div[2]/p/text()").get()
            yield response.follow(
                url=link,
                callback=self.parse_business,
                meta={'business_category': category},
            )

    def parse_business(self, response):
        """Follow every ad on a listing page, then move to the next page."""
        category = response.request.meta['business_category']

        rows = response.xpath("//a[@class='card-link--3ssYv gtm-ad-item']")
        for row in rows:
            new_link = row.xpath(".//@href").get()
            if not new_link:
                continue
            yield response.follow(
                url=new_link,
                callback=self.next_parse,
                meta={'business_category': category},
            )

        next_page = response.xpath("//div[@class = 'action-button--1O8tU']")
        if not next_page:
            return

        # Preferred path: the control is (or wraps, or is wrapped by) a link,
        # so its href can be followed without a browser click.
        next_href = next_page.xpath("(.//a | ancestor::a)/@href").get()
        if next_href:
            yield response.follow(
                url=next_href,
                callback=self.parse_business,
                meta={'business_category': category},
            )
            return

        # Fallback: no href available, so let the browser click the control.
        # If the previous click left us on the same URL, stop here; otherwise
        # the spider would re-request the same page forever (dont_filter=True
        # bypasses the duplicate filter).
        if response.request.meta.get('paginated_from') == response.url:
            self.logger.info(
                "Pagination click did not leave %s; stopping.", response.url)
            return
        yield SeleniumRequest(
            url=response.url,
            wait_time=3,
            script=self._CLICK_NEXT_PAGE_JS,
            callback=self.parse_business,
            meta={
                'business_category': category,
                'paginated_from': response.url,
            },
            dont_filter=True,
        )

    def next_parse(self, response):
        """From an ad page, follow the link to the seller's shop page."""
        category = response.request.meta['business_category']
        lines = response.xpath(
            "//a[@class='member-link--IzDly gtm-visit-shop']")
        for line in lines:
            next_link = line.xpath(".//@href").get()
            if not next_link:
                continue
            yield response.follow(
                url=next_link,
                callback=self.another_parse,
                meta={'business_category': category},
            )

    def another_parse(self, response):
        """Re-render the shop page with the "show number" button clicked.

        The same URL is requested again through Selenium, this time with a
        script that clicks the button, and the result goes to ``new_parse``.
        """
        category = response.request.meta['business_category']
        yield SeleniumRequest(
            url=response.url,
            wait_time=3,
            script=self._CLICK_SHOW_NUMBER_JS,
            callback=self.new_parse,
            meta={'business_category': category},
            # Same URL as the response being processed; without this the
            # duplicate filter would drop the request.
            dont_filter=True,
        )

    def new_parse(self, response):
        """Yield one item per seller info container on the shop page."""
        category = response.request.meta['business_category']
        containers = response.xpath("//div[@class='info-container--3pMhK']")
        for info in containers:
            name = info.xpath(".//div/span/text()").get()
            location = info.xpath(".//div/div/div/span/text()").get()
            phone = info.xpath(
                ".//div[3]/div/button/div[2]/div/text()").get()
            yield {
                'business_category': category,
                'business_name': name,
                'phone': phone,
                'location': location
            }