我正在尝试使用硒自动化在谷歌反向图像搜索中搜索照片链接列表。我能够获得第一个链接谷歌它并获得谷歌搜索页面的 URL,但随后scrapy 停止。我如何通过for循环中的所有链接继续运行scrapy?我正在谈论的循环是for link in links:
下面的代码:
import sqlite3
import sys
sys.path.append(r"C:/Users/lado9/Desktop/Code/scraper/scrapy alza amazon/alza_amazon_scrapy/google_scrapy/googlescrapy/googlescrapy")
import scrapy
from items import GooglescrapyItem
from selenium import webdriver
import time
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoSuchFrameException
from selenium.webdriver.common.keys import Keys
class GoogleImgSpider(scrapy.Spider):
name = 'google'
start_urls = ['https://www.google.de/imghp?hl=en&ogbl']
def __init__(self):
self.driver = webdriver.Chrome('C:\Program Files (x86)\chromedriver.exe')
def parse(self, response):
name = GooglescrapyItem()
self.driver.get(response.url)
try:
self.driver.find_element_by_xpath("//button[@class='tHlp8d'][@id='zV9nZe']").click()
except NoSuchElementException:
pass
try:
self.driver.find_element_by_xpath("//button[@class='EzgVlc']").click()
except NoSuchElementException:
pass
self.driver.find_element_by_xpath('//*[@id="sbtc"]/div/div[3]/div[2]').click()
time.sleep(1)
conn = sqlite3.connect(r"C:/Users/lado9/Desktop/Code/scraper/scrapy alza amazon/alza_amazon_scrapy/alza_amazon_scrapy/alza_amazon_tb.db")
c = conn.cursor()
links = c.execute('SELECT img_link FROM alza_amazon_tb')
for link in links:
search = self.driver.find_element_by_xpath(".//input[@id='Ycyxxc'][@class='lst']")
search.send_keys(link)
search.send_keys(Keys.RETURN)
search2 = self.driver.find_element_by_xpath('//*[@id="sbtc"]/div[2]/div[2]/input')
search2.send_keys(Keys.SPACE)
search2.send_keys('amazon.de')
search2.send_keys(Keys.RETURN)
try:
self.driver.switch_to_frame(0)
self.driver.find_element_by_id("introAgreeButton").click()
except NoSuchFrameException:
pass
url = self.driver.current_url
name['url'] = self.driver.current_url
yield name