0

我正在尝试使用 Selenium 自动化，在 Google 反向图片搜索（以图搜图）中逐个搜索一个照片链接列表。我能够取到第一个链接、在 Google 中搜索它，并获得搜索结果页面的 URL，但随后 Scrapy 就停止了。我该如何让 Scrapy 继续运行，遍历 for 循环中的所有链接？我所说的循环是下面代码中的 `for link in links:`：

import sqlite3
import sys
sys.path.append(r"C:/Users/lado9/Desktop/Code/scraper/scrapy alza amazon/alza_amazon_scrapy/google_scrapy/googlescrapy/googlescrapy")
import scrapy
from items import GooglescrapyItem
from selenium import webdriver
import time
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoSuchFrameException
from selenium.webdriver.common.keys import Keys


class GoogleImgSpider(scrapy.Spider):
    """Reverse-image-search every image link stored in SQLite via Selenium.

    For each ``img_link`` row of the ``alza_amazon_tb`` table the spider
    drives a Chrome browser: it opens the Google Images start page, pastes
    the link into the search-by-image box, refines the query with
    ``amazon.de`` and yields an item holding the resulting page URL.
    """

    name = 'google'
    start_urls = ['https://www.google.de/imghp?hl=en&ogbl']

    def __init__(self, *args, **kwargs):
        """Initialise the Scrapy spider and start the Chrome driver.

        Args:
            *args: Positional spider arguments forwarded to ``scrapy.Spider``.
            **kwargs: Keyword spider arguments forwarded to ``scrapy.Spider``.
        """
        # scrapy.Spider.__init__ sets up name/start_urls handling and stores
        # spider arguments, so it must run before our own setup.
        super().__init__(*args, **kwargs)
        # Raw string: the Windows path contains backslashes that must not be
        # read as escape sequences.
        self.driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')

    def closed(self, reason):
        """Quit the browser when Scrapy closes the spider.

        Args:
            reason: Close reason supplied by Scrapy (unused).
        """
        self.driver.quit()

    def _click_if_present(self, xpath):
        """Click the element matching *xpath*, ignoring it when absent."""
        try:
            self.driver.find_element_by_xpath(xpath).click()
        except NoSuchElementException:
            # Best effort: these are optional dialogs (presumably cookie or
            # consent prompts - TODO confirm) that are not always shown.
            pass

    def _open_image_url_box(self, start_url):
        """Load *start_url* and open the search-by-image input box."""
        self.driver.get(start_url)
        self._click_if_present("//button[@class='tHlp8d'][@id='zV9nZe']")
        self._click_if_present("//button[@class='EzgVlc']")
        self.driver.find_element_by_xpath('//*[@id="sbtc"]/div/div[3]/div[2]').click()
        # Give the image-URL input time to appear after the click.
        time.sleep(1)

    def _load_image_links(self):
        """Return all non-empty ``img_link`` values from the SQLite table."""
        conn = sqlite3.connect(r"C:/Users/lado9/Desktop/Code/scraper/scrapy alza amazon/alza_amazon_scrapy/alza_amazon_scrapy/alza_amazon_tb.db")
        try:
            # Fetch everything up front so the connection is not held open
            # while the generator in parse() is suspended between yields.
            rows = conn.execute('SELECT img_link FROM alza_amazon_tb').fetchall()
        finally:
            conn.close()
        # Each row is a 1-tuple; unpack it and skip NULL/empty links.
        return [row[0] for row in rows if row[0]]

    def parse(self, response):
        """Search every stored image link and yield the result page URLs.

        Args:
            response: Scrapy response for the start URL; only its ``url`` is
                used, the page itself is rendered by Selenium.

        Yields:
            GooglescrapyItem: one new item per link, with ``url`` set to the
            browser's current URL after the refined search.
        """
        for link in self._load_image_links():
            # Return to the start page for every link: after a search the
            # browser sits on the results page, where the image-URL input
            # looked up below is presumably gone, so the second iteration
            # would fail with NoSuchElementException.
            self._open_image_url_box(response.url)

            search = self.driver.find_element_by_xpath(".//input[@id='Ycyxxc'][@class='lst']")
            search.send_keys(link)
            search.send_keys(Keys.RETURN)

            # Narrow the reverse-image results down with an extra keyword.
            search2 = self.driver.find_element_by_xpath('//*[@id="sbtc"]/div[2]/div[2]/input')
            search2.send_keys(Keys.SPACE)
            search2.send_keys('amazon.de')
            search2.send_keys(Keys.RETURN)

            try:
                self.driver.switch_to.frame(0)
                self.driver.find_element_by_id("introAgreeButton").click()
            except (NoSuchFrameException, NoSuchElementException):
                # No frame, or no agree button inside it: nothing to dismiss.
                pass
            finally:
                # Always leave the frame so later lookups run against the
                # top-level document again.
                self.driver.switch_to.default_content()

            # A fresh item per result: re-using one object would make every
            # yielded item alias the same (last written) URL.
            item = GooglescrapyItem()
            item['url'] = self.driver.current_url
            yield item
4

0 回答 0