
Console output: https://i.stack.imgur.com/x2CAN.png

I am trying to run a Python script that uses Selenium inside a Docker container. Other scripts that use Selenium run without any problem, so I know it is not an issue with the chromedriver path. When I run the script locally it works fine, but when I run it in the container I get the error below. Does anyone know what the problem might be?
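
As a first check (separate from the spider itself), here is the minimal sketch I would run inside the container to see what headless Chrome actually renders; it reuses the same chromedriver path and the relevant flags from the code below, and /tmp/page.png is just an arbitrary output path:

from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')

driver = webdriver.Chrome('/usr/local/bin/chromedriver', options=options)
try:
    driver.get('https://www.stakingrewards.com/cryptoassets')
    print(driver.title)                      # does the page load at all?
    print(len(driver.page_source))           # is the rendered DOM nearly empty?
    driver.save_screenshot('/tmp/page.png')  # inspect what Chrome actually sees
finally:
    driver.quit()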

Code:

from datetime import datetime, date, timedelta  # datetime is needed for datetime.now() below
from scrapy.spiders import Spider
from scrapy import Request
from cryptospiders.items import AssetsSRScrape
import requests, json, time, scrapy, itertools, os
from bs4 import BeautifulSoup
import pandas as pd

from scrapy_selenium import SeleniumRequest
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

timestamp = datetime.now()
dates = datetime.now().date()
allowed_domains = ["stakingrewards.com"]

chromedriver = '/usr/local/bin/chromedriver'
os.environ["webdriver.chrome.driver"] = chromedriver

options = webdriver.ChromeOptions()

options.add_argument('--ignore-certificate-errors')
options.add_argument('--allow-running-insecure-content')
options.add_argument('user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36')

options.add_argument('--headless')
options.add_argument("--enable-javascript")
options.add_argument('--no-sandbox')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

class assetsSRScrape(Spider):
    
    name = 'assetssrscrape'
    item = AssetsSRScrape()
    
    def start_requests(self):

        self.driver = webdriver.Chrome(chromedriver, options=options)
        self.driver.implicitly_wait(10)
        yield SeleniumRequest(url = "https://www.stakingrewards.com/cryptoassets", wait_time=10, callback = self.parse)
   
    def parse(self, response):

        self.driver.get(response.url)

        self.driver.implicitly_wait(10)

        soup = BeautifulSoup(self.driver.find_element_by_class_name("ReactTable").get_attribute("outerHTML"), 'html.parser')

        rows = soup.findAll('div', {"class": 'rt-tr-group'})
            
        for row in rows:
           print(str(row.findAll('div', {"class": 'rt-td'})[1].findAll('span')[0].text).strip())
                
        self.driver.quit()
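
For comparison, replacing the implicit wait with an explicit wait on the table itself turns the failure into a TimeoutException, which makes it clearer whether the React app simply never rendered inside the container. A minimal sketch of that parse step, reusing the WebDriverWait / EC / By imports already at the top of the file (the 30-second timeout is an arbitrary example):

    def parse(self, response):
        self.driver.get(response.url)
        # Block for up to 30s until the table is actually in the DOM,
        # instead of failing immediately when it is missing.
        table = WebDriverWait(self.driver, 30).until(
            EC.presence_of_element_located((By.CLASS_NAME, "ReactTable"))
        )
        soup = BeautifulSoup(table.get_attribute("outerHTML"), 'html.parser')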

Dockerfile:


RUN apt-get update \
 && pip install --upgrade setuptools 
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
RUN apt-get install -y libgtk2.0-0 libgtk-3-0 libnotify-dev \
    libgconf-2-4 libnss3 libxss1 \
    libasound2 libxtst6 xauth xvfb \
    libgbm-dev \
    && rm -rf /var/lib/apt/lists/*
COPY . .

RUN echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" | \
    tee -a /etc/apt/sources.list.d/google.list && \
    wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | \
    apt-key add - && \
    apt-get update && \
    apt-get install -y google-chrome-stable libxss1

RUN BROWSER_MAJOR=$(google-chrome --version | sed 's/Google Chrome \([0-9]*\).*/\1/g') && \
    wget https://chromedriver.storage.googleapis.com/LATEST_RELEASE_${BROWSER_MAJOR} -O chrome_version && \
    wget https://chromedriver.storage.googleapis.com/`cat chrome_version`/chromedriver_linux64.zip && \
    unzip chromedriver_linux64.zip && \
    mv chromedriver /usr/local/bin/ && \
    DRIVER_MAJOR=$(chromedriver --version | sed 's/ChromeDriver \([0-9]*\).*/\1/g') && \
    echo "chrome version: $BROWSER_MAJOR" && \
    echo "chromedriver version: $DRIVER_MAJOR"

# Add scrapy as a user
RUN groupadd -r scrapy && useradd -r -g scrapy -G audio,video scrapy \
    && mkdir -p /home/scrapy && chown -R scrapy:scrapy /home/scrapy

# Run Chrome non-privileged
USER scrapy
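
After the image builds, a quick runtime sanity check (a hypothetical helper, not part of the spider) can confirm that the Chrome and chromedriver majors the Dockerfile pairs up actually match inside the running container:

import re
import subprocess

def major_version(cmd):
    # e.g. "Google Chrome 93.0.4577.63" or "ChromeDriver 93.0.4577.15 (...)"
    out = subprocess.check_output(cmd, text=True)
    return re.search(r'(\d+)\.', out).group(1)

assert major_version(['google-chrome', '--version']) == \
       major_version(['chromedriver', '--version']), "Chrome/driver major version mismatch"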

Output:

  File "/cryptospiders/spiders/stakingrewards.py", line 53, in parse
    soup = BeautifulSoup(self.driver.find_element_by_class_name("ReactTable").get_attribute("outerHTML"), 'html.parser')
  File "/usr/local/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 564, in find_element_by_class_name
    return self.find_element(by=By.CLASS_NAME, value=name)
  File "/usr/local/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 978, in find_element
    'value': value})['value']
  File "/usr/local/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":".ReactTable"}
  (Session info: headless chrome=93.0.4577.63)

2021-09-02 15:53:02 [cryptospiders.rotating_proxies_custom.middlewares] DEBUG: 6 proxies moved from 'dead' to 'reanimated'
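
For context: when a script works locally but headless Chrome inside Docker cannot find elements, the two causes I have seen suggested most often (untested here) are the default 800x600 headless window, which can push a responsive page into a layout that lacks the expected elements, and the small /dev/shm in containers, which can make the renderer fail quietly. Both are addressed with extra Chrome flags:

options.add_argument('--headless')
options.add_argument('--window-size=1920,1080')   # headless default is 800x600
options.add_argument('--disable-dev-shm-usage')   # container /dev/shm is small by default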
