Console output: https://i.stack.imgur.com/x2CAN.png
I'm trying to run a Python script that uses Selenium inside a Docker container. I can run other Selenium scripts without any problems, so I know this isn't an issue with the chromedriver path. When I run the script locally it works fine, but when I run it in the container I get the error below. Does anyone know what the problem might be?
Code:
from datetime import date, datetime, timedelta  # datetime is needed for datetime.now() below
from scrapy.spiders import Spider
from scrapy import Request
from cryptospiders.items import AssetsSRScrape
import requests, json, time, scrapy, itertools, os
from bs4 import BeautifulSoup
import pandas as pd
from scrapy_selenium import SeleniumRequest
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

timestamp = datetime.now()
dates = datetime.now().date()
allowed_domains = ["stakingrewards.com"]

chromedriver = '/usr/local/bin/chromedriver'
os.environ["webdriver.chrome.driver"] = chromedriver
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--allow-running-insecure-content')
options.add_argument('user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36')
options.add_argument('headless')
options.add_argument("--enable-javascript")
options.add_argument('--no-sandbox')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

class assetsSRScrape(Spider):
    name = 'assetssrscrape'
    item = AssetsSRScrape()

    def start_requests(self):
        self.driver = webdriver.Chrome(chromedriver, options=options)
        self.driver.implicitly_wait(10)
        yield SeleniumRequest(url="https://www.stakingrewards.com/cryptoassets", wait_time=10, callback=self.parse)

    def parse(self, response):
        self.driver.get(response.url)
        self.driver.implicitly_wait(10)
        soup = BeautifulSoup(self.driver.find_element_by_class_name("ReactTable").get_attribute("outerHTML"), 'html.parser')
        rows = soup.findAll('div', {"class": 'rt-tr-group'})
        for row in rows:
            print(str(row.findAll('div', {"class": 'rt-td'})[1].findAll('span')[0].text).strip())
        self.driver.quit()
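Note that WebDriverWait and expected_conditions are imported above but never used. A minimal sketch of what I think an explicit wait for the table would look like, instead of relying on implicit waits (untested; wait_for_table is just a hypothetical helper name, and it reuses the WebDriverWait/EC/By imports already in the script):

def wait_for_table(driver, timeout=10):
    # Block until the ReactTable element is attached to the DOM, then return it.
    # On failure this raises TimeoutException rather than NoSuchElementException,
    # which at least distinguishes "never rendered" from "wrong selector".
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CLASS_NAME, "ReactTable"))
    )

parse() would then call table = wait_for_table(self.driver) and hand table.get_attribute("outerHTML") to BeautifulSoup.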
Dockerfile:
RUN apt-get update \
&& pip install --upgrade setuptools
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
RUN apt-get install -y libgtk2.0-0 libgtk-3-0 libnotify-dev \
libgconf-2-4 libnss3 libxss1 \
libasound2 libxtst6 xauth xvfb \
libgbm-dev \
&& rm -rf /var/lib/apt/lists/*
COPY . .
RUN echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" | \
tee -a /etc/apt/sources.list.d/google.list && \
wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | \
apt-key add - && \
apt-get update && \
apt-get install -y google-chrome-stable libxss1
RUN BROWSER_MAJOR=$(google-chrome --version | sed 's/Google Chrome \([0-9]*\).*/\1/g') && \
wget https://chromedriver.storage.googleapis.com/LATEST_RELEASE_${BROWSER_MAJOR} -O chrome_version && \
wget https://chromedriver.storage.googleapis.com/`cat chrome_version`/chromedriver_linux64.zip && \
unzip chromedriver_linux64.zip && \
mv chromedriver /usr/local/bin/ && \
DRIVER_MAJOR=$(chromedriver --version | sed 's/ChromeDriver \([0-9]*\).*/\1/g') && \
echo "chrome version: $BROWSER_MAJOR" && \
echo "chromedriver version: $DRIVER_MAJOR"
# Add scrapy as a user
RUN groupadd -r scrapy && useradd -r -g scrapy -G audio,video scrapy \
&& mkdir -p /home/scrapy && chown -R scrapy:scrapy /home/scrapy
# Run Chrome non-privileged
USER scrapy
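In case it's relevant: I've seen two container-oriented Chrome flags recommended for headless runs in Docker that I am not currently setting. This is an assumption on my part, not something I've verified; the sketch just extends the options object from the script above:

# Hypothetical additions, not in my current code:
options.add_argument('--disable-dev-shm-usage')  # /dev/shm is small in default Docker containers
options.add_argument('--window-size=1920,1080')  # headless Chrome otherwise uses a small viewport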
Output:
File "/cryptospiders/spiders/stakingrewards.py", line 53, in parse
soup = BeautifulSoup(self.driver.find_element_by_class_name("ReactTable").get_attribute("outerHTML"), 'html.parser')
File "/usr/local/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 564, in find_element_by_class_name
return self.find_element(by=By.CLASS_NAME, value=name)
File "/usr/local/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 978, in find_element
'value': value})['value']
File "/usr/local/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/usr/local/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":".ReactTable"}
(Session info: headless chrome=93.0.4577.63)
2021-09-02 15:53:02 [cryptospiders.rotating_proxies_custom.middlewares] DEBUG: 6 proxies moved from 'dead' to 'reanimated'