0

我有以下代码:

from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains

from selenium.webdriver.common.proxy import Proxy, ProxyType

# Manual proxy configuration: the same host:port is given for HTTP, FTP and
# SSL traffic.
proxy = Proxy({
    'proxyType': ProxyType.MANUAL,
    'httpProxy': '192.156.1.1:33',
    'ftpProxy': '192.156.1.1:33',
    'sslProxy': '192.156.1.1:33',
    'noProxy': '' # set this value as desired
    })
# Page loaded to check which IP address is in use.
url = 'http://www.expressvpn.com/what-is-my-ip'
# Location of the geckodriver executable on the local machine.
driver_path = 'C:\\Users\\user\\geckodriver.exe'

# Start Firefox through geckodriver, passing the proxy object directly to the
# constructor, then open the IP-check page.
# NOTE(review): this is the call the question is about -- the page reportedly
# still shows the real IP, so the proxy does not appear to take effect here.
browser = Firefox(executable_path = driver_path, proxy = proxy)
browser.get(url)

出于某种原因,每次我检查 IP 时,它都显示我的真实 IP 而不是代理 IP。为什么会这样?您能否告知应该如何实现?代码有问题吗?

4

1 回答 1

1

我开始研究这个问题,并注意到代理是通过 WebDriver 的功能(capabilities)和 geckodriver 来设置的。

我在测试中使用了来自这些来源的代理信息。

免费代理列表:

请让我指出,使用免费代理 IP 地址可能会有很大问题。这些类型的代理因存在连接问题而臭名昭著,例如与延迟相关的超时。此外,这些站点也可能是断断续续的,这意味着它们可以随时关闭。有时这些网站会被滥用,因此它们可能会被屏蔽。

下面的代码在 selenium 中使用 DesiredCapabilities。

from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities

# Browser start-up options.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# Override the User-Agent header. 'general.useragent.override' is the Firefox
# preference that controls it; any other key is silently stored and ignored.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('general.useragent.override', user_agent)

# Work on a copy so the shared DesiredCapabilities.FIREFOX template dict is
# not mutated for every other driver created in this process.
firefox_capabilities = DesiredCapabilities.FIREFOX.copy()

# Manual proxy configuration passed to geckodriver through the capabilities.
# Only 'sslProxy' is set, so only HTTPS traffic is routed through the proxy;
# add 'httpProxy' as well if plain HTTP requests must be proxied too.
firefox_capabilities['proxy'] = {
    "proxyType": "MANUAL",
    "sslProxy": '34.95.40.165:3128',
}

driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options, desired_capabilities=firefox_capabilities)

# Page loaded to check which IP address is in use.
URL = 'http://www.expressvpn.com/what-is-my-ip'

driver.get(URL)

在此处输入图像描述

你也可以这样做:

from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities

# Browser start-up options.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# Override the User-Agent header. 'general.useragent.override' is the Firefox
# preference that controls it; any other key is silently stored and ignored.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('general.useragent.override', user_agent)

# Work on a copy so the shared DesiredCapabilities.FIREFOX template dict is
# not mutated for every other driver created in this process.
firefox_capabilities = DesiredCapabilities.FIREFOX.copy()

# Describe the proxy with a Proxy object and let it write itself into the
# capabilities. Only the SSL proxy is set, so only HTTPS traffic is proxied.
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = '143.110.148.15:8080'
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
                           desired_capabilities=firefox_capabilities)

# Page loaded to check which IP address is in use.
URL = 'http://www.expressvpn.com/what-is-my-ip'

driver.get(URL)

在此处输入图像描述

您还可以使用 Python 包 http_request_randomizer 获取代理 IP 地址,该地址可以传递给 geckodriver。

import random
import logging
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy

# Obtain a list of HTTPS proxies
# Suppress the console debugging output by setting the log level
req_proxy = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)

# Pick a single random proxy from the list and keep its address
random_proxy = random.choice(req_proxy.get_proxy_list())
proxy_address = random_proxy.get_address()

# Browser start-up options.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# Override the User-Agent header. 'general.useragent.override' is the Firefox
# preference that controls it; any other key is silently stored and ignored.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('general.useragent.override', user_agent)

# Work on a copy so the shared DesiredCapabilities.FIREFOX template dict is
# not mutated for every other driver created in this process.
firefox_capabilities = DesiredCapabilities.FIREFOX.copy()

# add the random proxy to firefox_capabilities
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = proxy_address
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
                           desired_capabilities=firefox_capabilities)

# print proxy IP for testing
print(proxy_address)
# example output:
# 93.183.250.200:53281

# Page loaded to check which IP address is in use.
URL = 'http://www.expressvpn.com/what-is-my-ip'

# Only the page load is guarded: it is the call that raises
# TimeoutException when the chosen proxy is too slow.
try:
    driver.get(URL)
except TimeoutException:
    print("A Page load Timeout Occurred.")
    driver.quit()

在此处输入图像描述

如前所述,免费代理可能有多个问题。下面的代码展示了如何使用代理判断(proxy judge)来检查单个代理的状态。

import random
import logging
from time import sleep
from random import randint
from proxy_checking import ProxyChecker
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy


def random_ssl_proxy_address():
    """Return the address of one randomly selected HTTPS proxy.

    The candidate list is fetched through ``RequestProxy`` restricted to
    ``Protocol.HTTPS``; the address of the chosen entry is returned as given
    by its ``get_address()`` method.
    """
    # ERROR log level keeps the library's debugging output off the console
    proxy_source = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)
    candidates = proxy_source.get_proxy_list()

    # Draw a one-element sample and unpack its only member
    (chosen,) = random.sample(candidates, 1)
    return chosen.get_address()


def get_proxy_address():
    """Return a random HTTPS proxy address that passes the proxy check.

    Candidate addresses come from ``random_ssl_proxy_address()``; each one is
    checked with ``ProxyChecker.check_proxy`` and the first whose result has
    a truthy ``'status'`` entry is returned. The function keeps trying until
    such an address is found, pausing a few seconds between attempts.

    The retry is a loop rather than a recursive call: the recursive form did
    not return the nested result (so callers received ``None`` after any
    failed first attempt) and its depth grew with every bad proxy.

    Errors raised by the proxy list or the checker are not handled here and
    propagate to the caller.
    """
    while True:
        proxy_address = random_ssl_proxy_address()
        checker = ProxyChecker()
        proxy_judge = checker.check_proxy(proxy_address)

        # presumably check_proxy returns a dict carrying a 'status' flag;
        # a missing flag is treated the same as a failed check
        if proxy_judge.get('status'):
            return proxy_address

        print('Looking for a valid proxy address.')

        # this sleep timer is helping with some timeout issues
        # that were happening when querying
        sleep(randint(5, 10))


# Fetch one validated proxy address and show it.
random_ssl_proxy = get_proxy_address()
print(f'Valid proxy address: {random_ssl_proxy}')
# example output:
# Valid proxy address: 98.116.152.143:3128

请注意,我使用的 proxy_checking 包没有任何内置的错误处理,因此您必须自行添加一些代码来捕获可能出现的错误。

于 2021-08-08T16:28:09.553 回答