2

我有一个多线程应用程序,它使用 python-requests 库发送 HTTP 请求,并使用 BeautifulSoup 解析响应内容。尽管 python-requests 的主页声明该库是线程安全的,但我怀疑段错误出在 python-requests 中。我看不出 BeautifulSoup 怎么可能不是线程安全的,但我也不能排除这种可能性。

这里是核心转储。(~800 KB)

为了冗余,这里还有一份核心转储的备份。(~800 KB)

这是相关Python代码的总结:

import bs4
import requests
import time
from functools import partial
from threading import Thread


def get_response(url, phrase=''):
    """Fetch ``url`` through the fixed proxy and scan its ``<p>`` elements.

    Args:
        url: Address passed straight to ``requests.get``.
        phrase: Accepted for the caller's benefit; the code shown here
            never reads it.

    Returns:
        False when the request raises or the status code is not 200,
        True once every selected element has been visited.
    """
    proxy_address = '1.2.3.4:3128'
    # Same proxy for every scheme: {'http': ..., 'https': ..., 'ftp': ...}.
    proxies = dict.fromkeys(('http', 'https', 'ftp'), proxy_address)

    try:
        reply = requests.get(url, proxies=proxies)
    except Exception:
        # Any failure while requesting is reported as False, not raised.
        return False

    if not reply.status_code == 200:
        return False

    document = bs4.BeautifulSoup(reply.text)
    paragraphs = document.select('p')

    for element in paragraphs:
        # Check some things
        # NOTE(review): `things_checked` and `things` are not defined in the
        # code visible here -- presumably produced by the elided checks.
        if things_checked == True:
            write_result(str(things))

    return True


def write_result(things=''):
    """Write ``things`` to the module-level ``output_file`` and flush it.

    Args:
        things: Text passed unchanged to ``output_file.write``.

    Returns:
        True when both the write and the flush succeed, False when either
        one raises. The success path used to fall off the end and return
        None, which is falsy just like the failure value, so callers could
        not tell the two outcomes apart.
    """
    try:
        output_file.write(things)
        output_file.flush()
    except Exception:
        # Deliberately broad and best-effort: any failure (including
        # ``output_file`` not having been assigned yet) yields False
        # instead of propagating into the calling thread.
        return False

    return True


def main(urls, phrases=None):
    """Open the shared output file and start one daemon thread per pair.

    A thread running ``get_response(url, phrase)`` is started for every
    combination of ``url`` and ``phrase``. The threads are not joined, so
    they may still be running when this function returns.

    Args:
        urls: Iterable of URLs handed to ``get_response``.
        phrases: Iterable of phrases paired with each URL. Defaults to no
            phrases, in which case no threads are started.

    Returns:
        False if ``output.txt`` cannot be opened, otherwise True.
    """
    global output_file

    # A None sentinel replaces the former shared mutable default list.
    if phrases is None:
        phrases = []

    try:
        # Append mode, line-buffered. The handle is left open on purpose:
        # worker threads reach it through write_result().
        output_file = open('output.txt', 'a+', 1)
    except Exception:
        return False

    for url in urls:
        for phrase in phrases:
            t = Thread(target=get_response, args=(url, phrase))
            t.daemon = True
            t.start()

    time.sleep(10)  # Actually, other non-related code runs here.
    return True


if __name__ == '__main__':
    # Script entry point: fan out every (url, phrase) combination.
    # NOTE(review): these targets look like placeholders and carry no URL
    # scheme -- confirm the real values include one (e.g. "http://").
    urls = [
        '1.1.1.1',
        '2.2.2.2',
    ]
    phrases = [
        'john',
        'paul',
        'george',
        'ringo',
    ]
    main(urls, phrases)
4

0 回答 0