
I am new to Python and I have had a lot of help from the stackoverflow community migrating my shell script to Python. But I am struggling again with how to implement threading: this script runs over a number of results and it would be much faster to run them concurrently. For example, the script returns 120 servers to work on, and I would like to run 5 at a time with a queue.

The part I want to run in threads is everything after the following condition (I marked it with comments):

if checkServer.checkit(host,port):

Below is the content of the extract_adapter.py file:

import psycopg2
import urllib2
import base64
import sys
import re
import lxml.html as LH
import checkServer

def extractAdapter(env,family,iserver,login,password,prefix,proxyUser,proxyPass,proxyHost,service):

    print "Starting on \t"+iserver

    proxy_auth = "http://"+proxyUser+":"+proxyPass+"@"+proxyHost
    proxy_handler = urllib2.ProxyHandler({"http": proxy_auth})

    opener = urllib2.build_opener(proxy_handler)
    urllib2.install_opener(opener)
    request = urllib2.Request("http://"+iserver+"/invoke/listRegisteredAdapters")
    base64string = base64.encodestring('%s:%s' % (login, password)).replace('\n', '')
    request.add_header("Authorization", "Basic %s" % base64string)
    response = urllib2.urlopen(request)
    html = response.read()

    doc = LH.fromstring(html)
    tds = (td.text_content() for td in doc.xpath("//td[not(*)]"))

    for adapterType, adapterDescription in zip(*[tds]*2):

        proxy_auth = "http://"+proxyUser+":"+proxyPass+"@"+proxyHost
        proxy_handler = urllib2.ProxyHandler({"http": proxy_auth})
        opener = urllib2.build_opener(proxy_handler)
        opener = urllib2.build_opener()
        urllib2.install_opener(opener)
        request = urllib2.Request("http://"+iserver+service+""+adapterType)
        base64string = base64.encodestring('%s:%s' % (login, password)).replace('\n', '')
        request.add_header("Authorization", "Basic %s" % base64string)
        response = urllib2.urlopen(request)
        html2 = response.read()

        doc = LH.fromstring(html2)
        tds = (td.text_content() for td in doc.xpath("//td[not(*)]"))

        for connectionAlias,packageName,connectionFactoryType,mcfDisplayName,connectionState,hasError in zip(*[tds]*6):

            cur.execute("INSERT INTO wip.info_adapter (env,family,iserver,prefix,package,adapter_type,connection_name,status) values (%s,%s,%s,%s,%s,%s,%s,%s)",
            (env,family,iserver,prefix,packageName,adapterType,connectionAlias,connectionState))
            con.commit()

################################################################################

def extract(env):
    global cur,con
    con = None
    try:

        con = psycopg2.connect(database='xx', user='xx',password='xxx',host='localhost')
        cur = con.cursor()
        qry=" random non important query"

        cur.execute(qry)
        data = cur.fetchall()

        for result in data:

            family   = result[0]
            prefix   = result[1]
            iserver  = result[2]
            version  = result[3]
            login    = result[4]
            password = result[5]
            service  = result[6]
            proxyHost = result[7]
            proxyUser = result[8]
            proxyPass = result[9]

            parts=iserver.split(":")
            host=parts[0]
            port=parts[1]

            if checkServer.checkit(host,port):
            ## THE THREAD IS SUPPOSED TO START HERE

                if version == '7' or version == '8':

                    extractAdapter(env,family,iserver,login,password,prefix,proxyUser,proxyPass,proxyHost,service)

                elif version == '60' or version == '61':
                    print "Version 6.0 and 6.1 not supported yet"
            else:
                print iserver+" is offline"
            ## THE THREAD IS SUPPOSED TO END HERE

    except psycopg2.DatabaseError, e:
        print 'Error %s' % e
        sys.exit(1)

    finally:

        if con:
            con.close()

And this is how I call the extract method from runme.py:

import extract_adapter_thread
from datetime import datetime

startTime = datetime.now()
print"------------------------------"
extract_adapter_thread.extract('TEST')
print"------------------------------"
print(datetime.now()-startTime)

By the way, the code works just fine. No errors.


3 Answers


Because of the Global Interpreter Lock, threading in Python will be badly throttled on anything that is not IO-bound. So you are probably better off with multiprocessing, which comes with its own Queue class (see this SO link for an example that uses an mp Queue).

That should let you work on many separate processes at the same time (for example, 5 jobs at a time out of a batch of 120). Note that a process carries more overhead than a thread, so for small tasks you pay a price for using multiprocessing instead of threading. Your tasks sound large enough to justify that cost, though.
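
For illustration, a minimal sketch of that batching idea using a multiprocessing.Pool of 5 workers; process_server here is a hypothetical placeholder for the per-server work in extract_adapter.py, not something taken from your code:

from multiprocessing import Pool

def process_server(row):
    # Placeholder: unpack the row and call extractAdapter(...) for one server.
    pass

if __name__ == '__main__':
    rows = []                    # e.g. the rows returned by cur.fetchall()
    pool = Pool(processes=5)     # never more than 5 servers in flight at once
    pool.map(process_server, rows)
    pool.close()
    pool.join()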

Answered 2012-08-21T17:56:04.097

I don't really know whether this will help much, but here is a piece of code I had on my hard drive, so... here it goes. It is a basic thing for seeing the difference between pinging some IPs in parallel versus sequentially (it needs Linux, though). It is very simple and not a direct answer to your specific problem, but... since you say you are new to Python, it might give you some ideas.

#!/usr/bin/env python

import datetime
import subprocess
import threading

ipsToPing = [
    "google.com",
    "stackoverflow.com",
    "yahoo.com",
    "terra.es", 
]

def nonThreadedPinger():
    start = datetime.datetime.now()
    for ipToPing in ipsToPing:
        print "Not-threaded ping to %s" % ipToPing
        subprocess.call(["/bin/ping", "-c", "3", "-W", "1.0", ipToPing], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    end = datetime.datetime.now()
    print ("Non threaded ping of %s ips took: %s." % (len(ipsToPing), end-start))

def _threadedPingerAux(ipToPing):
    print "Threaded ping to %s" % ipToPing
    subprocess.call(["/bin/ping", "-c", "3", "-W", "1.0", ipToPing], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

def threadedPinger():
    threads = list()
    start = datetime.datetime.now()
    for ipToPing in ipsToPing:
        thread = threading.Thread(target=_threadedPingerAux, args=[ipToPing])
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()
    end = datetime.datetime.now()
    print ("Treaded ping of %s ips took: %s" % (len(ipsToPing), end-start))


if __name__ == "__main__":
    threadedPinger()
    nonThreadedPinger()
Answered 2012-08-21T18:09:52.103

If everything is thread-safe, you can use the threading module:

import threading
import extract_adapter_thread
from datetime import datetime

starttime = datetime.now()
print "-"*10
code = threading.Thread(target=extract_adapter_thread.extract, args=['TEST'])
code.daemon = True
code.start()
print "-"*10
print(datetime.now()-starttime)
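
If you specifically want at most 5 servers handled at once, as the question asks, here is a minimal sketch of a fixed pool of worker threads fed from a Queue; handle_server is a hypothetical placeholder for the per-server work, not a function from the question's code:

import threading
import Queue

def handle_server(row):
    # Placeholder: call extractAdapter(...) for one server here.
    pass

def worker(q):
    while True:
        row = q.get()
        try:
            handle_server(row)
        finally:
            q.task_done()

q = Queue.Queue()
for i in range(5):                              # 5 worker threads, so 5 servers at a time
    t = threading.Thread(target=worker, args=(q,))
    t.daemon = True
    t.start()

for row in []:                                  # e.g. the rows from cur.fetchall()
    q.put(row)
q.join()                                        # block until every queued row has been processed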
Answered 2012-08-21T17:46:16.003