当我听说多线程编程时,我想到了加速我的程序的机会,但不是吗?
import eventlet
from eventlet.green import socket
from iptools import IpRangeList
class Scanner(object):
def __init__(self, ip_range, port_range, workers_num):
self.workers_num = workers_num or 1000
self.ip_range = self._get_ip_range(ip_range)
self.port_range = self._get_port_range(port_range)
self.scaned_range = self._get_scaned_range()
def _get_ip_range(self, ip_range):
return [ip for ip in IpRangeList(ip_range)]
def _get_port_range(self, port_range):
return [r for r in range(*port_range)]
def _get_scaned_range(self):
for ip in self.ip_range:
for port in self.port_range:
yield (ip, port)
def scan(self, address):
try:
return bool(socket.create_connection(address))
except:
return False
def run(self):
pool = eventlet.GreenPool(self.workers_num)
for status in pool.imap(self.scan, self.scaned_range):
if status:
yield True
def run_std(self):
for status in map(self.scan, self.scaned_range):
if status:
yield True
if __name__ == '__main__':
s = Scanner(('127.0.0.1'), (1, 65000), 100000)
import time
now = time.time()
open_ports = [i for i in s.run()]
print 'Eventlet time: %s (sec) open: %s' % (now - time.time(),
len(open_ports))
del s
s = Scanner(('127.0.0.1'), (1, 65000), 100000)
now = time.time()
open_ports = [i for i in s.run()]
print 'CPython time: %s (sec) open: %s' % (now - time.time(),
len(open_ports))
和结果:
Eventlet time: -4.40343403816 (sec) open: 2
CPython time: -4.48356699944 (sec) open: 2
我的问题是,如果我不是在我的笔记本电脑上而是在服务器上运行这段代码并设置更多的工人价值,它会比 CPython 的版本运行得更快吗?线程的优点是什么?
添加: 所以我使用原始 cpython 的线程重写应用程序
import socket
from threading import Thread
from Queue import Queue
from iptools import IpRangeList
class Scanner(object):
def __init__(self, ip_range, port_range, workers_num):
self.workers_num = workers_num or 1000
self.ip_range = self._get_ip_range(ip_range)
self.port_range = self._get_port_range(port_range)
self.scaned_range = [i for i in self._get_scaned_range()]
def _get_ip_range(self, ip_range):
return [ip for ip in IpRangeList(ip_range)]
def _get_port_range(self, port_range):
return [r for r in range(*port_range)]
def _get_scaned_range(self):
for ip in self.ip_range:
for port in self.port_range:
yield (ip, port)
def scan(self, q):
while True:
try:
r = bool(socket.create_conection(q.get()))
except Exception:
r = False
q.task_done()
def run(self):
queue = Queue()
for address in self.scaned_range:
queue.put(address)
for i in range(self.workers_num):
worker = Thread(target=self.scan,args=(queue,))
worker.setDaemon(True)
worker.start()
queue.join()
if __name__ == '__main__':
s = Scanner(('127.0.0.1'), (1, 65000), 5)
import time
now = time.time()
s.run()
print time.time() - now
结果是
Cpython's thread: 1.4 sec
我认为这是一个非常好的结果。我以标准的 nmap 扫描时间为标准:
$ nmap 127.0.0.1 -p1-65000
Starting Nmap 5.21 ( http://nmap.org ) at 2012-10-22 18:43 MSK
Nmap scan report for localhost (127.0.0.1)
Host is up (0.00021s latency).
Not shown: 64986 closed ports
PORT STATE SERVICE
53/tcp open domain
80/tcp open http
443/tcp open https
631/tcp open ipp
3306/tcp open mysql
6379/tcp open unknown
8000/tcp open http-alt
8020/tcp open unknown
8888/tcp open sun-answerbook
9980/tcp open unknown
27017/tcp open unknown
27634/tcp open unknown
28017/tcp open unknown
39900/tcp open unknown
Nmap done: 1 IP address (1 host up) scanned in 0.85 seconds
我现在的问题是:据我所知,Eventlet 中的线程是如何实现的,这不是线程,而是 Eventlet 的特别之处,为什么它们不能加速任务?
Eventlet 被 OpenStack 等许多主要项目使用。但为什么呢?只是以异步方式或其他方式对数据库进行繁重的查询?