我喜欢对这些东西使用zip和map:
from collections import Counter
# for test, import random:
import random
# define class
class User(object):
    """Simple record carrying two boolean flags, ``a`` and ``b``."""

    def __init__(self, a, b):
        # Both values are always bools.
        self.a, self.b = a, b
# Build 100 users from random integers in [0, 100]:
# ``a`` is True for even draws, ``b`` is True for multiples of three.
users = [
    User(draw % 2 == 0, draw % 3 == 0)
    for draw in (random.randint(0, 100) for _ in range(100))
]
# Transpose the (a, b) pairs into two columns and tally each column.
aCounter, bCounter = (
    Counter(column)
    for column in zip(*[(user.a, user.b) for user in users])
)
更新:
map(sum, zip(*tuples))
在较小的样本量上比 for 循环略快,但对于较大的样本量,for 循环的扩展性要好得多。for 循环不像其他方法那样能从直接处理元组列表中获得明显的性能提升,可能是因为它本身已经接近最优了。
collections.Counter
还是很慢。
import functools
import itertools
import random
import time
from collections import Counter
# define class
class User(object):
    """Sample record exposing two boolean attributes, ``a`` and ``b``."""

    def __init__(self, a, b):
        self.a, self.b = a, b  # each one is always a bool
# Arbitrary sample: 100 users derived from random integers in [0, 100].
# ``a`` is True for even draws, ``b`` is True for multiples of three.
users = []
for _ in range(100):
    draw = random.randint(0, 100)
    users.append(User(draw % 2 == 0, draw % 3 == 0))
# The same sample flattened to plain (a, b) tuples, so the benchmarks can
# compare attribute access against tuple unpacking.
users2 = [(user.a, user.b) for user in users]
# useful function-timer decorator
def timer(times=1):
    """Build a decorator that runs a function repeatedly and reports its speed.

    Args:
        times: How many times each call to the decorated function executes
            the wrapped function. Values below 1 execute it zero times.

    Returns:
        A decorator. The decorated function forwards its arguments to the
        wrapped function on every repetition, prints one summary line
        (function name, repetitions, elapsed seconds, calls per second) and
        returns the value of the last repetition, or ``None`` when there
        were no repetitions.
    """
    def outer(fn):
        @functools.wraps(fn)  # keep fn's name/docstring on the wrapper
        def wrapper(*args, **kwargs):
            r = None  # defined up front so times < 1 cannot leave it unbound
            t0 = time.time()
            # itertools.repeat drives the loop on Python 2 and 3 alike
            # (xrange exists only on Python 2).
            for _ in itertools.repeat(None, times):
                r = fn(*args, **kwargs)
            dt = time.time() - t0
            # A coarse clock may report no elapsed time for very fast runs;
            # avoid dividing by zero in that case.
            rate = times / dt if dt > 0 else float('inf')
            print('{} ran {} times in {} seconds with {:f} ops/sec'.format(
                fn.__name__, times, dt, rate))
            return r
        return wrapper
    return outer
# now create the timeable functions
n = 10000  # repetitions per benchmark


@timer(times=n)
def time_sum():
    """Total the ``a`` and ``b`` flags of ``users`` via zip + sum.

    The (a, b) pairs are produced lazily, transposed with ``zip(*...)``
    into one column per attribute, and each column is summed (True counts
    as 1). Returns whatever ``map`` yields: a two-item list
    ``[a_total, b_total]`` on Python 2.
    """
    flag_pairs = ((user.a, user.b) for user in users)
    return map(sum, zip(*flag_pairs))
@timer(times=n)
def time_counter():
    """Tally the ``a`` and ``b`` flags of ``users`` with one Counter each.

    The (a, b) pairs are transposed into one column per attribute and each
    column is fed to ``collections.Counter``. Returns whatever ``map``
    yields: a two-item list of Counters on Python 2.
    """
    flag_pairs = ((user.a, user.b) for user in users)
    columns = zip(*flag_pairs)
    return map(Counter, columns)
@timer(times=n)
def time_for():
    """Count the True ``a`` and ``b`` flags of ``users`` with a plain loop.

    Returns a tuple ``(a_total, b_total)``.
    """
    count_a = count_b = 0
    for user in users:
        # Identity test on purpose: only the literal True is counted.
        if user.a is True:
            count_a += 1
        if user.b is True:
            count_b += 1
    return count_a, count_b
@timer(times=n)
def time_itermapzip():
    """Total the ``a`` and ``b`` flags of ``users`` with lazy itertools calls.

    Same computation as the zip + sum variant, but built from
    ``itertools.izip`` and ``itertools.imap`` (Python 2 only) and
    materialised with ``list``. Returns ``[a_total, b_total]``.
    """
    flag_pairs = ((user.a, user.b) for user in users)
    lazy_columns = itertools.izip(*flag_pairs)
    return list(itertools.imap(sum, lazy_columns))
@timer(times=n)
def time_sum2():
    """Total both flag columns of the pre-built tuple list ``users2``.

    ``zip(*users2)`` transposes the (a, b) tuples into one column per flag
    and each column is summed. Returns whatever ``map`` yields: a two-item
    list ``[a_total, b_total]`` on Python 2.
    """
    columns = zip(*users2)
    return map(sum, columns)
@timer(times=n)
def time_counter2():
    """Tally both flag columns of the pre-built tuple list ``users2``.

    ``zip(*users2)`` transposes the (a, b) tuples into one column per flag
    and each column is fed to ``collections.Counter``. Returns whatever
    ``map`` yields: a two-item list of Counters on Python 2.
    """
    columns = zip(*users2)
    return map(Counter, columns)
@timer(times=n)
def time_for2():
    """Count the True flags in ``users2`` with a plain loop over its tuples.

    Returns a tuple ``(a_total, b_total)``.
    """
    count_a = count_b = 0
    for flag_a, flag_b in users2:
        # Identity test on purpose: only the literal True is counted.
        if flag_a is True:
            count_a += 1
        if flag_b is True:
            count_b += 1
    return count_a, count_b
@timer(times=n)
def time_itermapzip2():
    """Total both flag columns of ``users2`` with lazy itertools calls.

    Uses ``itertools.izip`` and ``itertools.imap`` (Python 2 only) and
    materialises the result with ``list``. Returns ``[a_total, b_total]``.
    """
    lazy_columns = itertools.izip(*users2)
    return list(itertools.imap(sum, lazy_columns))
# Run every benchmark in order; each call prints its own timing line.
# ``v`` ends up holding the result of the last benchmark.
for benchmark in (
    time_sum,
    time_counter,
    time_for,
    time_itermapzip,
    time_sum2,
    time_counter2,
    time_for2,
    time_itermapzip2,
):
    v = benchmark()
# Sample output for the 100-user sample:
# time_sum ran 10000 times in 0.446894168854 seconds with 22376.662523 ops/sec
# time_counter ran 10000 times in 1.29836297035 seconds with 7702.006471 ops/sec
# time_for ran 10000 times in 0.267076015472 seconds with 37442.523554 ops/sec
# time_itermapzip ran 10000 times in 0.459508895874 seconds with 21762.364319 ops/sec
# time_sum2 ran 10000 times in 0.174293994904 seconds with 57374.323226 ops/sec
# time_counter2 ran 10000 times in 0.989939928055 seconds with 10101.623055 ops/sec
# time_for2 ran 10000 times in 0.183295965195 seconds with 54556.574605 ops/sec
# time_itermapzip2 ran 10000 times in 0.193426847458 seconds with 51699.131384 ops/sec

# Report the True/False split for each flag, using the last result.
total_users = len(users)
true_a, true_b = v[0], v[1]
print("True a's: {}\t False a's: {}\nTrue b's: {}\t False b's:{}".format(
    true_a, total_users - true_a, true_b, total_users - true_b))
# True a's: 53 False a's: 47
# True b's: 31 False b's:69
v  # bare expression: echoes the value in an interactive session only
# [53, 31]
样本大小为 1000 的相同代码:
# time_sum ran 10000 times in 9.30428719521 seconds with 1074.773359 ops/sec
# time_counter ran 10000 times in 16.7009849548 seconds with 598.767080 ops/sec
# time_for ran 10000 times in 2.61371207237 seconds with 3825.976130 ops/sec
# time_itermapzip ran 10000 times in 9.40824103355 seconds with 1062.897939 ops/sec
# time_sum2 ran 10000 times in 5.70988488197 seconds with 1751.348794 ops/sec
# time_counter2 ran 10000 times in 13.4643371105 seconds with 742.702735 ops/sec
# time_for2 ran 10000 times in 2.49017906189 seconds with 4015.775473 ops/sec
# time_itermapzip2 ran 10000 times in 6.10926699638 seconds with 1636.857581 ops/sec