The performance difference is mainly caused by OrderedDict. OrderedDict uses dict's get and __getitem__, but redefines its own __iter__ and iteritems.
def __iter__(self):
    'od.__iter__() <==> iter(od)'
    # Traverse the linked list in order.
    root = self.__root
    curr = root[1]      # start at the first node
    while curr is not root:
        yield curr[2]   # yield the curr[KEY]
        curr = curr[1]  # move to next node

def iteritems(self):
    'od.iteritems -> an iterator over the (key, value) pairs in od'
    for k in self:
        yield (k, self[k])
See what we found: self[k].
Your second solution does not help us avoid that hash-bin-key computation, while the iterator generated by a plain dict (more precisely, items.iteritems().next() when items is a dict) does not make that computation at all.
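A minimal sketch (assuming CPython 2.7) makes the difference observable: a counting subclass shows that od.iteritems() performs one __getitem__ call, and hence one hash lookup, per entry, while dict's C-level iterator performs none. The Counting* class names are hypothetical.

from collections import OrderedDict

class CountingOrderedDict(OrderedDict):
    lookups = 0
    def __getitem__(self, key):
        CountingOrderedDict.lookups += 1
        return OrderedDict.__getitem__(self, key)

class CountingDict(dict):
    lookups = 0
    def __getitem__(self, key):
        CountingDict.lookups += 1
        return dict.__getitem__(self, key)

od = CountingOrderedDict((i, i) for i in range(5))
d = CountingDict((i, i) for i in range(5))
list(od.iteritems())
list(d.iteritems())
print CountingOrderedDict.lookups  # 5: self[k] hashes every key
print CountingDict.lookups         # 0: the C iterator reads entries directly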
Moreover, iteritems is also more expensive.
from timeit import Timer

N = 1000
d = {i: i for i in range(10000)}

def f1():
    for k in d: pass

def f2():
    for k in d.iterkeys(): pass

def f3():
    for v in d.itervalues(): pass

def f4():
    for t in d.iteritems(): pass

print(Timer(stmt='f1()', setup='from __main__ import f1').timeit(number=N))
print(Timer(stmt='f2()', setup='from __main__ import f2').timeit(number=N))
print(Timer(stmt='f3()', setup='from __main__ import f3').timeit(number=N))
print(Timer(stmt='f4()', setup='from __main__ import f4').timeit(number=N))
Output
0.256800375467
0.265079360645
0.260599391822
0.492333103788
Compared with iterkeys' dictiter_iternextkey and itervalues' dictiter_iternextvalue, iteritems' dictiter_iternextitem has extra work to do:
if (result->ob_refcnt == 1) {
    Py_INCREF(result);
    Py_DECREF(PyTuple_GET_ITEM(result, 0));
    Py_DECREF(PyTuple_GET_ITEM(result, 1));
} else {
    result = PyTuple_New(2);
    if (result == NULL)
        return NULL;
}
di->len--;
key = ep[i].me_key;
value = ep[i].me_value;
Py_INCREF(key);
Py_INCREF(value);
PyTuple_SET_ITEM(result, 0, key);
PyTuple_SET_ITEM(result, 1, value);
I think tuple creation could degrade the performance. Python does tend to reuse tuples, though; tupleobject.c shows:
/* Speed optimization to avoid frequent malloc/free of small tuples */
#ifndef PyTuple_MAXSAVESIZE
#define PyTuple_MAXSAVESIZE 20 /* Largest tuple to save on free list */
#endif
#ifndef PyTuple_MAXFREELIST
#define PyTuple_MAXFREELIST 2000 /* Maximum number of tuples of each size to save */
#endif
This optimization just means Python does not build some tuples from scratch. But there is still plenty of work to be done.
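A minimal sketch of this free-list reuse, assuming CPython (tuple() calls are used instead of tuple literals so that constant folding does not keep the first tuple alive):

t = tuple([1, 2])
addr = id(t)
del t                 # the freed 2-tuple goes onto the size-2 free list
u = tuple([3, 4])     # PyTuple_New(2) pops the same block off that list
print(id(u) == addr)  # usually True on CPython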
Case: dict
If OrderedDict is replaced with dict, I think the second solution is slightly better overall.
Python dictionaries are implemented with hash tables, so lookup is fast. The average time complexity of a lookup is O(1), while the worst case is O(n)[1]. The first solution has the same average time complexity as the second: both are O(n). Therefore, the second solution has no asymptotic advantage and is sometimes even slower, especially when the input data is small. In that case, the extra cost of iteritems cannot be amortized.
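As a sketch, both solutions boil down to one linear scan; they differ only in the constant cost paid per key. The helper below (argmin_by_get is a hypothetical name) spells out what min(d, key=d.get) effectively does:

def argmin_by_get(d):
    best_key = best_val = None
    for k in d:                      # n iterations over the keys
        v = d.get(k)                 # one average-O(1) hash lookup per key
        if best_val is None or v < best_val:
            best_key, best_val = k, v
    return best_key

The benchmark below measures exactly that constant-factor difference: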
from collections import OrderedDict
from operator import itemgetter
from timeit import Timer
from random import randint, random

N = 100000
xs = [('a', 10), ('b', 9), ('c', 4), ('d', 7), ('e', 3), ('f', 0), ('g', -5), ('h', 9)]
od = OrderedDict(xs)
d = dict(xs)

def f1od_min():
    return min(od, key=od.get)

def f2od_min():
    return min(od.iteritems(), key=itemgetter(1))[0]

def f1d_min():
    return min(d, key=d.get)

def f2d_min():
    return min(d.iteritems(), key=itemgetter(1))[0]

def f1od():
    for k in od: pass

def f2od():
    for t in od.iteritems(): pass

def f1d():
    for k in d: pass

def f2d():
    for t in d.iteritems(): pass

print 'min'
print(Timer(stmt='f1od_min()', setup='from __main__ import f1od_min').timeit(number=N))
print(Timer(stmt='f2od_min()', setup='from __main__ import f2od_min').timeit(number=N))
print(Timer(stmt='f1d_min()', setup='from __main__ import f1d_min').timeit(number=N))
print(Timer(stmt='f2d_min()', setup='from __main__ import f2d_min').timeit(number=N))
print
print 'traverse'
print(Timer(stmt='f1od()', setup='from __main__ import f1od').timeit(number=N))
print(Timer(stmt='f2od()', setup='from __main__ import f2od').timeit(number=N))
print(Timer(stmt='f1d()', setup='from __main__ import f1d').timeit(number=N))
print(Timer(stmt='f2d()', setup='from __main__ import f2d').timeit(number=N))
Output
min
0.398274431527
0.813040903243
0.185168156847
0.249574387248 <-- dict/the second solution
traverse
0.251634216081
0.642283865687
0.0565099754298
0.0958057518483
Then replace N and xs with
N = 50
xs = [(x, randint(1, 100)) for x in range(100000)]
Output
min
1.5148923257
3.47020082161
0.712828585756
0.70823812803 <-- dict/the second solution
traverse
0.975989336634
2.92283956481
0.127676073356
0.253622387762
Now replace N and xs with
N = 10
xs = [(random(), random()) for x in range(1000000)]
Output
min
6.23311265817
10.702984667
4.32852708934
2.87853889251 <-- dict/the second solution
traverse
2.06231783648
9.49360449443
1.33297618831
1.73723008092
Finally, the second solution starts to shine.
The worst case of the first solution: hash collisions
Let
N = 10000
xs = [(2 ** (32 + x) - 2 ** x + 1, 1) for x in range(100)]
# hash(2 ** (32 + x) - 2 ** x + 1) is always 1
Output
min
2.44175265292 <-- lookup is slow
2.76424538594 <-- lookup is slow
2.26508627493 <-- lookup is slow
0.199363955475
traverse
0.200654482623
2.59635966303 <-- lookup is slow
0.0454684184722
0.0733798569371
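The same degradation can be reproduced without relying on CPython's integer hashing internals: a key type with a deliberately constant __hash__ (BadKey is a hypothetical name) puts every key into the same hash bin, so each lookup has to probe an O(n) collision chain.

class BadKey(object):
    def __init__(self, x):
        self.x = x
    def __hash__(self):
        return 1                  # every key lands in the same bin
    def __eq__(self, other):
        return self.x == other.x

bad = {BadKey(i): i for i in range(1000)}  # even building this is O(n**2)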
[1] The average-case times listed for dict objects assume that the hash function for the objects is sufficiently robust to make collisions uncommon. The average case assumes the keys used in parameters are selected uniformly at random from the set of all keys. See the TimeComplexity page on the Python wiki.