我得到了一些非常令人惊讶的结果,这似乎表明将迭代器包装在列表中并获得它的长度比使用 lambda 遍历它更有效。这怎么可能?直觉会建议分配所有这些列表会更慢。
是的 - 我知道你不能总是这样做,因为迭代器可以是无限的。:)
from itertools import groupby
from timeit import Timer
data = "abbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccac"
def rle_walk(gen):
ilen = lambda gen : sum(1 for x in gen)
return [(ch, ilen(ich)) for ch,ich in groupby(data)]
def rle_list(data):
return [(k, len(list(g))) for k,g in groupby(data)]
# randomy data
t = Timer('rle_walk("abbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccac")', "from __main__ import rle_walk; gc.enable()")
print t.timeit(1000)
t = Timer('rle_list("abbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccacabbbccac")', "from __main__ import rle_list; gc.enable()")
print t.timeit(1000)
# chunky blocks
t = Timer('rle_walk("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccc")', "from __main__ import rle_walk; gc.enable()")
print t.timeit(1000)
t = Timer('rle_list("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccc")', "from __main__ import rle_list; gc.enable()")
print t.timeit(1000)
1.42423391342
0.145968914032
1.41816806793
0.0165541172028