下面是 melt 方案与稍作修改的 chain 方案的一些计时结果:
import random
import timeit
from itertools import chain
import pandas as pd
# Report the pandas version in use alongside the timings printed below.
print(pd.__version__)
# Number of keys in the benchmark dictionary.
dict_size = 1000000
# Pool of 10000 random integers in [0, 100] that the value lists are drawn from.
randoms = [random.randint(0, 100) for __ in range(10000)]
# Upper bound on the length of each value list.
max_list_size = 1000
# Benchmark input: each integer key in range(dict_size) maps to a list of
# 1..max_list_size elements sampled from the pool, so list lengths are ragged.
d = {k: random.sample(randoms, random.randint(1, max_list_size)) for k in
range(dict_size)}
def chain_():
    """Flatten the module-level dict ``d`` into a two-column DataFrame.

    Every key is repeated once per element of its value list, and the value
    lists are concatenated in the same order, giving aligned 'letter' and
    'value' columns. The frame is built and then discarded: this function
    exists only to be timed.
    """
    # One (repeated-keys, values) pair per dictionary entry.
    pairs = [([key] * len(items), items) for key, items in d.items()]
    # Transpose into one tuple of key groups and one tuple of value groups.
    key_groups, value_groups = zip(*pairs)
    letters = list(chain.from_iterable(key_groups))
    flat_values = list(chain.from_iterable(value_groups))
    pd.DataFrame({'letter': letters, 'value': flat_values})
def melt_():
    """Flatten the module-level dict ``d`` via a wide frame and ``melt``.

    The dict is first loaded with one row per key, then melted into long
    form with 'letter' and 'value' columns. The helper 'variable' column
    produced by ``melt`` is dropped, as are rows containing missing values.
    The result is discarded: this function exists only to be timed.
    """
    # One row per key; the former index becomes a regular 'letter' column.
    wide = pd.DataFrame.from_dict(d, orient='index')
    wide = wide.rename_axis('letter').reset_index()
    # Long form: one row per (letter, original column) pair.
    long_form = wide.melt(id_vars=['letter'], value_name='value')
    long_form.drop('variable', axis=1).dropna()
# Default timeit setup code: makes the benchmarked functions importable.
setup = "from __main__ import chain_, melt_"
# How many timing rounds to run, and how many calls to make per round.
repeat = 3
numbers = 10
def timer(statement, _setup=''):
    """Time ``statement`` with timeit and print the fastest round.

    Args:
        statement: Code string to benchmark.
        _setup: Setup code for the timer; when empty, the module-level
            ``setup`` string is used instead.
    """
    effective_setup = _setup if _setup else setup
    # ``repeat`` rounds of ``numbers`` executions each; report the best round.
    round_times = timeit.Timer(statement, setup=effective_setup).repeat(
        repeat, numbers)
    print(min(round_times))
# Run the benchmarks: chain-based flattening first, then the melt-based one.
print('timing')
for benchmark_stmt in ('chain_()', 'melt_()'):
    timer(benchmark_stmt)
当 max_list_size 为 100 时,melt 方案似乎更快:
1.0.3
timing
246.71311019999996
204.33705529999997
当 max_list_size 为 1000 时,melt 方案更慢:
2675.8446872
4565.838648400002
这可能是因为 melt 方案为 DataFrame 分配的内存远大于实际所需。
chain 方案的一个变体:
def chain_2():
    """Variant of the chain approach that repeats keys lazily.

    Builds the same 'letter'/'value' DataFrame from the module-level dict
    ``d`` as the list-based chain version, but produces the repeated keys
    with ``itertools.repeat`` instead of materialising ``[k] * len(v)``
    lists. The frame is built and then discarded: this function exists
    only to be timed.
    """
    # Imported locally because the file's top-level imports only bring in
    # ``chain``; the bare ``itertools.repeat`` reference raised NameError.
    from itertools import repeat

    keys, values = map(chain.from_iterable,
                       zip(*((repeat(k, len(v)), v) for k, v in d.items())))
    pd.DataFrame({'letter': list(keys), 'value': list(values)})
这个变体似乎并没有更快。
(Python 3.7.6)