3

Given

# Play counts keyed by user name; each inner dict maps a band name to the
# number of times that user played the band.
userplays = { "Alice"   : { "AC/DC" : 2,
                            "The Raconteurs" : 3,
                            "Mogwai" : 1
                          },
              "Bob"     : { "The XX" : 4,
                            "Lady Gaga" : 3,
                            "Mogwai" : 1,
                            "The Raconteurs" : 1
                          },
              "Charlie" : { "AC/DC" : 7,
                            "Lady Gaga" : 7
                          }
            }

get a list of all bands:

['Lady Gaga', 'Mogwai', 'AC/DC', 'The Raconteurs', 'The XX']

I can do

list(set(flatten([ [ band 
                     for band 
                     in playcounts.keys() ] 
                   for playcounts 
                   in userplays.values() ] ) ) )

where `flatten` comes from the question "Flatten (an irregular) list of lists". Is it possible to do this without `flatten`, using only list/dict comprehensions?

4

2 回答 2

8

这样就可以做到:

set(b for v in userplays.values() for b in v.keys())

产生:

set(['Lady Gaga', 'Mogwai', 'AC/DC', 'The Raconteurs', 'The XX'])
于 2012-06-23T00:33:40.627 回答
5

另一种方法是使用字典推导式(dict comprehension,Python 2.7+):

{k:v for v in userplays.values() for k in v.keys()}.keys()

产生:

['Lady Gaga', 'Mogwai', 'AC/DC', 'The Raconteurs', 'The XX']

至少在 Python 3.3 中,这种写法也更快:

import timeit

# Sample data for the benchmark: user name -> {band name -> play count}.
userplays = { "Alice"   : { "AC/DC" : 2,
                            "The Raconteurs" : 3,
                            "Mogwai" : 1
                          },
              "Bob"     : { "The XX" : 4,
                            "Lady Gaga" : 3,
                            "Mogwai" : 1,
                            "The Raconteurs" : 1
                          },
              "Charlie" : { "AC/DC" : 7,
                            "Lady Gaga" : 7
                          }
            }


def f1():
    """Collect every band name by feeding a generator expression to set().

    The resulting set is discarded; this function exists only to be timed.
    """
    set(band for plays in userplays.values() for band in plays.keys())

def f2():
    """Collect every band name as the keys of a dict comprehension.

    Each band maps to the per-user dict it was found in; only the keys are
    of interest. The result is discarded; this function exists only to be
    timed.
    """
    {band:plays for plays in userplays.values() for band in plays.keys()}.keys()

# Time 10000 calls of each candidate (seconds of wall-clock time).
t1 = timeit.timeit(f1, number=10000)
t2 = timeit.timeit(f2, number=10000)

# Time saved by the quicker candidate, as a fraction of the slower one's time.
faster = abs(t1 - t2) / max(t1, t2)

print("""
set:                {:.4} seconds
dict:               {:.4} seconds
faster of those is  {:.4%} faster

""".format(t1, t2, faster))

输出:

set:                0.02448 seconds
dict:               0.01988 seconds
faster of those is  18.7907% faster

编辑

纯粹出于好奇,我比较了可以用一行代码(one-liner)完成的各种写法。

结果如下:

f1: set from a generator expression
f2: keys from a dict comprehension
f3: set comprehension
f4: set from a list comprehension

       rate/s      f4       f1       f2       f3 
f4    358,650    0.0%   -13.4%   -31.7%   -41.3% 
f1    414,246   15.5%     0.0%   -21.1%   -32.2% 
f2    525,230   46.4%    26.8%     0.0%   -14.1% 
f3    611,158   70.4%    47.5%    16.4%     0.0% 

可以看到,集合推导式(set comprehension)最快,其次是字典推导式(dict comprehension)。

下面是生成 Perl 风格基准测试表格的代码:

import timeit
import locale
# Switch LC_NUMERIC to the user's default locale so that the digit grouping
# requested in format_num follows the local convention.
locale.setlocale(locale.LC_NUMERIC, "")

# Sample data for the benchmark: user name -> {band name -> play count}.
userplays = { "Alice"   : { "AC/DC" : 2,
                            "The Raconteurs" : 3,
                            "Mogwai" : 1
                          },
              "Bob"     : { "The XX" : 4,
                            "Lady Gaga" : 3,
                            "Mogwai" : 1,
                            "The Raconteurs" : 1
                          },
              "Charlie" : { "AC/DC" : 7,
                            "Lady Gaga" : 7
                          }
            }

def f1():
    """set from a generator expression"""
    # NOTE: the docstring is printed verbatim by test_table as this
    # candidate's label, so it is part of the program output.
    # Candidate: pass a generator expression over every user's band names to
    # set(). The result is discarded; the function exists only to be timed.
    set(b for v in userplays.values() for b in v.keys())

def f2():
    """keys from a dict comprehension"""
    # NOTE: the docstring is printed verbatim by test_table as this
    # candidate's label, so it is part of the program output.
    # Candidate: build a dict keyed by band name (each value is the per-user
    # dict the band was found in, which is never used) and take its keys.
    # The result is discarded; the function exists only to be timed.
    {k:v for v in userplays.values() for k in v.keys()}.keys()    

def f3():
    """set comprehension"""
    # NOTE: the docstring is printed verbatim by test_table as this
    # candidate's label, so it is part of the program output.
    # Candidate: build the set of band names directly with a set
    # comprehension. The result is discarded; the function exists only to be
    # timed.
    {b for v in userplays.values() for b in v.keys()}

def f4():
    """set from a list comprehension"""
    # NOTE: the docstring is printed verbatim by test_table as this
    # candidate's label, so it is part of the program output.
    # Candidate: materialize all band names (duplicates included) in a list
    # first, then convert that list to a set. The result is discarded; the
    # function exists only to be timed.
    set([b for v in userplays.values() for b in v.keys()])

def test_table(funcs, c):
    """Time each callable in `funcs` and print a cross-comparison table.

    Every function is run `c` times with timeit. The printed table lists the
    functions from slowest to fastest; each row shows the function's call
    rate (calls per second, rounded) followed by the relative difference
    between every function's elapsed time and the row function's elapsed
    time, formatted as a percentage. A "name: docstring" legend is printed
    before the table.

    funcs -- iterable of zero-argument callables (their __name__ and __doc__
             are used as labels)
    c     -- number of calls to time for each function
    """
    elapsed = {}
    for func in funcs:
        elapsed[func.__name__] = timeit.Timer(func).timeit(c)

    # Longest elapsed time first, i.e. slowest function at the top.
    ordered = sorted(elapsed, key=elapsed.get, reverse=True)

    table = [[' ', 'rate/s'] + ordered]
    for name in ordered:
        base = elapsed[name]
        rate = int(round(float(c) / base))
        deltas = ['{:.1%}'.format((elapsed[other] - base) / base)
                  for other in ordered]
        table.append([name, rate] + deltas)

    print()
    for func in funcs:
        print('{}: {}'.format(func.__name__, func.__doc__))
    print()
    pprint_table(table)

def format_num(num):
    """Format a table cell, rendering integer-like values with digit grouping.

    Anything that int() accepts is truncated to an integer (floats lose
    their fractional part) and formatted with the current LC_NUMERIC
    locale's grouping, e.g. thousands separators. Any other value, such as
    a header label or a percentage string, is returned as str(num).

    num -- the cell value (number, string, or any other object)

    Returns the formatted string.
    """
    try:
        inum = int(num)
    except (ValueError, TypeError, OverflowError):
        # Not integer-like: labels, '12.3%' strings, None, inf, ...
        return str(num)

    # locale.format() was removed in Python 3.12; format_string() is the
    # supported replacement and applies the same locale-aware grouping.
    return locale.format_string("%d", inum, grouping=True)

def get_max_width(table, index):
    """Return the length of the longest formatted cell in column `index`.

    Each row's cell is passed through format_num before it is measured, so
    the width matches what will actually be printed.
    """
    cell_lengths = (len(format_num(row[index])) for row in table)
    return max(cell_lengths)

def pprint_table(table):
    """Print `table` (a list of rows) as aligned columns on stdout.

    Column widths are taken from the widest formatted cell in each column;
    the number of columns is taken from the first row. The first column is
    left-justified and every other column is right-justified after being
    passed through format_num.
    """
    widths = [get_max_width(table, column) for column in range(len(table[0]))]

    for row in table:
        label, cells = row[0], row[1:]
        # Row label: left-aligned with one extra space of padding.
        print(label.ljust(widths[0] + 1), end=' ')
        # Remaining cells: right-aligned with two extra spaces of padding.
        for column, cell in enumerate(cells, start=1):
            print(format_num(cell).rjust(widths[column] + 2), end=' ')
        print()

# Run the benchmark: time 100000 calls of each of the four candidates and
# print the comparison table.
test_table([f1,f2,f3,f4],100000) 
于 2012-06-23T01:04:59.490 回答