0

我有一个列表:

# Sample records: i[0] is the id and i[1] is the value that must end up unique.
old_list = [
    (1, 'AAA', None, 1),
    (2, 'AAA', 'x', 0),
    (5, 'AAB', 'z', 1),
    (6, 'ABB', 'x', 1),
    (9, 'ABB', 'x', 1),
]

我想得到一个新列表:其中每个 i[1] 的值只出现一次,并且对每个 i[1] 只保留 id(即 i[0])最大的那个元组,结果如下:

# Expected result: one tuple per distinct i[1], keeping the one with the
# largest id (i[0]).
new_list = [
    (2, 'AAA', 'x', 0),
    (5, 'AAB', 'z', 1),
    (9, 'ABB', 'x', 1),
]

有人能帮我吗?

4

2 回答 2

3

您可以使用itertools.groupby

from itertools import groupby
from operator import itemgetter

old_list = [
    (1, 'AAA', None, 1),
    (2, 'AAA', 'x', 0),
    (5, 'AAB', 'z', 1),
    (6, 'ABB', 'x', 1),
    (9, 'ABB', 'x', 1),
]

# groupby() only merges *consecutive* items that share a key, so old_list
# has to be ordered by i[1] already (the sample data is).
# max() selects the tuple with the largest id (i[0]) in each group directly,
# without building a sorted copy of the group first.
new_list = [max(group, key=itemgetter(0))
            for _, group in groupby(old_list, key=itemgetter(1))]

# print() with a single argument behaves the same on Python 2 and Python 3.
print(new_list)

输出

[(2, 'AAA', 'x', 0), (5, 'AAB', 'z', 1), (9, 'ABB', 'x', 1)]

如果 old_list 尚未按 i[1] 排序,可以像这样先排序(groupby 只会把相邻且键相同的元素归为一组):

old_list = [
    (1, 'AAA', None, 1),
    (2, 'AAA', 'x', 0),
    (5, 'AAB', 'z', 1),
    (6, 'ABB', 'x', 1),
    (9, 'ABB', 'x', 1),
]
# Order the records by i[1] so that equal keys sit next to each other.
old_list.sort(key=itemgetter(1))
于 2013-10-28T03:31:00.220 回答
0

你可以这样做:

# Bucket the tuples by their key (t[1]).
d = {}
for item in old_list:
    d.setdefault(item[1], []).append(item)

# Visit the keys in sorted order ('AAA', 'AAB', etc) and keep, from each
# bucket, the tuple with the largest id (t[0]).
new_list = [max(d[key], key=lambda t: t[0]) for key in sorted(d)]
# [(2, 'AAA', 'x', 0), (5, 'AAB', 'z', 1), (9, 'ABB', 'x', 1)]

编辑

如果不可能出现重复的 ID,并且 old_list 中每组元组已按 ID 升序排列(如示例数据),那么只需取每组的最后一个元素:

# Bucket the tuples by their key (t[1]), preserving their order in old_list.
d = {}
for item in old_list:
    d.setdefault(item[1], []).append(item)

# For each key in sorted order, keep the tuple that was appended last.
new_list = [d[key][-1] for key in sorted(d)]

对于示例数据,这会得到与上面相同的结果。


编辑 2,计时

from collections import defaultdict

# (key, value) pairs that the two grouping functions below are timed on.
s = [
    ('yellow', 1),
    ('blue', 2),
    ('yellow', 3),
    ('blue', 4),
    ('red', 1),
]

def f1():
    """Group the values in ``s`` by key using ``defaultdict(list)``.

    Returns the ``items()`` of the resulting key -> list-of-values mapping.
    """
    grouped = defaultdict(list)
    for key, value in s:
        grouped[key].append(value)

    return grouped.items()
    
def f2():
    """Group the values in ``s`` by key using ``dict.setdefault``.

    Returns the ``items()`` of the resulting key -> list-of-values mapping.
    """
    grouped = {}
    for key, value in s:
        grouped.setdefault(key, []).append(value)

    return grouped.items()
  
if __name__ == '__main__':
    import sys
    import timeit

    # Report the interpreter first, then time each grouping strategy in turn.
    print(sys.version)
    defaultdict_time = timeit.timeit("f1()", setup="from __main__ import f1, s")
    print('defaultdict:', defaultdict_time)
    setdefault_time = timeit.timeit("f2()", setup="from __main__ import f2, s")
    print('setdefault:', setdefault_time)

输出:

3.3.2 (default, Jul  6 2013, 10:40:18) 
[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]
defaultdict: 2.384568103996571
setdefault: 1.6183147379779257

与其他回答的计时对比表明,这是一种更快的方法:

from __future__ import print_function 
from collections import OrderedDict
import itertools
from operator import itemgetter

# Sample records shared by the timed functions below (id at index 0, key at
# index 1).
old_list = [
    (1, 'AAA', None, 1),
    (2, 'AAA', 'x', 0),
    (5, 'AAB', 'z', 1),
    (6, 'ABB', 'x', 1),
    (9, 'ABB', 'x', 1),
]
        
def f1():
    """Return, for each key (t[1]) in sorted order, the last tuple seen.

    Tuples from ``old_list`` are bucketed by key with ``dict.setdefault``;
    the final entry of every bucket is then collected into a list.
    """
    buckets = {}
    for record in old_list:
        buckets.setdefault(record[1], []).append(record)

    return [buckets[key][-1] for key in sorted(buckets)]
            
def f2():
    """Return the tuple with the largest id (t[0]) for each key (t[1]).

    The records are ordered by key and then by id, so when they are fed into
    an ``OrderedDict`` keyed on t[1] the last write for every key is the one
    with the largest id, and the keys come out in sorted order.

    Returns a list so the result has the same type on Python 2 and Python 3.
    """
    # Sort on (key, id) rather than on the unrelated t[2] column: that is what
    # guarantees both "largest id wins" and the output order, and it avoids
    # comparing None with str, which raises TypeError on Python 3.
    ordered = sorted(old_list, key=itemgetter(1, 0))
    return list(OrderedDict((elem[1], elem) for elem in ordered).values())
            
def f3():
    """Return the tuple with the largest id (x[0]) for each key (x[1]).

    ``old_list`` is sorted by key, split into runs of equal keys with
    ``itertools.groupby``, and each run is sorted by id in descending order
    so that its first element is the one to keep.
    """
    by_key = sorted(old_list, key=lambda x: x[1])
    winners = []
    for _, run in itertools.groupby(by_key, key=lambda x: x[1]):
        descending = sorted(run, key=lambda x: x[0], reverse=True)
        winners.append(descending[0])
    return winners
  
if __name__ == '__main__':
    import sys
    import timeit

    # Report the interpreter first, then time each answer's approach in turn.
    print(sys.version)
    drewk_time = timeit.timeit("f1()", setup="from __main__ import f1, old_list")
    print('drewk:', drewk_time)
    abhijit_time = timeit.timeit("f2()", setup="from __main__ import f2, old_list, OrderedDict, itemgetter")
    print('Abhijit:', abhijit_time)
    thefourtheye_time = timeit.timeit("f3()", setup="from __main__ import f3, old_list, itertools")
    print('thefourtheye:', thefourtheye_time)

输出:

2.7.5 (default, Aug 25 2013, 00:04:04) 
[GCC 4.2.1 Compatible Apple LLVM 5.0 (clang-500.0.68)]
drewk: 3.30526208878
Abhijit: 20.5611379147
thefourtheye: 13.2195081711
于 2013-10-28T03:37:59.280 回答