尝试以下实现
请注意,在我的实现中,我先按长度对字典进行预排序,然后只比较两两组合(2-组合),以减少迭代次数。这样可以确保每一对中 key 的大小始终小于或等于 hay 的大小。
# Sample input: a list of dicts, some of which are fully contained in others.
>>> my_list =[
{'oranges':'big','apples':'green'},  # every pair here also appears in the next dict
{'oranges':'big','apples':'green','bananas':'fresh'},
{'oranges':'big','apples':'red'},  # differs from the others in the 'apples' value
{'oranges':'big','apples':'green','bananas':'rotten'}
]
def remove_dup(lst):
    """Return the dicts from *lst* that are not contained in another dict.

    A dict is dropped when every one of its ``(key, value)`` pairs also
    appears in a different dict of the list. Exact duplicates are collapsed
    so that a single copy survives.

    Args:
        lst: Iterable of dicts whose keys and values are hashable.

    Returns:
        A new list of shallow dict copies, in order of first appearance
        in *lst*.

    Raises:
        TypeError: If a key/value pair is unhashable (e.g. a list value).

    Examples:
        >>> remove_dup([{1: 2}, {1: 2}])
        [{1: 2}]
        >>> remove_dup([{1: 2}, {1: 2, 3: 4}])
        [{1: 2, 3: 4}]
    """
    # Local import keeps the function usable when copied on its own.
    from itertools import combinations

    def dup_gen(ordered):
        """Yield each item set that is a subset of a later one in *ordered*."""
        # *ordered* is sorted by size, so in every pair ``key`` is never
        # larger than ``hay`` and only one direction has to be tested.
        for key, hay in combinations(ordered, 2):
            if key <= hay:
                yield key

    # Key each dict by the frozenset of its items: equal dicts collapse to a
    # single entry regardless of insertion order, and the subset test can run
    # directly on the keys without rebuilding sets for every pair.
    by_items = {}
    for entry in lst:
        by_items.setdefault(frozenset(entry.items()), entry)

    ordered = sorted(by_items, key=len)
    redundant = set(dup_gen(ordered))

    # Iterating the dict (not a set difference) keeps the output order stable.
    return [
        dict(entry) for items, entry in by_items.items() if items not in redundant
    ]
remove_dup(my_list)
[{'apples': 'green', 'oranges': 'big', 'bananas': 'fresh'}, {'apples': 'green', 'oranges': 'big', 'bananas': 'rotten'}, {'apples': 'red', 'oranges': 'big'}]
remove_dup([{1:2}, {1:2}])
[{1: 2}]
remove_dup([{1:2}])
[{1: 2}]
remove_dup([])
[]
remove_dup([{1:2}, {1:3}])
[{1: 2}, {1: 3}]
更快的实现
def remove_dup(lst):
    """Return the dicts from *lst* that are not contained in another dict.

    Compact variant: a dict is dropped when all of its ``(key, value)``
    pairs also appear in a different dict of the list. Exact duplicates
    collapse to a single surviving copy.

    Args:
        lst: Iterable of dicts whose keys and values are hashable.

    Returns:
        A new list of shallow dict copies, in order of first appearance
        in *lst*.

    Raises:
        TypeError: If a key/value pair is unhashable (e.g. a list value).
    """
    # ``combinations`` must be imported here: nothing at module level in this
    # snippet provides it, so the function would otherwise raise NameError.
    from itertools import combinations

    # Map the frozenset of items -> first dict seen with those items. This
    # removes exact duplicates independently of key insertion order.
    by_items = {}
    for entry in lst:
        by_items.setdefault(frozenset(entry.items()), entry)

    # Sorting by size guarantees the first element of every pair is the
    # smaller (or equal-sized) one, so a single ``<=`` test is sufficient.
    ordered = sorted(by_items, key=len)
    dups = {key for key, hay in combinations(ordered, 2) if key <= hay}

    # Rebuild the survivors as fresh dicts, preserving input order.
    return [dict(by_items[items]) for items in by_items if items not in dups]