这段代码算不上特别优雅，但似乎适用于我能想到的大多数情况。我相信它还可以进一步精简，而且应该很容易改成不区分大小写的版本。
def freqs(list):
    """Count how many times each item occurs in ``list``.

    Returns a plain dict mapping every distinct item to its number of
    occurrences. An empty input yields an empty dict.
    """
    counts = {}
    for item in list:
        if item in counts:
            counts[item] += 1
        else:
            # First time this item is seen.
            counts[item] = 1
    return counts
def added_and_removed(a, b):
    """Return the words added to and removed from ``a`` to obtain ``b``.

    Both strings are split on whitespace and compared as multisets of
    words, so repeated words are counted: every surplus occurrence in
    ``b`` is reported as added and every surplus occurrence in ``a`` is
    reported as removed. The comparison is case-sensitive ('Dave' and
    'dave' are different words) and ignores word positions.

    Args:
        a: The original text.
        b: The modified text.

    Returns:
        A tuple ``(added, removed)`` of two lists of words. A word
        appears once per surplus occurrence.
    """
    # Local import keeps this function self-contained.
    from collections import Counter

    before = Counter(a.split())
    after = Counter(b.split())

    # Counter subtraction keeps only positive counts, which is exactly the
    # per-word surplus on each side; elements() expands the counts back
    # into repeated words.
    added = list((after - before).elements())
    removed = list((before - after).elements())
    return added, removed
# Demo: compare two sentences and print the added and removed words.
a = 'hello hello hello my name is Dave dave bar foo'
b = 'hello my guys is test easy rob dave beef foo'
added, removed = added_and_removed(a, b)
# Single-argument print(...) produces the same output on Python 2 and 3.
print(added)
print(removed)
输出如下（列表中元素的顺序可能有所不同）：
['beef', 'rob', 'easy', 'test', 'guys']
['bar', 'name', 'Dave', 'hello', 'hello']