下面是一个完整的脚本，它使用正则表达式来完全满足您的要求：
from collections import defaultdict
import re
# Count how often each single-quoted code appears in the input file and
# write every code that occurs more than once to the output file.
myarch = 'C:/code/test5.txt'  # this is your archive (the input file to scan)
# Occurrence count per code; missing keys start at 0.
mydict = defaultdict(int)
with open(myarch) as f:
    for line in f:
        # Capture each run of non-whitespace characters enclosed in single
        # quotes. A raw string keeps "\S" from being read as a string escape.
        codes = re.findall(r"'(\S*)'", line)
        for key in codes:
            mydict[key] += 1
out = []
# .items() works on Python 2 and 3; .iteritems() exists only on Python 2.
for key, value in mydict.items():
    # Report only the codes that were seen more than once.
    if value > 1:
        text = "['%s'], %s" % (key, value)
        out.append(text)
# Save to a file, one "['code'], count" entry per line.
with open('C:/code/fileout.txt', 'w') as fo:
    fo.write('\n'.join(out))
这可以简化为:
from collections import defaultdict
import re
# Condensed variant: count each single-quoted code in the input file and
# write the codes that occur more than once to the output file.
myarch = 'C:/code/test5.txt'  # input file to scan
# Occurrence count per code; missing keys start at 0.
mydict = defaultdict(int)
with open(myarch) as f:
    for line in f:
        # A raw string keeps "\S" from being read as a string escape.
        for key in re.findall(r"'(\S*)'", line):
            mydict[key] += 1
# Keep only codes seen more than once. .items() works on Python 2 and 3,
# whereas .iteritems() exists only on Python 2.
out = ["['%s'], %s" % (key, value) for key, value in mydict.items() if value > 1]
# Save to a file, one "['code'], count" entry per line.
with open('C:/code/fileout.txt', 'w') as fo:
    fo.write('\n'.join(out))