我编写了这个程序来对文本文档中的单词进行分类和枚举。如果我不必用string.translate()
... 挑出每一个可能的标点符号,代码会非常简洁。是否有可能,而不是排除特定情况,只允许字母(也许是数字)字符?
from sys import argv
script_, filename = argv
bang = open(filename, 'r+')
words = bang.read()
words = words.translate(None, ',')
words = words.translate(None, '"')
words = words.translate(None, '.')
words = words.translate(None, '...')
words = words.translate(None, '?')
words = words.translate(None, '!')
words = words.translate(None, ';')
words = words.translate(None, '-')
words = words.translate(None, '\'')
words = words.translate(None, '.\'')
words = words.translate(None, '(')
words = words.translate(None, ')')
words = words.translate(None, ':')
words = str(words)
words = words.lower()
liste = words.split()
sorte = sorted(liste)
i = 0
f = 'nullooosdfgkjlkjasdihaiwuehlfkj898'
z = 1
w = 0
for wordss in sorte:
if f == wordss:
z += 1
w += 1
elif f != wordss:
w += 1
print "-", z
z = 1
i += 1
print "%d. %s" % (i, wordss),
f = wordss
print "\n\n word count - %d\n" % w