这是我的函数的代码:
def calcVowelProportion(wordList):
    """
    Calculate the proportion of vowels in each word in wordList.

    Only the lowercase letters in 'aeiou' are counted as vowels.

    Args:
        wordList: iterable of non-empty strings.

    Returns:
        A list of floats, one per word and in input order: the number of
        vowel characters divided by the length of the word.

    Raises:
        ZeroDivisionError: if a word is the empty string.
    """
    VOWELS = 'aeiou'
    ratios = []
    for word in wordList:
        numVowels = 0
        for char in word:
            if char in VOWELS:
                numVowels += 1
        # float() keeps this a true division on Python 2 as well.
        ratios.append(numVowels/float(len(word)))
    # Hand the computed ratios back; without this the caller only gets None.
    return ratios
现在,我正在处理一个超过 87,000 个单词的列表,这个算法显然非常慢。
有一个更好的方法吗?
编辑:
我用下面的类测试了 @ExP 提供的算法:
import time
class vowelProportions(object):
    """
    Benchmark several ways of computing the vowel/word-length ratio for
    every word in a list.

    Creating an instance loads the word list from WORDLIST_FILENAME and
    prints the mean run time of each implementation. All cvp* methods take
    a list of non-empty strings and return a list of floats in input order;
    each raises ZeroDivisionError if a word is the empty string.
    """
    WORDLIST_FILENAME = "words_short.txt"
    # Only these lowercase letters count as vowels, in every implementation.
    VOWELS = 'aeiou'

    def __init__(self):
        """Load the word list and print the mean time of each algorithm."""
        self.wordList = self.buildWordList()
        benchmarks = (
            ("Original", self.cvpOriginal),
            ("Generator", self.cvpGenerator),
            ("Count", self.cvpCount),
            ("Translate", self.cvpTranslate),
        )
        for label, func in benchmarks:
            meanTime = self.calcMeanTime(10000, func, self.wordList)
            # A single-argument print() behaves the same on Python 2 and 3.
            print(label + ": " + str(meanTime))

    def buildWordList(self):
        """
        Read WORDLIST_FILENAME and return its lines as a list of words.

        Each line is stripped of surrounding whitespace and lowercased.
        Blank lines are skipped, because an empty word would make every
        ratio computation divide by zero.
        """
        # The with-block closes the file even if reading fails. Default
        # buffering is used: unbuffered text mode is rejected by Python 3.
        with open(self.WORDLIST_FILENAME, 'r') as inFile:
            cleaned = (line.strip().lower() for line in inFile)
            return [word for word in cleaned if word]

    def cvpOriginal(self, wordList):
        """Baseline: explicit nested loops counting vowels per character."""
        VOWELS = self.VOWELS
        ratios = []
        for word in wordList:
            numVowels = 0
            for char in word:
                if char in VOWELS:
                    numVowels += 1
            ratios.append(numVowels/float(len(word)))
        return ratios

    def cvpGenerator(self, wordList):
        """Sum a generator of per-character membership tests."""
        VOWELS = self.VOWELS
        return [sum(char in VOWELS for char in word)/float(len(word))
                for word in wordList]

    def cvpCount(self, wordList):
        """Sum str.count() over the five vowels."""
        VOWELS = self.VOWELS
        return [sum(word.count(char) for char in VOWELS)/float(len(word))
                for word in wordList]

    def cvpTranslate(self, wordList):
        """
        Count vowels as the length lost when str.translate() deletes them.

        Deleting the vowels (rather than a hand-typed consonant list) keeps
        the result equal to the other implementations for every input,
        including words containing 'v', 'w', digits or punctuation.
        """
        VOWELS = self.VOWELS
        if hasattr(str, 'maketrans'):
            # Python 3: deletion is expressed through a translation table.
            table = str.maketrans('', '', VOWELS)
            return [(len(word) - len(word.translate(table)))/float(len(word))
                    for word in wordList]
        # Python 2 byte strings: translate(None, deletechars).
        return [(len(word) - len(word.translate(None, VOWELS)))/float(len(word))
                for word in wordList]

    def timeFunc(self, func, *args):
        """Return the elapsed time, in seconds, of one call to func(*args)."""
        # time.clock() was removed in Python 3.8; use perf_counter when the
        # interpreter provides it and fall back to clock() on Python 2.
        timer = getattr(time, 'perf_counter', None) or time.clock
        start = timer()
        func(*args)
        return timer() - start

    def calcMeanTime(self, numTrials, func, *args):
        """
        Return the mean time of numTrials calls to func(*args).

        Raises:
            ZeroDivisionError: if numTrials is 0.
        """
        times = [self.timeFunc(func, *args) for _ in range(numTrials)]
        return sum(times)/len(times)
输出是(对于 200 个单词的列表):
Original: 0.0005613667
Generator: 0.0008402738
Count: 0.0012531976
Translate: 0.0003343548
令人惊讶的是,Generator 和 Count 比原来的还要慢(如果我的实现不正确,请告诉我)。
我想测试 @John 的解决方案,但我对树(数据结构)一无所知。