我有一个使用马尔可夫链生成句子的 Python 代码,但对于代码工作,我必须定义 2 个起始词,但我希望第一个词是随机选择的。
这是代码:
import random
def getLines(filename):
return [line[0:-1] for line in open(filename).readlines()]
def getWords(lines):
words = []
for line in lines:
words.extend(line.split())
return words
def createProbabilityHash(words):
numWords = len(words)
wordCount = {}
for word in words:
if wordCount.has_key(word):
wordCount[word] += 1
else:
wordCount[word] = 1
for word in wordCount.keys():
wordCount[word] /= 1.0 * numWords
return wordCount
def getRandomWord(wordCount):
randomValue = random.random()
cumulative = 0.0
choosenWord = ""
print wordCount
for word in wordCount:
probability = wordCount[word]
if probability > cumulative:
cumulative = probability
choosenWord = word
return choosenWord
words = getWords(getLines("frases.txt"))
wordMap = {}
previous = (words[0], words[1])
for word in words[2:]:
if wordMap.has_key(previous):
wordMap[previous].append(word)
else:
wordMap[previous] = [word]
previous = (previous[1], word)
for word in wordMap.keys():
probabilityHash = createProbabilityHash(wordMap[word])
wordMap[word] = probabilityHash
palavras = ['hello', 'apple', 'something', 'yeah', 'nope', 'lalala']
previous = (".", "A") #Starting words
numWords = 10 # The number of words to print
print previous[0], previous[1],
for i in range(numWords):
word = getRandomWord(wordMap[previous])
print word,
if word.endswith(","):
print "\n"
if word.endswith("."):
break
previous = (previous[1], word)