我使用以下程序编写了一个将英语句子转换为马拉雅拉姆语的程序
# coding=utf-8
import re
import Eng_Manglish
import linecache
import nltk
line =" "
mal_sent=" "
out_file =open("Mal_sent",'w')
input_file = open("frequently_used_eng_sent",'r')
for line in input_file:
mal_sent=" "
convert_indicate =[]
print "*************************"
line = line.replace('.'," ")
line =line.replace('[0-9]',' ')
line =re.sub('[0-9].[\t]*',' ',line)
line = line.strip()
print line
line = line.lower()
text=nltk.word_tokenize(line)
data = nltk.pos_tag(text)
print data
words = line.split()
print words
for word in words:
print word[0]
dict_file = open("Dictionary/Eng-Mal-Dict/"+word[0], "r")
for dicline in dict_file:
flag=0
#print "*********************************"
dicline = dicline.strip()
dicline = dicline.split(':')
#print dicline[0]
if dicline[0] == word:#fing code for exact match
print dicline
print "found"
convert_indicate.append(1)
flag =1
print dicline[2]
mal_sent=mal_sent+dicline[2]+" "
print mal_sent.encode('utf-8')
break
if flag==0:
convert_indicate.append(0)
#transform word to manglish if not converted
mal_sent=mal_sent+Eng_Manglish.transform(word)+" "
dict_file.close()
print convert_indicate
print mal_sent
out_file.write(mal_sent)
与表格的字典内容
abbey:n:സന്യാസി മഠം
执行此程序后,我在行mal_sent=mal_sent+dicline[2]+" " as UnicodeDecodeError: 'ascii' codec can't decode byte 0xe0 in position 0: ordinal not in range(128) 中遇到错误
你能帮我解决这个错误吗?