我正在尝试创建一个字符串子类,以便将Arpabet 符号表示为单个字符。这是我到目前为止得到的:
import Levenshtein
class ArpabetChar(str):
"""
Class that turn string into an Arpabet character.
http://www.speech.cs.cmu.edu/cgi-bin/cmudict
"""
def __init__(self, chars: list):
self._chars = chars
def __repr__(self):
return "".join(char for char in self._chars)
def __str__(self):
return "".join(char for char in self._chars)
def __eq__(self, other):
if self._chars == other._chars:
return True
else:
return False
def __len__(self):
return len(self._chars)
def __getitem__(self, item):
return self._chars[item]
def __iter__(self):
for char in self._chars:
yield ArpabetChar([char])
def __add__(self, other):
added_char = [char for char in self._chars]
for char in other._chars:
added_char.append(char)
return ArpabetChar(added_char)
char1 = ArpabetChar(["AH0"])
char2 = ArpabetChar(["AH1"])
char3 = ArpabetChar(["AE1"])
print("Indexing:", char1[0])
print(f"Length of {char1}: {len(char1)}")
print(f"Length of {char2}: {len(char2)}")
print(f"Levenshtein distance {char1} and {char2}:{Levenshtein.distance(char1, char2)}")
print(f"Levenshtein distance {char1} and {char3}:{Levenshtein.distance(char1, char3)}")
我对这两种计算的期望输出是 1 的 Levenshtein 距离。有什么提示或建议吗?