忽略字符串中的大小写、标点符号和空格的最有效方法是什么?这些字符串应该被分成单词而不是字符应该忽略前面提到的比较细节,并且这些单词字符串的切片应该尽可能高效并考虑到速度。
我打算在下面的代码中使用不区分大小写和标点符号的字符串,但是在看到评估需要多长时间之后class Slice: def __eq__(self, other): return self.root == other.root
,我决定data = tuple(string.split())
改用它。对于已经在下面的代码中表达的计算量大的算法来说,让字符串对大小写、标点符号和间距不敏感,并且对单词而不是字符起作用,这太昂贵了。
class Slice:
def __init__(self, data, offset, length):
self.prefix = data[:offset]
self.root = data[offset:offset+length]
self.suffix = data[offset+length:]
def __eq__(self, other):
return self.root == other.root
def __len__(self):
return len(self.root)
################################################################################
class Match:
def __init__(self, data, key, prefix_tree, suffix_tree):
self.data = data
self.key = key
self.prefix_tree = prefix_tree
self.suffix_tree = suffix_tree
self.__value = len(key) + prefix_tree.value() + suffix_tree.value()
def value(self):
return self.__value
################################################################################
class Tree(tuple):
def __new__(cls, nodes):
tree = super().__new__(cls, nodes)
tree.__value = max(map(Match.value, tree)) if tree else 0
return tree
def value(self):
return self.__value
def find(self, value):
for index, match in enumerate(self):
if match.value() == value:
return index
raise ValueError()
################################################################################
def search(data, key):
length = 0
nodes = []
for d_block in shrink(data, len(key)):
block_len = len(d_block)
if length > block_len:
return Tree(nodes)
for k_block in slide(key, block_len):
if d_block == k_block:
length = block_len
prefix_tree = search(d_block.prefix, k_block.prefix)
suffix_tree = search(d_block.suffix, k_block.suffix)
match = Match(d_block, k_block, prefix_tree, suffix_tree)
nodes.append(match)
return Tree(nodes)
def shrink(data, max_len):
for length in range(min(len(data), max_len), 0, -1):
for block in slide(data, length):
yield block
def slide(data, length):
for offset in range(len(data) - length + 1):
yield Slice(data, offset, length)
################################################################################
def build_tree(nodes):
match = nodes[nodes.find(nodes.value())]
node = match.key
if match.prefix_tree:
node.prefix = build_tree(match.prefix_tree)
if match.suffix_tree:
node.suffix = build_tree(match.suffix_tree)
return node
def flatten_tree(node):
array = [0]
_flatten(node, array)
return tuple(array)
def _flatten(node, array):
if isinstance(node.prefix, Slice):
_flatten(node.prefix, array)
else:
array.append(node.prefix)
array[0] += 1
array.append((array[0], node.root))
if isinstance(node.suffix, Slice):
_flatten(node.suffix, array)
else:
array.append(node.suffix)