我需要创建一个从文本生成列表的函数:
text = '^to[by, from] all ^appearances[appearance]'
list = ['to all appearances', 'to all appearance', 'by all appearances',
'by all appearance', 'from all appearances', 'from all appearance']
也就是说,括号内的值应该替换前面的单词,它紧跟在 ^ 之后。我希望函数有五个参数,如下所示...
我的代码(它不起作用)
def addSubstitution(buf, substitutions, val1='[', val2=']', dsym=',', start_p="^"):
for i in range(1, len(buf), 2):
buff = []
buff.extend(buf)
if re.search('''[^{2}]+[{0}][^{1}{0}]+?[{1}]'''.format(val1, val2, start_p, buff[i]):
substrs = re.split('['+val1+']'+'|'+'['+val2+']'+'|'+dsym, buff[i])
for substr in substrs:
if substr:
buff[i] = substr
addSubstitution(buff, substitutions, val1, val2, dsym, start_p)
return
substitutions.add(''.join(buf))
pass
def getSubstitution(text, val1='[', val2=']', dsym=',', start_p="^"):
pattern = '''[^{2}]+[{0}][^{1}{0}]+?[{1}]'''.format(val1, val2, start_p)
texts = re.split(pattern,text)
opttexts = re.findall(pattern,text)
buff = []
p = iter(texts)
t = iter(opttexts)
buf = []
while True:
try:
buf.append(next(p))
buf.append(next(t))
except StopIteration:
break
substitutions = set()
addSubstitution(buf, substitutions, val1, val2, dsym, start_p)
substitutions = list(substitutions)
substitutions.sort(key=len)
return substitutions