我正在使用flair处理 NER 任务。我注意到有时天赋在处理一个句子后会引入空格。
示例:输入句子Herman Melvilles email is mobydick123@gmail.com ;-)
作为输出[PERSON_NAME] email is mobydick123 @ gmail.com ;-)
而不是[PERSON_NAME] email is mobydick123@gmail.com ;-)
.
我该如何解决?
from flair.data import Sentence
from flair.models import SequenceTagger
import re
tagger = SequenceTagger.load("flair/ner-english-ontonotes-large")
line = 'Herman Melvilles email is mobydick123@gmail.com ;-)'
sentence = Sentence(line)
tagger.predict(sentence)
ii = 0
sentence1 = sentence
if len(sentence.get_spans('ner')) > 0:
for entry in sentence.get_spans('ner'):
if 'PERSON' in str(entry):
person = re.findall('"([^"]*)"', str(entry))
sentence1 = str(sentence1).replace(str(person[0]), "[PERSON_NAME]")
# Return the output sequence
try:
sentence1 = re.findall('"([^"]*)"', sentence1)[0]
except:
sentence1 = line
else:
sentence1 = line
print(sentence1)