这是我的代码:
def _parse(self, text):
"""
This is the core interaction with the parser.
It returns a Python data-structure, while the parse()
function returns a JSON object
"""
# CoreNLP interactive shell cannot recognize newline
if '\n' in text or '\r' in text:
to_send = re.sub("[\r\n]", " ", text).strip()
else:
to_send = text
self.corenlp.sendline(to_send)
max_expected_time = max(300.0, len(to_send) / 3.0)
# repeated_input = self.corenlp.except("\n") # confirm it
t = self.corenlp.expect(["\nNLP> ", pexpect.TIMEOUT, pexpect.EOF,
"\nWARNING: Parsing of sentence failed, possibly because of out of memory."],
timeout=max_expected_time)
incoming = self.corenlp.before
lag = incoming.split(b"\r\n")
incoming = b"\r\n".join(lag).decode('latin-1').encode('utf-8')
if t == 1:
# TIMEOUT, clean up anything left in buffer
print >>sys.stderr, {'error': "timed out after %f seconds" % max_expected_time,
'input': to_send,
'output': incoming}
raise TimeoutError("Timed out after %d seconds" % max_expected_time)
elif t == 2:
# EOF, probably crash CoreNLP process
print >>sys.stderr, {'error': "CoreNLP terminates abnormally while parsing",
'input': to_send,
'output': incoming}
raise ProcessError("CoreNLP process terminates abnormally while parsing")
elif t == 3:
# out of memory
print >>sys.stderr, {'error': "WARNING: Parsing of sentence failed, possibly because of out of memory.",
'input': to_send,
'output': incoming}
raise OutOfMemoryError
if VERBOSE:
print("%s\n%s" % ('=' * 40, incoming))
try:
results = parse_parser_results(incoming)
except ixception as e:
if VERBOSE:
print(traceback.format_exc())
raise e
self.pre_loaded_analisys_dict[to_send] = results
with open(self.pre_analysis,"w", encoding = 'utf-8') as f:
json.dump(self.pre_loaded_analisys_dict,f)
return results
而且我遇到了这个错误(我正在解析很多术语,这是我第一次遇到这个错误):
>> 不支持的操作数类型:“builtin_function_or_method”和“_io.TextIOWrapper”
有任何想法吗?
编辑: printint 传入变量我有这个:
b'Q\r\n注释管道计时信息:\r\nTokenizerAnnotator:0.0 秒。\r\nWordsToSentencesAnnotator:0.0 秒。\r\nPOSTaggerAnnotator:0.0 秒。\r\nMorphaAnnotator:0.1 秒。\r\nNERCombinerAnnotator:0.4 秒.\r\n总计:0.6 秒。337 个令牌,速度为 606.1 个令牌/秒。\r\n管道设置:0.0 秒。\r\nStanfordCoreNLP 管道的总时间:138.7 秒。\r\n'
当我应该得到这样的东西时:
b'膝关节挫伤\r\n句子 #1(3 个标记):\r\n膝关节挫伤\r\n[Text=Contusion CharacterOffsetBegin=0 CharacterOffsetEnd=9 PartOfSpeech=NN Lemma=contusion NamedEntityTag=O] [Text=of CharacterOffsetBegin=10 CharacterOffsetEnd=12 PartOfSpeech=IN Lemma=of NamedEntityTag=O] [Text=knee CharacterOffsetBegin=13 CharacterOffsetEnd=17 PartOfSpeech=NN Lemma=knee NamedEntityTag=O] \r'