在 Pedram 的答案基础上扩展，可以使用以下代码：
from nltk.parse.corenlp import CoreNLPParser
# Parser client for a CoreNLP server; this assumes the server is already
# running locally and listening on port 9000.
nlp = CoreNLPParser(url='http://localhost:9000')
def extract_phrase(trees, labels):
    """Collect the text of every subtree whose label is in *labels*.

    Args:
        trees: Iterable of tree objects. Each must provide ``subtrees()``,
            and every subtree must provide ``label()`` and ``leaves()``.
        labels: Container of labels to match (tested with ``in``).

    Returns:
        A list of strings, one per matching subtree in traversal order,
        each being that subtree's leaves joined by single spaces.
    """
    phrases = []
    for tree in trees:
        for subtree in tree.subtrees():
            if subtree.label() in labels:
                # Store the phrase text; without this append the function
                # would always return an empty list.
                phrases.append(' '.join(subtree.leaves()))
    return phrases
def get_chunks(sentence):
    """Parse *sentence* and extract its noun-phrase and verb-phrase chunks.

    The sentence is sent to the module-level ``nlp`` parser and only the
    first parse it yields is used.

    Args:
        sentence: The raw sentence text to parse.

    Returns:
        A 3-tuple ``(parse, noun_phrases, verb_phrases)``: the first parse
        result, the phrases found for labels ``'NP'``/``'CC'``, and the
        phrases found for label ``'VP'``.
    """
    parse = next(nlp.raw_parse(sentence))
    # Run the same extraction twice, once per label group, in NP-then-VP order.
    noun_phrases, verb_phrases = (
        extract_phrase(parse, wanted) for wanted in (['NP', 'CC'], ['VP'])
    )
    return parse, noun_phrases, verb_phrases
if __name__ == '__main__':
    # Demo input; add more sentences to this list to parse several in one run.
    dialog = [
        "Anarchism is a political philosophy that advocates self-governed societies based on voluntary cooperative institutions rejecting unjust hierarchy"
    ]
    for sentence in dialog:
        # Parse the sentence, then print the tree and the extracted chunks.
        trees, nps, vps = get_chunks(sentence)
        print("Sentence: ", sentence)
        print("Tree:\n", trees)
        print("Noun Phrases: ", nps)
        print("Verb Phrases: ", vps)
Sentence: Anarchism is a political philosophy that advocates self-governed societies based on voluntary cooperative institutions rejecting unjust hierarchy
Tree:
 (ROOT
  (S
    (NP (NN Anarchism))
    (VP
      (VBZ is)
      (NP
        (NP (DT a) (JJ political) (NN philosophy))
        (SBAR
          (WHNP (WDT that))
          (S
            (VP
              (VBZ advocates)
              (NP
                (ADJP (NN self) (HYPH -) (VBN governed))
                (NNS societies))
              (PP
                (VBN based)
                (PP
                  (IN on)
                  (NP
                    (NP
                      (JJ voluntary)
                      (JJ cooperative)
                      (NNS institutions))
                    (VP
                      (VBG rejecting)
                      (NP (JJ unjust) (NN hierarchy)))))))))))))
Noun Phrases: ['Anarchism', 'a political philosophy that advocates self - governed societies based on voluntary cooperative institutions rejecting unjust hierarchy', 'a political philosophy', 'self - governed societies', 'voluntary cooperative institutions rejecting unjust hierarchy', 'voluntary cooperative institutions', 'unjust hierarchy']
Verb Phrases: ['is a political philosophy that advocates self - governed societies based on voluntary cooperative institutions rejecting unjust hierarchy', 'advocates self - governed societies based on voluntary cooperative institutions rejecting unjust hierarchy', 'rejecting unjust hierarchy']