我有一段文本,想从中提取三元组。我在 PHP 中使用 Stanford-NLP 库,该如何提取三元组(主语 - 宾语 - 谓词)?
GitHub 自述文件(README)中的示例代码展示了该库的用法。输出包含单词/词性对的列表(wordsAndTags)、Penn 句法树(penn)以及类型化依存关系(typedDependencies)。以第一个例子为例,句子 "What does the fox say?"(“狐狸说什么?”)的输出如下:
array(3) {
["wordsAndTags"]=>
array(6) {
[0]=>
array(2) {
[0]=>
string(4) "What"
[1]=>
string(2) "WP"
}
[1]=>
array(2) {
[0]=>
string(4) "does"
[1]=>
string(3) "VBZ"
}
[2]=>
array(2) {
[0]=>
string(3) "the"
[1]=>
string(2) "DT"
}
[3]=>
array(2) {
[0]=>
string(3) "fox"
[1]=>
string(2) "NN"
}
[4]=>
array(2) {
[0]=>
string(3) "say"
[1]=>
string(2) "VB"
}
[5]=>
array(2) {
[0]=>
string(1) "?"
[1]=>
string(1) "."
}
}
["penn"]=>
array(2) {
["parent"]=>
string(4) "ROOT"
["children"]=>
array(1) {
[0]=>
array(2) {
["parent"]=>
string(5) "SBARQ"
["children"]=>
array(3) {
[0]=>
array(2) {
["parent"]=>
string(4) "WHNP"
["children"]=>
array(1) {
[0]=>
array(2) {
["parent"]=>
string(7) "WP What"
["children"]=>
array(0) {
}
}
}
}
[1]=>
array(2) {
["parent"]=>
string(2) "SQ"
["children"]=>
array(3) {
[0]=>
array(2) {
["parent"]=>
string(8) "VBZ does"
["children"]=>
array(0) {
}
}
[1]=>
array(2) {
["parent"]=>
string(2) "NP"
["children"]=>
array(2) {
[0]=>
array(2) {
["parent"]=>
string(6) "DT the"
["children"]=>
array(0) {
}
}
[1]=>
array(2) {
["parent"]=>
string(6) "NN fox"
["children"]=>
array(0) {
}
}
}
}
[2]=>
array(2) {
["parent"]=>
string(2) "VP"
["children"]=>
array(1) {
[0]=>
array(2) {
["parent"]=>
string(6) "VB say"
["children"]=>
array(0) {
}
}
}
}
}
}
[2]=>
array(2) {
["parent"]=>
string(3) ". ?"
["children"]=>
array(0) {
}
}
}
}
}
}
["typedDependencies"]=>
array(5) {
[0]=>
array(3) {
["type"]=>
string(4) "dobj"
[0]=>
array(2) {
["feature"]=>
string(3) "say"
["index"]=>
int(5)
}
[1]=>
array(2) {
["feature"]=>
string(4) "What"
["index"]=>
int(1)
}
}
[1]=>
array(3) {
["type"]=>
string(3) "aux"
[0]=>
array(2) {
["feature"]=>
string(3) "say"
["index"]=>
int(5)
}
[1]=>
array(2) {
["feature"]=>
string(4) "does"
["index"]=>
int(2)
}
}
[2]=>
array(3) {
["type"]=>
string(3) "det"
[0]=>
array(2) {
["feature"]=>
string(3) "fox"
["index"]=>
int(4)
}
[1]=>
array(2) {
["feature"]=>
string(3) "the"
["index"]=>
int(3)
}
}
[3]=>
array(3) {
["type"]=>
string(5) "nsubj"
[0]=>
array(2) {
["feature"]=>
string(3) "say"
["index"]=>
int(5)
}
[1]=>
array(2) {
["feature"]=>
string(3) "fox"
["index"]=>
int(4)
}
}
[4]=>
array(3) {
["type"]=>
string(4) "root"
[0]=>
array(2) {
["feature"]=>
string(4) "ROOT"
["index"]=>
int(0)
}
[1]=>
array(2) {
["feature"]=>
string(3) "say"
["index"]=>
int(5)
}
}
}
}
接下来我需要从上述输出中提取三元组,该怎么做?