我已经使用 Stanford 解析器在 Java 中编写了一个类似的程序。您应该使用 Java 版的 Stanford 解析器来生成由已标注词性的单词组成的数组列表（ArrayList）。
package postagger;
/*
*
*
* Alphabetical list of part-of-speech tags used in the Penn Treebank Project:
Number
Tag
Description
1. CC Coordinating conjunction
2. CD Cardinal number
3. DT Determiner
4. EX Existential there
5. FW Foreign word
6. IN Preposition or subordinating conjunction
7. JJ Adjective
8. JJR Adjective, comparative
9. JJS Adjective, superlative
10. LS List item marker
11. MD Modal
12. NN Noun, singular or mass
13. NNS Noun, plural
14. NNP Proper noun, singular
15. NNPS Proper noun, plural
16. PDT Predeterminer
17. POS Possessive ending
18. PRP Personal pronoun
19. PRP$ Possessive pronoun
20. RB Adverb
21. RBR Adverb, comparative
22. RBS Adverb, superlative
23. RP Particle
24. SYM Symbol
25. TO to
26. UH Interjection
27. VB Verb, base form
28. VBD Verb, past tense
29. VBG Verb, gerund or present participle
30. VBN Verb, past participle
31. VBP Verb, non-3rd person singular present
32. VBZ Verb, 3rd person singular present
33. WDT Wh-determiner
34. WP Wh-pronoun
35. WP$ Possessive wh-pronoun
36. WRB Wh-adverb
*/
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.io.StringReader;
import semanticengine.Description;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
/**
 * Console wrapper around the Stanford {@link LexicalizedParser} that turns
 * English sentences into lists of part-of-speech tagged words.
 */
public class EnglishParser {
    /**
     * Parser shared by all instances. It is (re)assigned every time an
     * {@code EnglishParser} is constructed and stays {@code null} until then.
     */
    public static LexicalizedParser lp = null;

    /**
     * Reads sentences from standard input, one per line, and prints the tagged
     * words of each one. Reading stops at end of input or at a line that
     * consists of the word {@code end}.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        EnglishParser parser = new EnglishParser();
        Scanner input = new Scanner(System.in);
        // hasNextLine() ends the loop cleanly at end of input instead of letting
        // nextLine() throw NoSuchElementException.
        while (input.hasNextLine()) {
            String line = input.nextLine();
            // Strings must be compared by content; a reference comparison never
            // matches a freshly read line. The check also happens before
            // parsing so the sentinel itself is not tagged.
            if ("end".equals(line.trim())) {
                break;
            }
            ArrayList<TaggedWord> tagged = parser.Parse(line);
            // The Description instance is not used here; it is still constructed
            // in case its constructor has side effects (not visible in this file).
            new Description(tagged);
            System.out.println(tagged);
        }
    }

    /**
     * Parses every sentence that {@link DocumentPreprocessor} yields for the
     * given file. For each sentence it prints the parse tree in Penn format
     * followed by the collection returned by
     * {@code typedDependenciesCCprocessed(true)}.
     *
     * @param lp       parser applied to each sentence
     * @param filename file handed to {@code DocumentPreprocessor}
     */
    public static void demoDP(LexicalizedParser lp, String filename) {
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
        // A tokenizer could also be created here and passed to
        // DocumentPreprocessor.
        for (List<HasWord> sentence : new DocumentPreprocessor(filename)) {
            Tree parse = lp.apply(sentence);
            parse.pennPrint();
            System.out.println();
            GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
            // Wildcard instead of a raw Collection: the elements are only printed.
            Collection<?> tdl = gs.typedDependenciesCCprocessed(true);
            System.out.println(tdl);
            System.out.println();
        }
    }

    /**
     * Part-of-speech tags a sentence: each word is assigned its class (verb,
     * adjective, ...). The sentence is split on whitespace, each token is
     * wrapped in a {@link CoreLabel}, the shared parser {@link #lp} is applied
     * and the tagged yield of the resulting tree is returned.
     *
     * @param English sentence to tag
     * @return the tagged words; an empty list when {@code English} is
     *         {@code null} or contains only whitespace
     */
    public ArrayList<TaggedWord> Parse(String English)
    {
        if (English == null || English.trim().length() == 0) {
            // Nothing to tag; do not hand an empty token to the parser.
            return new ArrayList<TaggedWord>();
        }
        // Trim and split on whitespace runs so repeated, leading or trailing
        // blanks do not produce empty tokens.
        String[] tokens = English.trim().split("\\s+");
        List<CoreLabel> rawWords = new ArrayList<CoreLabel>(tokens.length);
        for (String token : tokens) {
            CoreLabel label = new CoreLabel();
            label.setWord(token);
            rawWords.add(label);
        }
        Tree parse = lp.apply(rawWords);
        return parse.taggedYield();
    }

    /**
     * Creates the parser from the serialized grammar at
     * {@code grammar/englishPCFG.ser.gz} and stores it in the static field
     * {@link #lp}.
     */
    public EnglishParser()
    {
        lp = new LexicalizedParser("grammar/englishPCFG.ser.gz");
    }
}
/**
 * Returns the tense/voice pattern of a tagged sentence.
 *
 * The sentence is scanned left to right for the first word tagged
 * {@code VBZ}/{@code VBP} (present), {@code VBD} (past) or {@code VB} (base
 * form). The Penn Treebank tags of the one or two words that follow decide
 * the result: {@code VBG} marks a continuous form and {@code VBN} a passive
 * form.
 *
 * NOTE(review): in this file the method appears after the closing brace of
 * EnglishParser; it presumably belongs inside a class - confirm where.
 *
 * @param Sen tagged words of one sentence, in sentence order
 * @return one of the pattern names returned below, or {@code "NO_PATTERN"}
 *         when {@code Sen} is {@code null} or contains none of the verb tags
 *         listed above
 */
public String getPattern(ArrayList<TaggedWord> Sen)
{
    if (Sen == null) {
        return "NO_PATTERN";
    }
    final int size = Sen.size();
    for (int i = 0; i < size; i++) {
        String tag = Sen.get(i).tag();
        boolean present = "VBZ".equals(tag) || "VBP".equals(tag);
        boolean past = "VBD".equals(tag);
        boolean base = "VB".equals(tag);
        if (!present && !past && !base) {
            continue;
        }

        // Tags of the next two words; null when the sentence ends earlier.
        // Constant-first equals() below keeps null and untagged words harmless.
        String next = (i + 1 < size) ? Sen.get(i + 1).tag() : null;
        String afterNext = (i + 2 < size) ? Sen.get(i + 2).tag() : null;

        if (present) {
            if ("VBG".equals(next)) {
                return "VBN".equals(afterNext) ? "PRESENT_CONT_PASS" : "PRESENT_CONT";
            }
            return "VBN".equals(next) ? "PRESENT_PASS" : "PRESENT_SIMP";
        }

        if (past) {
            if ("VBG".equals(next)) {
                // NOTE(review): "PATT_CONT_PASS" looks like a typo for
                // "PAST_CONT_PASS"; kept unchanged because callers may
                // compare against this exact value.
                return "VBN".equals(afterNext) ? "PATT_CONT_PASS" : "PAST_CONT";
            }
            return "VBN".equals(next) ? "PAST_PASS" : "PAST_SIMP";
        }

        // Base-form verb.
        // NOTE(review): VBN also yields "FUT_CONT"; a passive name may have
        // been intended - confirm before changing the returned value.
        if ("VBG".equals(next) || "VBN".equals(next)) {
            return "FUT_CONT";
        }
        // Any other follower (or none) is the simple form. Without this return
        // the follower was silently skipped and the scan went on, unlike the
        // present and past branches.
        return "FUT_SIMP";
    }
    return "NO_PATTERN";
}