main 方法中的这段文本需要 2 秒以上才能返回 NER 结果。我不是 NLP 方面的专家,而且这段代码完全无法扩展。我在发现的两处瓶颈位置添加了注释。您能否就如何提高程序性能提出改进建议?
谢谢。
public class NERSentimentUtil
{
private static final Logger logger = Logger.getLogger(NERSentimentUtil.class);
private static final String serializedClassifier7 = "edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz";
private static final String serializedClassifier4 = "edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz";
private static final String serializedClassifier3 = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
private static NERClassifierCombiner ncc;
private static StanfordCoreNLP pipeline;
static
{
try
{
ncc = new NERClassifierCombiner(serializedClassifier3,serializedClassifier4,serializedClassifier7);
} catch (IOException e) {
e.printStackTrace();
logger.error(e);
}
}
static
{
Properties props = new Properties();
props.setProperty("annotators", "tokenize, ssplit, parse, sentiment, sutime");
/*props.setProperty("ner.useSUTime", "0");*/
String defs_sutime = "/edu/stanford/nlp/models/sutime/defs.sutime.txt";
String holiday_sutime = "/edu/stanford/nlp/models/sutime/english.holidays.sutime.txt";
String _sutime = "/edu/stanford/nlp/models/sutime/english.sutime.txt";
String sutimeRules = defs_sutime + "," + holiday_sutime + "," + _sutime;
props.setProperty("ner.useSUTime", "true");
props.setProperty("-sutime.rules", sutimeRules);
props.setProperty("sutime.binders", "0");
props.setProperty("sutime.markTimeRanges", "false");
props.setProperty("sutime.includeRange", "false");
props.setProperty("customAnnotatorClass.sutime", "edu.stanford.nlp.time.TimeAnnotator");
props.setProperty("parse.maxlen", "20");
//props.setProperty("ner.applyNumericClassifiers", "false");
//props.setProperty("nthreads", "16");
//props.setProperty("threads", "16");
//props.setProperty("parse.nthreads","16");
//props.setProperty("ssplit.eolonly","true");
props.setProperty("-parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz");
RedwoodConfiguration.current().clear().apply();
pipeline = new StanfordCoreNLP(props);
//RedwoodConfiguration.empty().capture(System.err).apply();
}
//A sentiment score of 0 or 1 is negative, 2 neutral and 3 or 4 positive.
private static int getScore(int score)
{
if(score<2)
return -1;
else if(score==2)
return 0;
else
return 1;
}
public static HashMap<String,Object> getStanford(String s, long dateString)//"2013-07-14"
{
int finalScore =0;
HashMap<String,Object> map = new HashMap<String,Object>();
HashMap<String, Integer> dateMap = new HashMap<String, Integer>();
HashMap<String, Integer> dateCountMap = new HashMap<String, Integer>();
HashMap<String, String> dateSentenceMap = new HashMap<String, String>();
HashMap<String, Integer> personMap = new HashMap<String, Integer>();
HashMap<String, Integer> personCountMap = new HashMap<String, Integer>();
HashMap<String, Integer> orgMap = new HashMap<String, Integer>();
HashMap<String, Integer> orgCountMap = new HashMap<String, Integer>();
HashMap<String, Integer> locationMap = new HashMap<String, Integer>();
HashMap<String, Integer> locationCountMap = new HashMap<String, Integer>();
HashMap<String, Article_Location> locationArticleMap = new HashMap<String, Article_Location>();
ArrayList<Articel_Ner> organisationlist = new ArrayList<Articel_Ner>();
ArrayList<Articel_Ner> personlist = new ArrayList<Articel_Ner>();
ArrayList<Artilcle_Ner_Date> datelist = new ArrayList<Artilcle_Ner_Date>();
ArrayList<Article_NerLocation> locationList = new ArrayList<Article_NerLocation>();
try
{
Annotation annotation = pipeline.process(s);//1/3 rd time is taken up by this line
List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
for (CoreMap sentence : sentences)
{
String str = sentence.toString();
int score = getSentiment(sentence);
finalScore+=score;
boolean dFlag = true;
List<Triple<String,Integer,Integer>> triples = ncc.classifyToCharacterOffsets(str);
for (Triple<String,Integer,Integer> trip : triples)
{
String ne = trip.first();
String word = str.substring(trip.second(), trip.third).toLowerCase();
switch(ne)
{
case "LOCATION":
extractLocation(locationMap, locationCountMap, locationArticleMap, score, word);
break;
case "ORGANIZATION":
extractOrg(orgMap, orgCountMap, score, word);
break;
case "PERSON":
extractPerson(personMap, personCountMap, score, word);
break;
case "DATE":
if(dFlag)
{
extractSUDate(dateString, dateMap, dateCountMap, dateSentenceMap, str, score);
dFlag = false;
}
break;
default:
break;
}
}
}
//2/3rd of the time taken by these 4 methods:: can be obtimized
mapDate(dateMap, dateCountMap, dateSentenceMap, datelist);
mapLocation(locationMap, locationCountMap, locationArticleMap, locationList);
mapOrg(orgMap, orgCountMap, organisationlist);
mapPerson(personMap, personCountMap, personlist);
//
}
catch(Exception e)
{
logger.error(e);
logger.error(s);
e.printStackTrace();
}
if(finalScore>0)
finalScore = 1;
else if(finalScore<0)
finalScore = -1;
else
finalScore = 0;
map.put("ORGANISATION", organisationlist);
map.put("PERSON", personlist);
map.put("DATE", datelist);
map.put("LOCATION", locationList);
map.put("SENTIMENT", finalScore);
return map;
}
private static void extractPerson(HashMap<String, Integer> personMap, HashMap<String, Integer> personCountMap,
int score, String word)
{
if(personMap.get(word)!=null)
{
personMap.put(word, personMap.get(word)+score);
personCountMap.put(word, personCountMap.get(word)+1);
}
else
{
personMap.put(word, score);
personCountMap.put(word, 1);
//personSentenceMap.put(pname, str);
}
}
private static void extractOrg(HashMap<String, Integer> orgMap, HashMap<String, Integer> orgCountMap,
int score, String word)
{
if(orgMap.get(word)!=null)
{
orgMap.put(word, orgMap.get(word)+score);
orgCountMap.put(word, orgCountMap.get(word)+1);
}
else
{
orgMap.put(word, score);
orgCountMap.put(word, 1);
//orgSentenceMap.put(oname, str);
}
}
private static void extractLocation(HashMap<String, Integer> locationMap,
HashMap<String, Integer> locationCountMap,
HashMap<String, Article_Location> locationArticleMap,
int score,
String word)
{
if(locationMap.get(word)!=null)
{
locationMap.put(word, locationMap.get(word)+score);
locationCountMap.put(word, locationCountMap.get(word)+1);
}
else
{
Article_Location articleLocation = LocationUtil.getLocation(word);
locationMap.put(word, score);
locationCountMap.put(word, 1);
locationArticleMap.put(word, articleLocation);
}
}
private static void extractSUDate(long dateString,
HashMap<String, Integer> dateMap,
HashMap<String, Integer> dateCountMap,
HashMap<String, String> dateSentenceMap,
String str,
int score) {
Annotation dateAnnotation = new Annotation(str);
dateAnnotation.set(CoreAnnotations.DocDateAnnotation.class, FormatUtil.getDate(dateString));
pipeline.annotate(dateAnnotation);
for(CoreMap timex:dateAnnotation.get(TimeAnnotations.TimexAnnotations.class))
{
TimeExpression timeExpression = timex.get(TimeExpression.Annotation.class);
if(timeExpression!=null && timeExpression.getTemporal()!=null &&
timeExpression.getTemporal().getTimexValue()!=null)
{
String word = checkDate(timeExpression.getTemporal().getTimexValue());
if(word!=null)
{
if(dateMap.get(word)!=null)
{
dateMap.put(word, dateMap.get(word)+score);
dateCountMap.put(word, dateCountMap.get(word)+1);
dateSentenceMap.put(word, dateSentenceMap.get(word)+" "+str);
}
else
{
dateMap.put(word, score);
dateCountMap.put(word, 1);
dateSentenceMap.put(word, str);
}
}
}
}
}
private static int getSentiment(CoreMap sentence) {
Tree annotatedTree = sentence.get(SentimentAnnotatedTree.class);
int localScore = RNNCoreAnnotations.getPredictedClass(annotatedTree);
int score = getScore(localScore);
return score;
}
private static void mapLocation(HashMap<String, Integer> locationMap,
HashMap<String, Integer> locationCountMap,
HashMap<String, Article_Location> locationArticleMap,
ArrayList<Article_NerLocation> locationList)
{
for(Map.Entry<String, Integer> entry : locationMap.entrySet())
{
String key = entry.getKey();
Integer value = entry.getValue();
Article_Location articleLocation = locationArticleMap.get(key);
Article_NerLocation l1 = new Article_NerLocation();
if(value>=1)
l1.setNerSentiment(1);
else if(value<=-1)
l1.setNerSentiment(-1);
else
l1.setNerSentiment(0);
l1.setKeyword(key);
l1.setCount(locationCountMap.get(key));
if(articleLocation!=null)
{
l1.setNerCountry(articleLocation.getCountryCode());
l1.setNerLatLong(articleLocation.getLatitude()+","+articleLocation.getLongitude());
l1.setTimeZone(articleLocation.getTimeZone());
l1.setCountryName(articleLocation.getCountryName());
}
locationList.add(l1);
}
}
private static void mapDate(HashMap<String, Integer> dateMap,
HashMap<String, Integer> dateCountMap,
HashMap<String, String> dateSentenceMap,
ArrayList<Artilcle_Ner_Date> datelist)
{
for(Map.Entry<String, Integer> entry : dateMap.entrySet())
{
String key = entry.getKey();
Integer value = entry.getValue();
Artilcle_Ner_Date d1 = new Artilcle_Ner_Date();
if(value>=1)
d1.setNerSentiment(1);
else if(value<=-1)
d1.setNerSentiment(-1);
else
d1.setNerSentiment(0);
d1.setKeyword(key);
d1.setCount(dateCountMap.get(key));
d1.setSentence(dateSentenceMap.get(key));
d1.setNerDateTheme1(SummaryThemeUtil.getSTByDate(dateSentenceMap.get(key)));
datelist.add(d1);
}
}
private static void mapOrg(HashMap<String, Integer> orgMap,
HashMap<String, Integer> orgCountMap,
ArrayList<Articel_Ner> organisationlist)
{
for(Map.Entry<String, Integer> entry : orgMap.entrySet())
{
String key = entry.getKey();
Integer value = entry.getValue();
Articel_Ner o1 = new Articel_Ner();
if(value>=1)
o1.setNerSentiment(1);
else if(value<=-1)
o1.setNerSentiment(-1);
else
o1.setNerSentiment(0);
o1.setKeyword(key);
o1.setCount(orgCountMap.get(key));
organisationlist.add(o1);
}
}
private static void mapPerson(HashMap<String, Integer> personMap,
HashMap<String, Integer> personCountMap,
ArrayList<Articel_Ner> personlist)
{
for(Map.Entry<String, Integer> entry : personMap.entrySet())
{
String key = entry.getKey();
Integer value = entry.getValue();
Articel_Ner p1 = new Articel_Ner();
if(value>=1)
p1.setNerSentiment(1);
else if(value<=-1)
p1.setNerSentiment(-1);
else
p1.setNerSentiment(0);
p1.setKeyword(key);
p1.setCount(personCountMap.get(key));
personlist.add(p1);
}
}
private static String checkDate(String date)
{
if(date.length()<10)
return null;
else if(date.length()>10)
date = date.substring(0,10);
if (date.matches("\\d{4}-\\d{2}-\\d{2}"))
return date;
else
return null;
}
public static void main(String args[])
{
String text = "Lets meet on every 2nd week. Night is young. Happy new Year. The festival will be held on the following dates are 18 Feb 1997, the 20th of july and 4 days from today.";
long pre = System.currentTimeMillis();
HashMap<String, Object> map = getStanford(text, 1508745558);
long post = System.currentTimeMillis();
long diff = post-pre;
System.out.println(diff);
System.out.println(map);
}
}