我下载了最新的 jar 文件(https://code.google.com/p/kea-algorithm/),并将其添加到 Eclipse 项目的类路径中。但是我收到以下错误:“无法解析类型 weka.core.OptionHandler。它是从所需的 .class 文件中间接引用的”。该错误出现在设置 PorterStemmer 的那一行。
如果您知道任何其他可以与 Java 一起使用的关键字提取工具,我将不胜感激!
'''import java.io.FileNotFoundException;
import kea.main.KEAKeyphraseExtractor;
import kea.main.KEAModelBuilder;
import kea.stemmers.*;
import kea.stopwords.*;
/**
 * Configures a KEA {@link KEAModelBuilder} (training) and a
 * {@link KEAKeyphraseExtractor} (extraction) with a fixed set of options for
 * free keyphrase extraction on English documents.
 *
 * <p>NOTE(review): the KEA classes reportedly reference {@code weka.core}
 * types (e.g. {@code weka.core.OptionHandler}); presumably the Weka jar has to
 * be on the build path next to the KEA jar -- confirm against the KEA
 * distribution.
 */
public class KeywordExtractor {

    /** Model builder; assigned by {@link #setOptionsTraining()}. */
    private KEAModelBuilder km;

    /** Keyphrase extractor; assigned by {@link #setOptionsTesting(String)}. */
    private KEAKeyphraseExtractor ke;

    /**
     * Creates the model builder and applies the training options.
     *
     * @throws FileNotFoundException declared for the KEA setters; which call
     *         raises it is not visible from this class
     */
    private void setOptionsTraining() throws FileNotFoundException {
        km = new KEAModelBuilder();

        // A. required arguments (no defaults):

        // 1. Name of the directory -- give the path to your directory with
        // documents and keyphrases.
        // Documents should be in txt format with the extension "txt";
        // keyphrases have the same name as the documents, but the extension
        // "key", one keyphrase per line!
        km.setDirName("testdocs/en/train");

        // 2. Name of the model -- give the path to where the model is to be
        // stored and its name.
        km.setModelName("testdocs/en/model");

        // 3. Name of the vocabulary -- name of the file (without extension)
        // that is stored in VOCABULARIES, or "none" if no vocabulary is used
        // (free keyphrase extraction).
        km.setVocabulary("none");

        // 4. Format of the vocabulary in 3. Leave empty if vocabulary = "none",
        // use "skos" or "txt" otherwise.
        // NOTE(review): "none" is passed here rather than an empty value --
        // confirm that KEA accepts it.
        km.setVocabularyFormat("none");

        // B. optional arguments if you want to change the defaults

        // 5. Encoding of the document
        km.setEncoding("UTF-8");

        // 6. Language of the document -- use "es" for Spanish, "fr" for French
        // or other languages as specified in your "skos" vocabulary.
        km.setDocumentLanguage("en");

        // 7. Stemmer -- adjust if you use a different language than English or
        // if you want to alter results.
        // (We have obtained better results for Spanish and French with
        // NoStemmer.)
        // This must be the setter: the original called getStemmer(...) with an
        // argument, which does not configure the stemmer and is inconsistent
        // with the extractor setup in setOptionsTesting.
        km.setStemmer(new PorterStemmer());

        // 8. Stopwords -- adjust if you use a different language than English!
        km.setStopwords(new StopwordsEnglish());

        // 9. Maximum length of a keyphrase
        km.setMaxPhraseLength(5);

        // 10. Minimum length of a keyphrase
        km.setMinPhraseLength(1);

        // 11. Minimum occurrence of a phrase in the document -- use 2 for long
        // documents!
        km.setMinNumOccur(2);

        // Optional: turn off the keyphrase frequency feature
        // km.setUseKFrequency(false);
    }

    /**
     * Creates the keyphrase extractor and applies the extraction options.
     *
     * @param m_testdir path of the directory holding the documents to process
     * @throws FileNotFoundException declared for the KEA setters; which call
     *         raises it is not visible from this class
     */
    private void setOptionsTesting(String m_testdir) throws FileNotFoundException {
        ke = new KEAKeyphraseExtractor();

        // A. required arguments (no defaults):

        // 1. Name of the directory -- give the path to your directory with
        // documents; documents should be in txt format with the extension
        // "txt".
        // Note: keyphrases have the same name as the documents, but the
        // extension "key", one keyphrase per line!
        ke.setDirName(m_testdir);

        // 2. Name of the model -- give the path to the model; it needs to be
        // on the classpath (i.e. include '.' in the classpath for this to
        // work).
        // NOTE(review): training stores the model under "testdocs/en/model"
        // (no leading slash) -- confirm both names resolve to the same file.
        ke.setModelName("/testdocs/en/model");

        // 3. Name of the vocabulary -- name of the file (without extension)
        // that is stored in VOCABULARIES, or "none" if no vocabulary is used
        // (free keyphrase extraction).
        ke.setVocabulary("none");

        // 4. Format of the vocabulary in 3. Leave empty if vocabulary = "none",
        // use "skos" or "txt" otherwise.
        ke.setVocabularyFormat("none");

        // B. optional arguments if you want to change the defaults

        // 5. Encoding of the document
        ke.setEncoding("UTF-8");

        // 6. Language of the document -- use "es" for Spanish, "fr" for French
        // or other languages as specified in your "skos" vocabulary.
        ke.setDocumentLanguage("en");

        // 7. Stemmer -- adjust if you use a different language than English or
        // want to alter results.
        // (We have obtained better results for Spanish and French with
        // NoStemmer.)
        ke.setStemmer(new PorterStemmer());

        // 8. Stopwords
        ke.setStopwords(new StopwordsEnglish());

        // 9. Number of keyphrases to extract
        ke.setNumPhrases(10);

        // 10. Set to true if you want to compute global dictionaries from the
        // test collection.
        ke.setBuildGlobal(false);

        ke.setAdditionalInfo(true);
    }

    /**
     * Builds the model from the stems collected by the model builder and saves
     * it. Any exception is caught and its stack trace printed.
     */
    private void createModel() {
        try {
            km.buildModel(km.collectStems());
            km.saveModel();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the model and extracts keyphrases for the stems collected by the
     * extractor. Any exception is caught and its stack trace printed.
     */
    private void extractKeyphrases() {
        try {
            ke.loadModel();
            ke.extractKeyphrases(ke.collectStems());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Program entry point; currently does nothing.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // TODO: call the option setup, model creation and extraction steps.
    }
}
'''