我已经使用 20newsGroupExample 为三个类别 Category_A、Category_B、Category_C 训练了 mahout 模型,现在我想使用这个模型对我的文档进行分类。有人可以帮我理解我从这个模型中得到的输出吗?
这是我的输出:
{0:-2813549.8786637094,1:-2651723.736745838,2:-2710651.7525975127}
根据该输出,文档被归入类别 1(得分最高的索引),但我预期的类别是 2。是我对输出的理解有误,还是我的代码中遗漏了什么?
/**
 * Classifies one text document with a previously trained Mahout naive Bayes model and
 * prints the per-label scores followed by the index of the highest-scoring label.
 *
 * <p>NOTE(review): the document is encoded here with a hashing
 * {@code StaticWordValueEncoder}. If the model was trained on dictionary-based vectors
 * (as the stock 20newsgroups example presumably does), the feature indices produced here
 * will not line up with the model's features and the scores will be meaningless —
 * confirm against the training pipeline.
 *
 * <p>NOTE(review): the printed index is a position in the score vector; which category
 * name it maps to presumably depends on the label index written at training time —
 * verify before comparing it with an expected category number.
 */
public class NaiveBayesClassifierExample {

    /** Document that is classified when no input path is supplied. */
    private static final String DEFAULT_INPUT_FILE = "c:\\inputFile.txt";

    /** Model location that is used when no model path is supplied. */
    private static final String DEFAULT_MODEL_PATH = "E:\\Final_Model\\model";

    /** Cardinality of the sparse feature vector built for the document. */
    private static final int FEATURE_VECTOR_SIZE = 100000;

    /**
     * Loads the naive Bayes model stored at {@code strModelPath}, scores {@code v} against
     * every label and prints the score vector and the index of its maximum to stdout.
     *
     * @param strModelPath path of the materialized model
     * @param v            feature vector of the document to classify
     * @throws IOException if the model cannot be read
     */
    public static void loadClassifier(String strModelPath, Vector v)
            throws IOException {
        Configuration conf = new Configuration();
        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(strModelPath), conf);
        AbstractNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);
        Vector scores = classifier.classifyFull(v);
        System.out.println(scores.asFormatString());
        System.out.println(scores.maxValueIndex());
    }

    /**
     * Builds the feature vector for the default input document.
     *
     * @return the normalized feature vector
     * @throws IOException if the document cannot be read or tokenized
     */
    public static Vector createVect() throws IOException {
        return createVect(DEFAULT_INPUT_FILE);
    }

    /**
     * Tokenizes the document at {@code inputPath} with a {@code StandardAnalyzer}, adds every
     * token with weight 1.0 to a sparse vector and returns the normalized result.
     *
     * @param inputPath path of the text document to encode
     * @return the normalized feature vector
     * @throws IOException if the document cannot be read or tokenized
     */
    public static Vector createVect(String inputPath) throws IOException {
        FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        String inputData = readData(inputPath);
        Vector features = new RandomAccessSparseVector(FEATURE_VECTOR_SIZE);

        try (TokenStream ts = analyzer.tokenStream("body", new StringReader(inputData))) {
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            // Follow the documented consumer workflow: reset, consume, end, close.
            ts.reset();
            while (ts.incrementToken()) {
                String word = new String(termAtt.buffer(), 0, termAtt.length());
                encoder.addToVector(word, 1.0, features);
            }
            ts.end();
        }

        // normalize() returns a new vector instead of modifying the receiver, so the
        // result has to be returned; calling it as a bare statement has no effect.
        return features.normalize();
    }

    /**
     * Reads the whole file at {@code inputPath} into a string.
     *
     * <p>Each line is followed by a newline so that the last word of one line and the
     * first word of the next stay separate tokens.
     *
     * @param inputPath path of the file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read
     */
    private static String readData(String inputPath) throws IOException {
        StringBuilder text = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(inputPath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append('\n');
            }
        }
        return text.toString();
    }

    /**
     * Classifies a document and prints the result.
     *
     * @param args optional: {@code args[0]} is the model path and {@code args[1]} the
     *             input document; the built-in defaults are used for missing values
     * @throws IOException if the document or the model cannot be read
     */
    public static void main(String[] args) throws IOException {
        String modelPath = args.length > 0 ? args[0] : DEFAULT_MODEL_PATH;
        String inputPath = args.length > 1 ? args[1] : DEFAULT_INPUT_FILE;
        Vector v = createVect(inputPath);
        loadClassifier(modelPath, v);
    }
}