2

我已经使用 20newsGroupExample 为三个类别 Category_A、Category_B、Category_C 训练了一个 Mahout 模型,现在想用这个模型对我的文档进行分类。有人能帮我理解这个模型给出的输出吗?

这是我的输出:

{0:-2813549.8786637094,1:-2651723.736745838,2:-2710651.7525975127}

从输出来看,该文档被归入类别 1,但预期类别是 2。我的理解正确吗,还是我的代码中遗漏了什么?

/**
 * Small command-line example that loads a trained Mahout Naive Bayes model,
 * turns one text file into a feature vector and prints the per-label scores
 * together with the index of the highest-scoring label.
 *
 * <p>NOTE(review): the vector built here uses a hashed
 * {@code StaticWordValueEncoder}. Presumably the feature indices must match
 * the ones used when the model was trained (e.g. a dictionary-based TF-IDF
 * pipeline); if they do not, the scores are not meaningful. Confirm against
 * the training pipeline.
 */
public class NaiveBayesClassifierExample {

    /** Text file whose content is classified. */
    private static final String INPUT_FILE = "c:\\inputFile.txt";

    /** Directory holding the materialized Naive Bayes model. */
    private static final String MODEL_PATH = "E:\\Final_Model\\model";

    /** Cardinality of the sparse document vector. */
    private static final int VECTOR_CARDINALITY = 100000;

    /**
     * Loads the model stored at {@code strModelPath}, classifies {@code v}
     * and prints the full score vector followed by the index of its largest
     * entry.
     *
     * @param strModelPath path of the trained model
     * @param v            document vector to classify
     * @throws IOException if the model cannot be read
     */
    public static void loadClassifier(String strModelPath, Vector v)
            throws IOException {
        Configuration conf = new Configuration();

        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(strModelPath), conf);
        AbstractNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);

        Vector scores = classifier.classifyFull(v);
        System.out.println(scores.asFormatString());
        // Index of the highest score; how indices map to category names is
        // decided by the label index written at training time (not shown here).
        System.out.println(scores.maxValueIndex());
    }

    /**
     * Reads the input file, tokenizes it and encodes every token into a
     * sparse vector, which is then normalized.
     *
     * @return the normalized document vector
     * @throws IOException if tokenizing fails
     */
    public static Vector createVect() throws IOException {
        FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
        Vector v1 = new RandomAccessSparseVector(VECTOR_CARDINALITY);

        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        try {
            TokenStream ts = analyzer.tokenStream("body", new StringReader(readData()));
            try {
                CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                // The TokenStream contract requires reset() before the first
                // incrementToken() and end()/close() afterwards.
                ts.reset();
                while (ts.incrementToken()) {
                    encoder.addToVector(termAtt.toString(), 1.0, v1);
                }
                ts.end();
            } finally {
                ts.close();
            }
        } finally {
            analyzer.close();
        }

        // normalize() returns a new vector and leaves the receiver untouched,
        // so the result has to be returned rather than discarded.
        return v1.normalize();
    }

    /**
     * Reads the whole input file into one string. Lines are joined with a
     * newline so that the last word of one line and the first word of the
     * next stay separate tokens.
     *
     * <p>Best effort: on an I/O error the stack trace is printed and whatever
     * was read so far (possibly nothing) is returned.
     *
     * @return the file content, each line terminated by {@code '\n'}
     */
    private static String readData() {
        StringBuilder text = new StringBuilder();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(INPUT_FILE));
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append('\n');
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return text.toString();
    }

    /**
     * Builds the document vector from the input file and classifies it with
     * the model stored under the default model path.
     *
     * @param args unused
     * @throws IOException if the model or the token stream cannot be read
     */
    public static void main(String[] args) throws IOException {
        Vector v = createVect();
        loadClassifier(MODEL_PATH, v);
    }

}

4

0 回答 0