java - Apache lucene 搜索代码打印 null

Question

 // Search fragment: opens the index stored at indexDir, parses queryStr against the
 // "contents" field, and prints the "filename" value of each returned hit.
 Directory directory = FSDirectory.open(indexDir);
        IndexReader reader = DirectoryReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);
        // NOTE(review): the query-time analyzer should match the one used when indexing,
        // otherwise query terms may not line up with the indexed tokens.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);

        // "contents" is the default field for query terms that do not name a field.
        QueryParser parser = new QueryParser(Version.LUCENE_41, "contents", analyzer);
        Query query = parser.parse(queryStr);
        System.out.println("Searching for: " + query.toString("contents"));
        // Requests at most maxHits top-scoring documents.
        TopDocs results = searcher.search(query, maxHits);

        // scoreDocs holds only the returned top hits, while totalHits counts every match,
        // so hits.length (not numTotalHits) is the safe bound for the loop below.
        ScoreDoc[] hits = results.scoreDocs;
        int numTotalHits = results.totalHits;

        System.out.println("\n\n\n-----------------------Results--------------------------\n\n\n");
       System.out.println(numTotalHits + " total matching documents");


        for (int i = 0; i < hits.length; i++) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            // Document.get returns null when the document has no stored field with this
            // name, so this prints "null" for any document indexed without a stored
            // "filename" field.
            System.out.println(i+":File name is"+d.get("filename"));
        }

        System.out.println("Found " + hits.length);

我在搜索模块中使用了上面的代码。代码可以正常运行，但得到的输出如下：

390:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2012-12-31.txt
391:File name isnull
392:File name isnull
393:File name isnull
394:File name isnull
395:File name isnull
396:File name isnull
397:File name isnull
398:File name isnull
399:File name isnull
400:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2013-01-09.txt
401:File name isnull
402:File name isnull
403:File name isnull
404:File name isnull
405:File name isnull
406:File name isnull
407:File name isnull
408:File name isnull
409:File name isnull
410:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2013-01-10.txt

在这里，我只打印包含查询字符串的文件的文件名，但得到的结果太多，而且大多数结果的文件名为 null。为什么会发生这种情况？

建立索引时，我使用的是下面这段代码：

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;



public class SimpleFileIndexer {

    public static void main() throws Exception {

        File dataDir = new File("/home/maclean/Installations/apache-tomcat-7.0.21/logs");
        File indexDir = new File("/home/maclean/NetBeansProjects/LogSearchEngine/Result");

        SimpleFileIndexer indexer = new SimpleFileIndexer();

        int numIndex = indexer.index(indexDir, dataDir);

        System.out.println("Total files indexed " + numIndex);

    }

    private int index(File indexDir, File dataDir) throws Exception {
    // API and code to convert text into indexable/searchable tokens.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
    //To store an index on disk
    Directory directory = FSDirectory.open(indexDir);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_41, analyzer);
        int numIndexed;
        try (IndexWriter indexWriter = new IndexWriter(directory, config)) {
            indexDirectory(indexWriter, dataDir);
            numIndexed = indexWriter.maxDoc();
            indexWriter.close();

        }

        return numIndexed;


    }

    private void indexDirectory(IndexWriter indexWriter, File dataDir) throws IOException {

        File[] files = dataDir.listFiles();
        for (int i = 0; i < files.length; i++) {
            File f = files[i];
            if (f.isDirectory()) {
                indexDirectory(indexWriter, f);
            }
            else {
                indexFileWithIndexWriter(indexWriter, f);
            }
        }

    }

    private void indexFileWithIndexWriter(IndexWriter indexWriter, File file) throws IOException {

        FileInputStream fis = null;
        if (file.isHidden() || file.isDirectory() || !file.canRead() || !file.exists()) {
            return;
        }

        System.out.println("Indexing file " + file.getCanonicalPath());

        try {
          fis = new FileInputStream(file);
        } catch (FileNotFoundException fnfe) {
          System.out.println("File Not Found"+fnfe);

       }

        Document doc = new Document();
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
        doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

        if (indexWriter.getConfig().getOpenMode() == OpenMode.CREATE) {
          // New index, so we just add the document (no old document can be there):
           System.out.println("adding " + file);
          indexWriter.addDocument(doc);
       } else {
          // Existing index (an old copy of this document may have been indexed) so 
       // we use updateDocument instead to replace the old one matching the exact 
           // path, if present:
            System.out.println("updating " + file);
            indexWriter.updateDocument(new Term("path", file.getPath()), doc);
          }


         fis.close();




    }

}*

score 1 · Accepted Answer

hits 数组比 numTotalHits 长，所以你的 for 循环限制应该是 numTotalHits 而不是 hits.length。

java - Apache lucene 搜索代码打印 null

1 回答 1

Related

Reference