Directory directory = FSDirectory.open(indexDir);
IndexReader reader = DirectoryReader.open(directory);
IndexSearcher searcher = new IndexSearcher(reader);
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
QueryParser parser = new QueryParser(Version.LUCENE_41, "contents", analyzer);
Query query = parser.parse(queryStr);
System.out.println("Searching for: " + query.toString("contents"));
TopDocs results = searcher.search(query, maxHits);
ScoreDoc[] hits = results.scoreDocs;
int numTotalHits = results.totalHits;
System.out.println("\n\n\n-----------------------Results--------------------------\n\n\n");
System.out.println(numTotalHits + " total matching documents");
for (int i = 0; i < hits.length; i++) {
int docId = hits[i].doc;
Document d = searcher.doc(docId);
System.out.println(i+":File name is"+d.get("filename"));
}
System.out.println("Found " + hits.length);
我在搜索模块中使用了上面的代码。代码可以正常运行，但得到的输出如下：
390:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2012-12-31.txt
391:File name isnull
392:File name isnull
393:File name isnull
394:File name isnull
395:File name isnull
396:File name isnull
397:File name isnull
398:File name isnull
399:File name isnull
400:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2013-01-09.txt
401:File name isnull
402:File name isnull
403:File name isnull
404:File name isnull
405:File name isnull
406:File name isnull
407:File name isnull
408:File name isnull
409:File name isnull
410:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2013-01-10.txt
在这里，我只想打印包含查询字符串的文件名，但我得到了太多结果，并且大多数结果的文件名为 null。为什么会发生这种情况？
建立索引时，我使用的是以下代码：
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
public class SimpleFileIndexer {
public static void main() throws Exception {
File dataDir = new File("/home/maclean/Installations/apache-tomcat-7.0.21/logs");
File indexDir = new File("/home/maclean/NetBeansProjects/LogSearchEngine/Result");
SimpleFileIndexer indexer = new SimpleFileIndexer();
int numIndex = indexer.index(indexDir, dataDir);
System.out.println("Total files indexed " + numIndex);
}
private int index(File indexDir, File dataDir) throws Exception {
// API and code to convert text into indexable/searchable tokens.
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
//To store an index on disk
Directory directory = FSDirectory.open(indexDir);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_41, analyzer);
int numIndexed;
try (IndexWriter indexWriter = new IndexWriter(directory, config)) {
indexDirectory(indexWriter, dataDir);
numIndexed = indexWriter.maxDoc();
indexWriter.close();
}
return numIndexed;
}
private void indexDirectory(IndexWriter indexWriter, File dataDir) throws IOException {
File[] files = dataDir.listFiles();
for (int i = 0; i < files.length; i++) {
File f = files[i];
if (f.isDirectory()) {
indexDirectory(indexWriter, f);
}
else {
indexFileWithIndexWriter(indexWriter, f);
}
}
}
private void indexFileWithIndexWriter(IndexWriter indexWriter, File file) throws IOException {
FileInputStream fis = null;
if (file.isHidden() || file.isDirectory() || !file.canRead() || !file.exists()) {
return;
}
System.out.println("Indexing file " + file.getCanonicalPath());
try {
fis = new FileInputStream(file);
} catch (FileNotFoundException fnfe) {
System.out.println("File Not Found"+fnfe);
}
Document doc = new Document();
doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
if (indexWriter.getConfig().getOpenMode() == OpenMode.CREATE) {
// New index, so we just add the document (no old document can be there):
System.out.println("adding " + file);
indexWriter.addDocument(doc);
} else {
// Existing index (an old copy of this document may have been indexed) so
// we use updateDocument instead to replace the old one matching the exact
// path, if present:
System.out.println("updating " + file);
indexWriter.updateDocument(new Term("path", file.getPath()), doc);
}
fis.close();
}
}*