/* * 要更改此模板,请选择工具 | 模板 * 并在编辑器中打开模板。*/
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import org.apache.lucene.store.Directory;
public class OpenBookCrackIndexer {
public static final Version luceneVersion = Version.LUCENE_43;
/**
* @param args the command line arguments
*/
public static int index(File indexDir, Directory dataDir) throws IOException {
IndexWriterConfig luceneConfig = new IndexWriterConfig(
luceneVersion, new StandardAnalyzer(luceneVersion));
IndexWriter writer = new IndexWriter(dataDir, luceneConfig);
File[] files = indexDir.listFiles();
for (File file : files) {
if (file.getName().endsWith(".pdf")) {
Document document = new Document();
String path = file.getCanonicalPath();
document.getField(path);
Reader reader = new FileReader(file);
document.add(new TextField(path.toString(), reader));
writer.addDocument(document);
}
}
return writer.numDocs();
}
public static void search(String str) throws IOException, ParseException {
if (str.isEmpty()) {
System.out.println("Error :"+str.toString());
}
File dataDir = new File("C:\\Users\\XXXXX\\Desktop\\print_imp\\Lucene");
if (!dataDir.exists()) {
throw new IOException(dataDir + "does not exist or is not a directory");
}
// String[] files = dataDir.list();
// for(int i = 0;i< files.length;i++){
// System.out.println(files[i].toString());
// }
Directory toSearch = new SimpleFSDirectory(dataDir);
IndexReader indexreader = DirectoryReader.open(toSearch);
IndexSearcher searcher = new IndexSearcher(indexreader);
StandardAnalyzer analyzer = new StandardAnalyzer(luceneVersion);
QueryParser queryParser = new QueryParser(luceneVersion, "Contents", analyzer);
Query query = queryParser.parse(str);
TopDocs td = searcher.search(query, 20);
System.out.println("Number of hits: " + td.totalHits);
for (int i = 0; i < td.totalHits; i++) {
System.out.println("Doc Number " + td.scoreDocs[i].doc + "Score :" + td.scoreDocs[i].score);
}
}
public static void main(String[] args) throws Exception {
File indexDir = new File("C:\\Users\\XXXXX\\Desktop\\print_imp");
File dataDir = new File("C:\\Users\\XXXXX\\Desktop\\print_imp\\Lucene");
if (!indexDir.exists() || !dataDir.exists()) {
throw new IOException(dataDir + "does not exist or is not a directory");
}
//SimpleFSDirectory SDindexDir = new SimpleFSDirectory(indexDir);
SimpleFSDirectory SDdataDir = new SimpleFSDirectory(dataDir);
//Directory dirIndex = SDindexDir;
Directory dirData = SDdataDir;
//long start = new Date().getTime();
//int numIndexed = index(indexDir, dirData);
//long end = new Date().getTime();
//System.out.println("Indexed :" + numIndexed + " Time Took to Index: " + (end - start) + " milliseconds");
search("Algorithms");
}
}
在这里,我正在尝试使用 lucene 4.3 进行基本文件目录索引。来自 Lucene in Action 一书。并且由于版本更改,我无法按原样使用代码... 任何人都可以帮我解决我遇到的错误...
错误
Exception in thread "main" org.apache.lucene.index.IndexNotFoundException: no segments* file found in org.apache.lucene.store.SimpleFSDirectory@C:\<PATH>lockFactory=org.apache.lucene.store.NativeFSLockFactory@52fe85: files: [write.lock, _0.fdt, _0.fdx, _0.fnm, _0.nvd, _0.nvm, _0.si, _0_Lucene41_0.doc, _0_Lucene41_0.pos, _0_Lucene41_0.tim, _0_Lucene41_0.tip, _1.fdt, _1.fdx, _1.fnm, _1.nvd, _1.nvm, _1.si, _1_Lucene41_0.doc, _1_Lucene41_0.pos, _1_Lucene41_0.tim, _1_Lucene41_0.tip, _2.fdt, _2.fdx, _2.fnm, _2.nvd, _2.nvm, _2.si, _2_Lucene41_0.doc, _2_Lucene41_0.pos, _2_Lucene41_0.tim, _2_Lucene41_0.tip, _3.fdt, _3.fdx, _3.fnm, _3.nvd, _3.nvm, _3.si, _3_Lucene41_0.doc, _3_Lucene41_0.pos, _3_Lucene41_0.tim, _3_Lucene41_0.tip, _4.cfe, _4.cfs, _4.si, _5.cfe, _5.cfs, _5.si, _6.cfe, _6.cfs, _6.si, _7.cfe, _7.cfs, _7.si, _8.fdt, _8.fdx]
at org.apache.lucene.index.SegmentInfos$FindSegmentsFile.run(SegmentInfos.java:741)
at org.apache.lucene.index.StandardDirectoryReader.open(StandardDirectoryReader.java:52)
at org.apache.lucene.index.DirectoryReader.open(DirectoryReader.java:66)