我有一个大小为 1.43 GB 的文件,其中包含 1 亿个字符串(每行一个,每个字符串长度为 3–80 个字符)。我正在使用 Lucene 对该文件进行通配符搜索,目前的做法是为每个字符串创建一个文档。我想得到与搜索关键字(searchkeyword)匹配的结果总数。以下是我的代码:
LuceneDemo.java
公共类 LuceneDemo {
//a path to directory where Lucene will store index files
private static String indexDirectory = "C:\\indextofile";
// a path to directory which contains data files that need to be indexed
private static String dataDirectory = "C:\\indexofilef";
public static int count = 0;
private Searcher indexSearcher;
public static void main(String[] args) throws FileNotFoundException, IOException {
LuceneDemo luceneDemo = new LuceneDemo();
//create Lucene index
luceneDemo.createLuceneIndex();
//create IndexSearcher
luceneDemo.createIndexSearcher();
luceneDemo.termQueryExample();
}
private void createLuceneIndex(){
Indexer indexer = new Indexer(indexDirectory,dataDirectory);
//Create IndexWriter
indexer.createIndexWriter();
try {
//Index data
indexer.indexData();
} catch (FileNotFoundException e) {
throw new RuntimeException(e);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private void createIndexSearcher() throws CorruptIndexException, IOException{
/* Create instance of IndexSearcher
*/
indexSearcher = new IndexSearcher(indexDirectory);
}
private void termQueryExample() throws CorruptIndexException, IOException{
try
{
Directory directory = FSDirectory.getDirectory(indexDirectory);
//IndexSearcher indexSearcher = new IndexSearcher(directory);
BooleanQuery.setMaxClauseCount(102400000);
Term term = new Term("reversecontent", "bubble*com");
Query query = new WildcardQuery(term);
Hits hits = indexSearcher.search(query);
System.out.println("######## Hits :"+hits.length());
}
catch (Exception e) {
e.printStackTrace();
}
}
}
Indexer.java
公共类索引器 { 私有 IndexWriter indexWriter;
/*Location of directory where index files are stored */
private String indexDirectory ;
/*Location of data directory */
private String dataDirectory ;
public String FIELD_CONTENTS = "contents";
public Indexer(String indexDirectory, String dataDirectory){
this.indexDirectory = indexDirectory ;
this.dataDirectory = dataDirectory ;
}
void createIndexWriter(){
if(indexWriter == null){
try{
//Create instance of Directory where index files will be stored
Directory fsDirectory = FSDirectory.getDirectory(indexDirectory);
/* Create instance of analyzer, which will be used to tokenize
the input data */
Analyzer standardAnalyzer = new KeywordAnalyzer();
//Create a new index
boolean create = true;
//Create the instance of deletion policy
IndexDeletionPolicy deletionPolicy =
new KeepOnlyLastCommitDeletionPolicy();
indexWriter =
new IndexWriter(fsDirectory,standardAnalyzer,create,
deletionPolicy,IndexWriter.MaxFieldLength.UNLIMITED);
}catch(IOException ie){
System.out.println("Error in creating IndexWriter");
throw new RuntimeException(ie);
}
}
}
void indexData() throws FileNotFoundException, IOException{
File[] files = getFilesToBeIndxed();
for(File file:files){
FileReader fr = new FileReader(file);
// To store the contents read via File Reader
BufferedReader br = new BufferedReader(fr);
// Read br and store a line in 'data', print data
String data;
System.out.println("start");
while((data = br.readLine()) != null)
{
String newdata = data+".com";
Document doc = new Document();
//doc.add(new Field("content", newdata,
// Store.NO, Index.NOT_ANALYZED));
doc.add(new Field("reversecontent", new StringBuffer(newdata).reverse().toString(),
Store.NO, Index.NOT_ANALYZED));
indexWriter.addDocument(doc);
}
System.out.println("end");
// Add these fields to a Lucene Document
//Step 3: Add this document to Lucene Index.
}
/* Requests an "optimize" operation on an index, priming the
index for the fastest available search */
indexWriter.optimize();
System.out.println("optimization done");
/*
* Commits all changes to the index and closes all associated files.
*/
indexWriter.close();
}
private File[] getFilesToBeIndxed(){
File dataDir = new File(dataDirectory);
if(!dataDir.exists()){
throw new RuntimeException(dataDirectory+" does not exist");
}
File[] files = dataDir.listFiles();
return files;
}
}