在 Java 中,可以使用 `MatchAllDocsQuery()` 来检索索引中的所有文档,但 PyLucene 的文档并没有说明在 Python 中该如何做到这一点。
下面是一段 Python 代码,它执行单个查询,然后从检索到的文档中提取所有字段。
# Filesystem path of the Lucene index; it is opened via
# SimpleFSDirectory.open(Paths.get(INDEX_DIR)) in the script entry point.
# NOTE(review): the value looks like a placeholder - set it to a real index path.
INDEX_DIR = "directory/where/the/document/index/is/stored"
import sys, os, lucene
from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.search import IndexSearcher
def run(searcher, analyzer):
    """Interactively read queries and print the 'doi' field of every hit.

    Loops until an empty line is entered. Each non-empty line is parsed with
    the classic ``QueryParser`` against the "contents" field, at most 50 hits
    are fetched, and for each hit all fields returned by ``doc.getFields()``
    are collected into a name -> string-value dict from which the 'doi'
    entry is printed.

    Args:
        searcher: Lucene ``IndexSearcher`` used to execute the query and to
            load the matching documents.
        analyzer: Lucene analyzer handed to ``QueryParser`` for tokenizing
            the query text.

    Returns:
        None, as soon as an empty query line is entered.
    """
    while True:
        # print() must be *called*: a bare `print` is a no-op expression in
        # Python 3, so the intended blank separator line was never emitted.
        print()
        print("Hit enter with no input to quit.")
        command = input("Query:")
        if not command:
            return

        print()
        print("Searching for:", command)
        query = QueryParser("contents", analyzer).parse(command)
        # NOTE(review): to retrieve every document instead of parsing user
        # input, construct the query object itself, i.e.
        # org.apache.lucene.search.MatchAllDocsQuery(), not the string
        # "MatchAllDocsQuery()" - confirm against the installed PyLucene.
        scoreDocs = searcher.search(query, 50).scoreDocs
        # Only the top 50 hits are requested, so this count is capped at 50.
        print("%s total matching documents." % len(scoreDocs))
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            table = {
                field.name(): field.stringValue()
                for field in doc.getFields()
            }
            # .get() keeps the session alive when a document has no 'doi'
            # field (prints None) instead of aborting with a KeyError.
            print(table.get('doi'))
if __name__ == '__main__':
    # The JVM has to be started before any Lucene class is instantiated.
    lucene.initVM()
    print('lucene', lucene.VERSION)
    # Directory containing this script; not used by the code visible here.
    base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    directory = SimpleFSDirectory.open(Paths.get(INDEX_DIR))
    try:
        print("Directory name is given below")
        print(directory)
        reader = DirectoryReader.open(directory)
        try:
            searcher = IndexSearcher(reader)
            print(searcher)
            analyzer = StandardAnalyzer()
            # Enter the interactive query loop; returns on an empty query.
            run(searcher, analyzer)
        finally:
            # Explicitly release the index files; merely deleting the
            # Python reference to the searcher does not close the reader.
            reader.close()
    finally:
        directory.close()