没有直接的方法可以做到这一点,但是我重新考虑了 Jared 的建议,并且能够使解决方案发挥作用。
我在这里记录一下,以防其他人有同样的问题。
创建一个实现的类org.apache.lucene.search.highlight.Formatter
public class HitPositionCollector implements Formatter
{
// MatchOffset is a simple DTO
private List<MatchOffset> matchList;
public HitPositionCollector(
{
matchList = new ArrayList<MatchOffset>();
}
// this ie where the term start and end offset as well as the actual term is captured
@Override
public String highlightTerm(String originalText, TokenGroup tokenGroup)
{
if (tokenGroup.getTotalScore() <= 0)
{
}
else
{
MatchOffset mo= new MatchOffset(tokenGroup.getToken(0).toString(), tokenGroup.getStartOffset(),tokenGroup.getEndOffset());
getMatchList().add(mo);
}
return originalText;
}
/**
* @return the matchList
*/
public List<MatchOffset> getMatchList()
{
return matchList;
}
}
主要代码
public void testHitsWithHitPositionCollector() throws Exception
{
System.out.println(" .... testHitsWithHitPositionCollector");
String fuzzyStr = "bro*";
QueryParser parser = new QueryParser(Version.LUCENE_30, "f", analyzer);
Query fzyQry = parser.parse(fuzzyStr);
TopDocs hits = searcher.search(fzyQry, 10);
QueryScorer scorer = new QueryScorer(fzyQry, "f");
HitPositionCollector myFormatter= new HitPositionCollector();
//Highlighter(Formatter formatter, Scorer fragmentScorer)
Highlighter highlighter = new Highlighter(myFormatter,scorer);
highlighter.setTextFragmenter(
new SimpleSpanFragmenter(scorer)
);
Analyzer analyzer2 = new SimpleAnalyzer();
int loopIndex=0;
//for (ScoreDoc sd : hits.scoreDocs) {
Document doc = searcher.doc( hits.scoreDocs[0].doc);
String title = doc.get("f");
TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(),
hits.scoreDocs[0].doc,
"f",
doc,
analyzer2);
String fragment = highlighter.getBestFragment(stream, title);
System.out.println(fragment);
assertEquals("the quick brown fox jumps over the lazy dog", fragment);
MatchOffset mo= myFormatter.getMatchList().get(loopIndex++);
assertTrue(mo.getEndPos()==15);
assertTrue(mo.getStartPos()==10);
assertTrue(mo.getToken().equals("brown"));
}