0
import java.io.File;
import java.io.FileOutputStream;

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;

public class myclass {
    public static void main(String[] args) {
        FileOutputStream file = null;
        String result = "<html><body><div>(i) the recognised association shall have the approval of the Forward  Markets  Commission established under the Forward  Contracts (Regulation) Act, 1952 (74 of 1952) in respect of trading in derivatives and shall function in accordance with the guidelines or conditions laid down by the Forward  Markets  Commission; </div>  <body> </html>";

        try {

            TermQuery query = new TermQuery(new Term("f", "Forward  Markets"));
            QueryScorer scorer = new QueryScorer(query);
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(
                    "<span class=\"highlight\">", "</span>");
            org.apache.lucene.search.highlight.Highlighter highlighter = new org.apache.lucene.search.highlight.Highlighter(
                    formatter, scorer);
            Fragmenter fragmenter = new SimpleFragmenter(result.length());
            highlighter.setTextFragmenter(fragmenter);
            TokenStream tokenStream = new StandardAnalyzer(Version.LUCENE_29)
                    .tokenStream("f",

                    new StringReader(result));
            String result1 = highlighter.getBestFragments(tokenStream, result,
                    1, "...");
            if (result1.length() == 0) {
                result1 = result;
            }
            String finalhtml = "<html>" + "<style>\n" + ".highlight{\n"
                    + " background: yellow;\n" + "}\n" + "</style>" + "<body>"
                    + result1 + "</body></html>";

            byte[] contentInBytes = finalhtml.getBytes();
            file = new FileOutputStream(new File("E:\\myhtml.html"));
            file.write(contentInBytes);
            file.flush();
            file.close();
        } catch (Exception ex) {

        }
    }
}

这是我的代码。当我把搜索文本设置为 "recognised" 或 "association" 时,高亮显示完全正常;但当我设置 text="Forward Markets" 时,却没有任何高亮。请告诉我哪里做错了,以及当搜索文本中包含空格时应该如何高亮显示。

4

1 回答 1

0

您的问题在于:recognised 和 association 都是单个词项(term),而 forward markets 不是一个词项。它是两个词项——forward 和 markets——并且应当按顺序相邻出现。这通常被称为“短语”。可以使用 PhraseQuery 来查找它,但通常更简单的方法是使用查询解析器,例如:

StandardQueryParser parser = new StandardQueryParser(analyzer);
Query query = parser.parse("f:\"Forward Markets\"", "f");
QueryScorer scorer = new QueryScorer(query);
//.......

如果您决定手动构建 PhraseQuery,请仔细阅读文档。您必须将每个 Term 单独添加到查询中,例如:

phrasequery.add(new Term("f", "forward"));
phrasequery.add(new Term("f", "markets"));

同样,在手动构建查询时,请注意您的分析器。您上面的查询包含大写字母。手动传递给 TermQuery 的词项不会被分析。如果您使用带有 LowerCaseFilter 的分析器(例如 StandardAnalyzer),索引中的词项都是小写的,这将导致您找不到任何结果。

于 2013-08-07T18:56:09.633 回答