3

在向文档添加字段时,我编写了以下代码来测试 SetBoost 方法。

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Lucene;
using Lucene.Net;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Directory = Lucene.Net.Store.Directory;
using Version = Lucene.Net.Util.Version;

namespace LuceneTest
{
    /// <summary>
    /// Console experiment that indexes five small "product" documents with a
    /// boosted <c>title</c> field and prints the score of every document that
    /// matches the prefix "special" in either <c>title</c> or <c>synopsis</c>.
    /// </summary>
    public class LuceneTest
    {
        /// <summary>Index-time boost applied to the title field of every product.</summary>
        private const float TitleBoost = 2f;

        /// <summary>
        /// Builds the index (overwriting any existing one), runs the search and
        /// writes one line per hit: "&lt;Id&gt; &lt;title&gt; &lt;score&gt;".
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var products = new[]
            {
                CreateProduct("1", "Special One", "special synopsis"),
                CreateProduct("2", "Special Two", "special synopsis"),
                CreateProduct("3", "Normal One", "special synopsis"),
                CreateProduct("4", "Normal Two", "special synopsis"),
                CreateProduct("5", "Special Three", "normal synopsis")
            };

            Directory directory = FSDirectory.Open(new DirectoryInfo(Environment.CurrentDirectory + "\\Lucene"));
            try
            {
                IndexProducts(directory, products);

                Console.WriteLine("searching...");
                PrintMatches(directory);
            }
            finally
            {
                // Release the file handles and locks held on the index directory.
                directory.Close();
            }

            Console.WriteLine("done...");
            Console.ReadLine();
        }

        /// <summary>
        /// Creates one product document with a stored, un-analyzed <c>Id</c>, a stored,
        /// analyzed and boosted <c>title</c>, and a stored, analyzed <c>synopsis</c>.
        /// </summary>
        /// <param name="id">Value of the <c>Id</c> field.</param>
        /// <param name="title">Value of the <c>title</c> field (boosted by <see cref="TitleBoost"/>).</param>
        /// <param name="synopsis">Value of the <c>synopsis</c> field.</param>
        /// <returns>The populated document, ready to be added to an index.</returns>
        private static Document CreateProduct(string id, string title, string synopsis)
        {
            var product = new Document();
            product.Add(new Field("Id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));

            var titleField = new Field("title", title, Field.Store.YES, Field.Index.ANALYZED);
            titleField.SetBoost(TitleBoost);
            product.Add(titleField);

            product.Add(new Field("synopsis", synopsis, Field.Store.YES, Field.Index.ANALYZED));
            return product;
        }

        /// <summary>
        /// Writes the given documents into a freshly created index in
        /// <paramref name="directory"/> and optimizes it.
        /// </summary>
        /// <param name="directory">Directory that receives the index.</param>
        /// <param name="products">Documents to add, in order.</param>
        private static void IndexProducts(Directory directory, IEnumerable<Document> products)
        {
            Analyzer analyzer = new StandardAnalyzer();

            // create == true: any index already present in the directory is replaced.
            var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            try
            {
                foreach (var product in products)
                {
                    writer.AddDocument(product);
                }
                writer.Optimize();
            }
            finally
            {
                // Close even when indexing fails so the writer is not left open.
                writer.Close();
            }
        }

        /// <summary>
        /// Searches <c>title</c> and <c>synopsis</c> for terms starting with "special"
        /// and prints every hit with its score.
        /// </summary>
        /// <param name="directory">Directory containing the index to search.</param>
        private static void PrintMatches(Directory directory)
        {
            var indexReader = IndexReader.Open(directory, true);
            try
            {
                var indexSearcher = new IndexSearcher(indexReader);
                try
                {
                    // NOTE: a PrefixQuery is rewritten to a constant-score query, so the
                    // index-time title boost does not influence the scores printed below.
                    // Switch the prefix queries to a scoring rewrite method, or use
                    // TermQuery, if the boost should take effect.
                    var eitherField = new BooleanQuery();
                    eitherField.Add(new BooleanClause(new PrefixQuery(new Term("title", "special")), BooleanClause.Occur.SHOULD));
                    eitherField.Add(new BooleanClause(new PrefixQuery(new Term("synopsis", "special")), BooleanClause.Occur.SHOULD));

                    var query = new BooleanQuery();
                    query.Add(new BooleanClause(eitherField, BooleanClause.Occur.MUST));

                    TopDocs results = indexSearcher.Search(query, (Filter)null, 200);

                    foreach (var hit in results.ScoreDocs)
                    {
                        var document = indexSearcher.Doc(hit.doc);
                        Console.WriteLine(document.Get("Id") + " " + document.Get("title") + " " + hit.score);
                    }
                }
                finally
                {
                    indexSearcher.Close();
                }
            }
            finally
            {
                // The searcher was built on an existing reader, so the reader is closed separately.
                indexReader.Close();
            }
        }
    }
}

我正在使用 Lucene 版本 2.9.4.1。我在标题(title)字段上设置了提升(boost)。当我在标题和概要字段中搜索词项 "special" 时,我希望产品 1、2 和 5 排在最前面,但我得到以下结果:

searching...
1 Special One 1.414214
2 Special Two 1.414214
3 Normal One 0.3535534
4 Normal Two 0.3535534
5 Special Three 0.3535534
done...

产品 5 与产品 3 和 4 的得分相同,尽管它的标题中包含 "special" 一词,只是概要中没有这个词。

任何帮助或想法将不胜感激。谢谢

4

2 个回答

4

我相信问题在于您正在使用 PrefixQueries。前缀查询被重写为恒定评分查询。您可以自己设置重写方法,例如:

// Use a scoring rewrite instead of the default constant-score rewrite so that
// field boosts contribute to the score. Lucene.Net uses PascalCase method names.
PrefixQuery pquery = new PrefixQuery(new Term("title", "special"));
pquery.SetRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);

或者您可以尝试只使用 TermQuery 而不是 PrefixQuery。无论哪种方式,您都应该能看到字段级提升(field-level boost)生效。

哦,请注意,如果您想了解为什么结果会按原样进行评分,您应该查看Searcher.explain。评分变得复杂,这是理解和调整它的非常方便的工具。

于 2012-10-08T21:01:48.183 回答
0

《Lucene in Action》第二版第 49 页 2.5.2 节 "Boosting fields" 中写道:"但请记住,当您想要更改字段或文档的提升时,您必须完全删除然后重新添加整个文档,或者使用 updateDocument 方法,它做的是同样的事情。"

当您使用相同的索引文件进行测试时,我认为您需要在打开索引之前调用 writer.updateDocument。

于 2012-10-08T20:59:07.103 回答