1

我使用的是 Solr 5.3.1。我希望把匹配项更多的文档排在搜索结果的最前面。

为此,我在模式的每个字段上都应用了 omitNorms=true。我还实现了自定义相似性类。我的相似性类如下所示:

package org.apache.lucene.search.similarities;

import org.apache.lucene.index.FieldInvertState;



 /**
  * Similarity that replaces the idf, tf and length-norm factors with one fixed
  * value and squares the coordination factor computed by the parent class.
  *
  * <p>NOTE(review): the apparent intent is that only the share of matched query
  * clauses (coord) differentiates document scores; confirm against the query
  * types actually issued.
  */
 public class MyDefaultSimilarity extends DefaultSimilarity {

     /** Fixed value returned by every factor that this class neutralises. */
     private static final float CONSTANT_FACTOR = 0.5f;

     /**
      * Returns the square of the parent's coordination factor and prints it
      * to standard output for debugging.
      *
      * @param overlap    passed through unchanged to the parent implementation
      * @param maxOverlap passed through unchanged to the parent implementation
      * @return the parent's coord value raised to the power of two
      */
     @Override
     public float coord(int overlap, int maxOverlap) {
         // Compute once so the printed value and the returned value cannot diverge.
         final double squaredCoord = Math.pow(super.coord(overlap, maxOverlap), 2);
         System.out.println("Coord:" + squaredCoord);
         return (float) squaredCoord;
     }

     /** Ignores the term frequency and always returns the fixed factor. */
     @Override
     public float tf(float freq) {
         return CONSTANT_FACTOR;
     }

     /** Ignores the document statistics and always returns the fixed factor. */
     @Override
     public float idf(long docFreq, long numDocs) {
         return CONSTANT_FACTOR;
     }

     /** Ignores the field state and always returns the fixed factor. */
     @Override
     public float lengthNorm(FieldInvertState arg0) {
         return CONSTANT_FACTOR;
     }

 }

我在 schema.xml 中对相似性类进行了以下更改

 <!-- General-purpose text field type: standard tokenizer, stop-word removal and
      lower-casing at index and query time, plus synonym expansion at query time.
      Scoring for fields of this type uses the custom similarity declared below. -->
 <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
        <!-- in this example, we will only use synonyms at query time
        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
        -->
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>

      <!-- Custom similarity class applied to this field type. -->
      <similarity class="org.apache.lucene.search.similarities.MyDefaultSimilarity"> </similarity>
    </fieldType>

      <!-- Global similarity for the schema. Per the Solr reference guide,
           SchemaSimilarityFactory is what allows an individual fieldType to
           declare its own <similarity> element. -->
<similarity class="solr.SchemaSimilarityFactory"/>

我在 solrconfig.xml 中为相似性类做了以下更改

<!-- Adds to the classpath every file in the dist/ directory (resolved under
     solr.install.dir, falling back to ../../../..) whose name matches the regex.
     NOTE(review): presumably this is the jar that packages the custom
     similarity class; confirm the jar's actual file name matches. -->
<lib dir="${solr.install.dir:../../../..}/dist/" regex="SimilaritySolr.*\.jar" />

我已经用调试模式执行了查询,但每个文档的分数仍然都等于 1。下面是调试查询的输出,它显示了影响分数的各个参数:

1.0 = *:*, product of:
  1.0 = boost
  1.0 = queryNorm

请问我是否还遗漏了什么,才能让匹配项更多的文档获得更高的分数?

4

0 回答 0