1

我们正在评估将 Solr 用作搜索解决方案,但在检索得分方面遇到了问题。我们已经完成了抓取和索引,但所有文档的得分都是 0.0;据我们所知,我们并没有更改任何与评分相关的配置。

如果有人能复现这个问题并告诉我们原因,那就太好了,这将对我们有很大的帮助。

Solr 版本是 4.2

4

3 回答 3

0
    <schema name="nutch" version="1.5">
    <types>
    <!-- Stemmed English text type. In this schema it is referenced by content,
         title, the metatag.* fields and the *_t dynamic field. The query chain
         differs from the index chain only in that it expands synonyms and does
         not catenate word/number parts. Any change to the index chain requires
         a full reindex. -->
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <!-- Must come before the stemmer: terms listed in protwords.txt are
             flagged as keywords so that the stemmer leaves them untouched. -->
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.SnowballPorterFilterFactory" language="English"/>
        <!-- Runs last so that duplicates created by stemming (same term at the
             same position) are removed as well. -->
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
        <!-- Synonyms are expanded at query time only. -->
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.SnowballPorterFilterFactory" language="English"/>
        <!-- Runs last: expanded synonyms that stem to the same term at the same
             position collapse into a single token. -->
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>
        <!-- Non-analyzed string type. -->
        <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>

        <!-- Trie numeric types declared with precisionStep="0". -->
        <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>

        <!-- Boolean type: "true" or "false". -->
        <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>

        <!-- Binary data type. Values are sent and retrieved as Base64-encoded strings.
             The element name is spelled fieldType (camel case) like every other
             declaration in this schema. -->
        <fieldType name="binary" class="solr.BinaryField"/>

        <!-- Trie types with a non-zero precisionStep (8 for numbers, 6 for dates);
             they back the *_ti, *_tf, *_tl, *_td and *_tdt dynamic fields. -->
        <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>

    <!-- A general unstemmed text field - good if one does not know the language of the field -->
    <fieldType name="textgen"
               class="solr.TextField"
               positionIncrementGap="100">
      <!-- Index side: whitespace tokens, stop words removed, word parts split
           and catenated, everything lowercased. No stemming. -->
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                words="stopwords.txt"
                ignoreCase="true"
                enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
      <!-- Query side: same chain plus synonym expansion, and without
           catenation of word/number parts. -->
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                words="stopwords.txt"
                ignoreCase="true"
                enablePositionIncrements="true"/>
        <filter class="solr.SynonymFilterFactory"
                synonyms="synonyms.txt"
                ignoreCase="true"
                expand="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>


    <!-- Type behind the random_* dynamic field (solr.RandomSortField). -->
    <fieldType name="random" class="solr.RandomSortField" indexed="true"/>

    <!-- Date type with precisionStep="0"; used by tstamp, date, lastModified,
         publishedDate, updatedDate and the *_dt dynamic field. -->
    <fieldType name="date" class="solr.TrieDateField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<!-- Type of the catch-all "text" field (the default search field). Same chain as
     the "text" type plus a mapping char filter. Any change to the index chain
     requires a full reindex. -->
<fieldType name="fullText" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
  <analyzer type="index">
    <!-- Char filters operate on the raw character stream, so they are declared
         before the tokenizer. Mapping rules come from mapping-FoldToASCII.txt
         (presumably accent folding, judging by the file name; confirm). -->
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
    <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Must come before the stemmer: terms listed in protwords.txt are flagged
         as keywords so that the stemmer leaves them untouched. -->
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.SnowballPorterFilterFactory" language="English"/>
    <!-- Runs last so that duplicates created by stemming (same term at the same
         position) are removed as well. -->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
    <!-- Synonyms are expanded at query time only. -->
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.SnowballPorterFilterFactory" language="English"/>
    <!-- Runs last: expanded synonyms that stem to the same term at the same
         position collapse into a single token. -->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>

  <!-- For Spellcheck -->
    <fieldType name="textSpell"
               class="solr.TextField"
               positionIncrementGap="100">
      <!-- Unstemmed chain feeding the "spell" field: standard tokenizer, stop
           words removed, lowercased, duplicates dropped. -->
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                words="stopwords.txt"
                ignoreCase="true"
                enablePositionIncrements="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
      <!-- Query side adds synonym expansion between stop-word removal and
           lowercasing. -->
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                words="stopwords.txt"
                ignoreCase="true"
                enablePositionIncrements="true"/>
        <filter class="solr.SynonymFilterFactory"
                synonyms="synonyms.txt"
                ignoreCase="true"
                expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

   <!-- lowercases the entire field value, keeping it as a single token.  -->
    <fieldType name="lowercase"
               class="solr.TextField"
               positionIncrementGap="100">
      <!-- One analyzer for both indexing and querying: the keyword tokenizer
           emits the whole value as one token, which is then lowercased.
           Used by the titleSort field. -->
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

   <!-- Type of the url and host fields: standard tokenization, lowercasing,
        then splitting into word and number parts. The same analyzer is used
        for indexing and querying. -->
   <fieldType name="url" class="solr.TextField" positionIncrementGap="100">
     <analyzer>
       <tokenizer class="solr.StandardTokenizerFactory"/>
       <filter class="solr.LowerCaseFilterFactory"/>
       <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"/>
     </analyzer>
   </fieldType>
    </types>
    <fields>
        <!-- Document key (declared as uniqueKey further down). -->
        <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>

        <!-- Spellcheck field: index-only, populated through copyField. -->
        <field name="spell" type="textSpell" indexed="true" stored="false" required="false" multiValued="true"/>

        <!-- Sort key: lowercased single-token copy of title. -->
        <field name="titleSort" type="lowercase" indexed="true" stored="false" required="false"/>

        <!-- Core fields: stored for retrieval only, not searchable. -->
        <field name="segment" type="string" indexed="false" stored="true"/>
        <field name="digest" type="string" indexed="false" stored="true"/>
        <field name="boost" type="float" indexed="false" stored="true"/>

        <!-- Fields for the index-basic plugin. -->
        <field name="host" type="url" indexed="true" stored="false"/>
        <field name="site" type="string" indexed="true" stored="false"/>
        <field name="url" type="url" indexed="true" stored="true"/>
        <field name="content" type="text" indexed="true" stored="true"/>
        <field name="title" type="text" indexed="true" stored="true"/>
        <field name="cache" type="string" indexed="false" stored="true"/>
        <field name="tstamp" type="date" indexed="true" stored="true"/>

        <!-- Solr-internal document version field. -->
        <field name="_version_" type="long" indexed="true" stored="true"/>

        <!-- Fields for the index-anchor plugin. -->
        <field name="anchor" type="string" indexed="true" stored="true" multiValued="true"/>

        <!-- Fields for the index-more plugin. -->
        <field name="type" type="string" indexed="true" stored="true" multiValued="true"/>
        <field name="contentLength" type="long" indexed="false" stored="true"/>
        <field name="lastModified" type="date" indexed="false" stored="true"/>
        <field name="date" type="date" indexed="true" stored="true"/>

        <!-- Fields for the languageidentifier plugin. -->
        <field name="lang" type="string" indexed="true" stored="true"/>

        <!-- KS: page meta tags (description and keywords). -->
        <field name="metatag.description" type="text" indexed="true" stored="true"/>
        <field name="metatag.keywords" type="text" indexed="true" stored="true"/>

        <!-- Fields for the subcollection plugin. -->
        <field name="subcollection" type="string" indexed="true" stored="true" multiValued="true"/>

        <!-- Fields for the feed plugin (tag is also used by microformats-reltag). -->
        <field name="author" type="string" indexed="true" stored="true"/>
        <field name="tag" type="string" indexed="true" stored="true" multiValued="true"/>
        <field name="feed" type="string" indexed="true" stored="true"/>
        <field name="publishedDate" type="date" indexed="true" stored="true"/>
        <field name="updatedDate" type="date" indexed="true" stored="true"/>

        <!-- Fields for the creativecommons plugin. -->
        <field name="cc" type="string" indexed="true" stored="true" multiValued="true"/>

        <!-- Catch-all field backing the default search configuration; index-only
             and filled through copyField from anchor, title and content.
             NOTE(review): omitNorms is not set here, so norms (length
             normalization and any index-time boost) presumably take part in
             scoring on this field; confirm against the fieldNorm values
             reported by debug output. -->
        <field name="text" type="fullText" indexed="true" stored="false" required="false" multiValued="true"/>

<!-- For Extract Handler -->

        <!-- copyField rules duplicate a source field into another field when a
             document is added. They are used here to index the same value
             with a different analysis chain and to gather several fields into
             one for simpler, faster searching. -->

        <!-- Default search field: anchor + title + content. -->
        <copyField dest="text" source="anchor"/>
        <copyField dest="text" source="title"/>
        <copyField dest="text" source="content"/>
        <!-- Disabled: no "description" or "keywords" field is declared among
             the fields visible in this schema. -->
<!--                <copyField source="description" dest="text"/>-->
<!--                <copyField source="keywords" dest="text"/> -->

        <!-- Spellcheck field: same three sources. -->
        <copyField dest="spell" source="anchor"/>
        <copyField dest="spell" source="title"/>
        <copyField dest="spell" source="content"/>

        <!-- Sort key derived from the title. -->
        <copyField dest="titleSort" source="title"/>

 <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
        will be used if the name matches any of the patterns.
        RESTRICTION: the glob-like pattern in the name attribute must have
        a "*" only at the start or the end.
        EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, z_i)
        Longer patterns will be matched first.  if equal size patterns
        both match, the first appearing in the schema will be used.  -->
   <!-- Suffix patterns mapped to the precisionStep="0" and basic types. -->
   <dynamicField name="*_i" type="int" stored="true" indexed="true"/>
   <dynamicField name="*_s" type="string" stored="true" indexed="true"/>
   <dynamicField name="*_l" type="long" stored="true" indexed="true"/>
   <dynamicField name="*_t" type="text" stored="true" indexed="true"/>
   <dynamicField name="*_b" type="boolean" stored="true" indexed="true"/>
   <dynamicField name="*_f" type="float" stored="true" indexed="true"/>
   <dynamicField name="*_d" type="double" stored="true" indexed="true"/>
   <dynamicField name="*_dt" type="date" stored="true" indexed="true"/>

   <!-- Trie-coded variants for faster range queries. -->
   <dynamicField name="*_ti" type="tint" stored="true" indexed="true"/>
   <dynamicField name="*_tl" type="tlong" stored="true" indexed="true"/>
   <dynamicField name="*_tf" type="tfloat" stored="true" indexed="true"/>
   <dynamicField name="*_td" type="tdouble" stored="true" indexed="true"/>
   <dynamicField name="*_tdt" type="tdate" stored="true" indexed="true"/>

   <!-- Prefix patterns: multi-valued unstemmed attributes and random sorting. -->
   <dynamicField name="attr_*" type="textgen" stored="true" indexed="true" multiValued="true"/>
   <dynamicField name="random_*" type="random"/>



    </fields>
    <!-- Field that uniquely identifies a document. -->
    <uniqueKey>id</uniqueKey>

    <!-- Field searched when a query names no field.
         NOTE(review): defaultSearchField and the defaultOperator attribute are
         believed to be deprecated in Solr 4.x in favour of the df and q.op
         request parameters; confirm before upgrading. -->
    <defaultSearchField>text</defaultSearchField>
    <solrQueryParser defaultOperator="OR"/>

    <!-- Scoring model, declared explicitly. -->
    <similarity class="org.apache.lucene.search.similarities.DefaultSimilarity"/>
    </schema>
于 2013-04-18T08:48:34.350 回答
0
Query : select?q=Menu&wt=xml&indent=true&debug=true&fl=*,score

This XML file does not appear to have any style information associated with it. The document tree is shown below.

−
<response>
−
<lst name="responseHeader">
<int name="status">0</int>
<int name="QTime">3</int>
−
<lst name="params">
<str name="fl">id,title,score</str>
<str name="indent">true</str>
<str name="q">Menu</str>
<str name="debug">true</str>
<str name="wt">xml</str>
</lst>
</lst>
−
<result name="response" numFound="6" start="0" maxScore="0.0">
−
<doc>
<str name="title">English :: McDonalds.ca</str>
<str name="id">http://www.mcdonalds.ca/</str>
<float name="score">0.0</float>
</doc>
−
<doc>
<str name="title">Corporate Opportunities :: McDonalds.ca</str>
−
<str name="id">
http://www.mcdonalds.ca/ca/en/careers/corp_opp/corporate_opportunities.html
</str>
<float name="score">0.0</float>
</doc>
−
<doc>
<str name="title">People :: McDonalds.ca</str>
−
<str name="id">
http://www.mcdonalds.ca/ca/en/careers/rest_opp/people.html
</str>
<float name="score">0.0</float>
</doc>
−
<doc>
<str name="title">Training, Education and Benefits :: McDonalds.ca</str>
−
<str name="id">
http://www.mcdonalds.ca/ca/en/careers/rest_opp/training_education_and_benefits.html
</str>
<float name="score">0.0</float>
</doc>
−
<doc>
<str name="title">Working Here :: McDonalds.ca</str>
−
<str name="id">
http://www.mcdonalds.ca/ca/en/careers/rest_opp/working_here.html
</str>
<float name="score">0.0</float>
</doc>
−
<doc>
<str name="title">Environment :: McDonalds.ca</str>
−
<str name="id">
http://www.mcdonalds.ca/ca/en/communities/environment.html
</str>
<float name="score">0.0</float>
</doc>
</result>
−
<lst name="spellcheck">
<lst name="suggestions"/>
</lst>
−
<lst name="debug">
−
<lst name="queryBoosting">
<str name="q">menu</str>
<null name="match"/>
</lst>
<str name="rawquerystring">Menu</str>
<str name="querystring">Menu</str>
<str name="parsedquery">text:menu</str>
<str name="parsedquery_toString">text:menu</str>
−
<lst name="explain">
−
<str name="http://www.mcdonalds.ca/">

0.0 = (MATCH) weight(text:menu in 0) [DefaultSimilarity], result of:
  0.0 = fieldWeight in 0, product of:
    1.4142135 = tf(freq=2.0), with freq of:
      2.0 = termFreq=2.0
    0.84584934 = idf(docFreq=6, maxDocs=6)
    0.0 = fieldNorm(doc=0)
</str>
−
<str name="http://www.mcdonalds.ca/ca/en/careers/corp_opp/corporate_opportunities.html">

0.0 = (MATCH) weight(text:menu in 1) [DefaultSimilarity], result of:
  0.0 = fieldWeight in 1, product of:
    1.0 = tf(freq=1.0), with freq of:
      1.0 = termFreq=1.0
    0.84584934 = idf(docFreq=6, maxDocs=6)
    0.0 = fieldNorm(doc=1)
</str>
−
<str name="http://www.mcdonalds.ca/ca/en/careers/rest_opp/people.html">

0.0 = (MATCH) weight(text:menu in 2) [DefaultSimilarity], result of:
  0.0 = fieldWeight in 2, product of:
    1.0 = tf(freq=1.0), with freq of:
      1.0 = termFreq=1.0
    0.84584934 = idf(docFreq=6, maxDocs=6)
    0.0 = fieldNorm(doc=2)
</str>
−
<str name="http://www.mcdonalds.ca/ca/en/careers/rest_opp/training_education_and_benefits.html">

0.0 = (MATCH) weight(text:menu in 3) [DefaultSimilarity], result of:
  0.0 = fieldWeight in 3, product of:
    1.0 = tf(freq=1.0), with freq of:
      1.0 = termFreq=1.0
    0.84584934 = idf(docFreq=6, maxDocs=6)
    0.0 = fieldNorm(doc=3)
</str>
−
<str name="http://www.mcdonalds.ca/ca/en/careers/rest_opp/working_here.html">

0.0 = (MATCH) weight(text:menu in 4) [DefaultSimilarity], result of:
  0.0 = fieldWeight in 4, product of:
    1.0 = tf(freq=1.0), with freq of:
      1.0 = termFreq=1.0
    0.84584934 = idf(docFreq=6, maxDocs=6)
    0.0 = fieldNorm(doc=4)
</str>
−
<str name="http://www.mcdonalds.ca/ca/en/communities/environment.html">

0.0 = (MATCH) weight(text:menu in 5) [DefaultSimilarity], result of:
  0.0 = fieldWeight in 5, product of:
    1.0 = tf(freq=1.0), with freq of:
      1.0 = termFreq=1.0
    0.84584934 = idf(docFreq=6, maxDocs=6)
    0.0 = fieldNorm(doc=5)
</str>
</lst>
<str name="QParser">LuceneQParser</str>
−
<lst name="timing">
<double name="time">3.0</double>
−
<lst name="prepare">
<double name="time">1.0</double>
−
<lst name="query">
<double name="time">0.0</double>
</lst>
−
<lst name="facet">
<double name="time">0.0</double>
</lst>
−
<lst name="mlt">
<double name="time">0.0</double>
</lst>
−
<lst name="highlight">
<double name="time">0.0</double>
</lst>
−
<lst name="stats">
<double name="time">0.0</double>
</lst>
−
<lst name="spellcheck">
<double name="time">0.0</double>
</lst>
−
<lst name="elevator">
<double name="time">1.0</double>
</lst>
−
<lst name="debug">
<double name="time">0.0</double>
</lst>
</lst>
−
<lst name="process">
<double name="time">2.0</double>
−
<lst name="query">
<double name="time">0.0</double>
</lst>
−
<lst name="facet">
<double name="time">0.0</double>
</lst>
−
<lst name="mlt">
<double name="time">0.0</double>
</lst>
−
<lst name="highlight">
<double name="time">0.0</double>
</lst>
−
<lst name="stats">
<double name="time">0.0</double>
</lst>
−
<lst name="spellcheck">
<double name="time">0.0</double>
</lst>
−
<lst name="elevator">
<double name="time">0.0</double>
</lst>
−
<lst name="debug">
<double name="time">2.0</double>
</lst>
</lst>
</lst>
</lst>
</response>
于 2013-04-18T08:57:38.253 回答
0

我的过滤器中配置的是 www.mcdonalds.ca 这个域名。由于该地址会被重定向到 www.mcdonalds.ca/ca/en.html,因此所有 URL 的得分都是 0。

我修改了顶层 URL:不再使用 www.mcdonalds.ca,而是改为指定 www.mcdonalds.ca/ca/en.html,之后就正常了。

请通过以下网址了解更多详情 -

http://grokbase.com/t/nutch/user/117cpvp70q/a-possible-solution-to-my-url-redirection-and-zero-scores-problem

于 2013-04-19T07:24:50.123 回答