我们正在评估将 Solr 用作搜索解决方案,但在检索得分方面遇到了问题。抓取和索引都已经完成,但所有结果的得分都是 0.0;据我们所知,我们并没有修改任何与评分相关的配置。
如果有人能够复现这个问题并告诉我们原因,那将对我们有很大的帮助。
Solr 版本是 4.2
<schema name="nutch" version="1.5">
  <!--
    Solr schema for a Nutch-populated index.
    <types> declares the field types; <fields> declares the concrete fields,
    the copyField rules and the dynamic fields; the elements after </fields>
    set the unique key, the default search field, the default query operator
    and the similarity implementation.
  -->
  <types>
    <!--
      Stemmed English text. Index and query analyzers share the same chain
      (whitespace tokenizer, stop words, word delimiter, lowercase, protected
      words, duplicate removal, Snowball English stemming); the query analyzer
      additionally expands synonyms and does not catenate word/number parts.
      NOTE(review): RemoveDuplicatesTokenFilterFactory runs before the stemmer,
      so duplicates that only appear after stemming are not removed. The order
      is kept as-is because changing it would alter the indexed tokens.
    -->
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1" generateNumberParts="1"
                catenateWords="1" catenateNumbers="1" catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
        <filter class="solr.SnowballPorterFilterFactory" language="English" />
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1" generateNumberParts="1"
                catenateWords="0" catenateNumbers="0" catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
        <filter class="solr.SnowballPorterFilterFactory" language="English" />
      </analyzer>
    </fieldType>

    <!-- Non-analyzed string and plain numeric types (precisionStep="0", norms omitted) -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>

    <!-- boolean type: "true" or "false" -->
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>

    <!-- Binary data type. The data should be sent/retrieved as Base64 encoded strings -->
    <fieldType name="binary" class="solr.BinaryField"/>

    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>

    <!-- Trie types with a non-zero precisionStep (used by the *_t? dynamic fields below) -->
    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>

    <!-- A general unstemmed text field - good if one does not know the language of the field -->
    <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1" generateNumberParts="1"
                catenateWords="1" catenateNumbers="1" catenateAll="0"
                splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1" generateNumberParts="1"
                catenateWords="0" catenateNumbers="0" catenateAll="0"
                splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <fieldType name="random" class="solr.RandomSortField" indexed="true" />
    <fieldType name="date" class="solr.TrieDateField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>

    <!--
      Type of the default search field "text". Same chain as the "text" type
      plus an ASCII-folding character mapping. The charFilter is declared
      first in each analyzer because character filters process the raw text
      before the tokenizer runs.
    -->
    <fieldType name="fullText" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <analyzer type="index">
        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1" generateNumberParts="1"
                catenateWords="1" catenateNumbers="1" catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
        <filter class="solr.SnowballPorterFilterFactory" language="English" />
      </analyzer>
      <analyzer type="query">
        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt" />
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1" generateNumberParts="1"
                catenateWords="0" catenateNumbers="0" catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
        <filter class="solr.SnowballPorterFilterFactory" language="English" />
      </analyzer>
    </fieldType>

    <!-- For Spellcheck: standard tokenizer, stop words, lowercase, no stemming -->
    <fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- lowercases the entire field value, keeping it as a single token. -->
    <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory" />
      </analyzer>
    </fieldType>

    <!-- URL / host text: standard tokenizer, lowercase, then word-delimiter splitting -->
    <fieldType name="url" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"/>
      </analyzer>
    </fieldType>
  </types>

  <fields>
    <field name="id" type="string" stored="true" indexed="true" required="true" multiValued="false" />

    <!-- Spellcheck field -->
    <field name="spell" type="textSpell" indexed="true" stored="false" required="false" multiValued="true" />

    <!-- For Sorting purpose -->
    <field name="titleSort" type="lowercase" indexed="true" stored="false" required="false"/>

    <!-- core fields -->
    <field name="segment" type="string" stored="true" indexed="false"/>
    <field name="digest" type="string" stored="true" indexed="false"/>
    <field name="boost" type="float" stored="true" indexed="false"/>

    <!-- fields for index-basic plugin -->
    <field name="host" type="url" stored="false" indexed="true"/>
    <field name="site" type="string" stored="false" indexed="true"/>
    <field name="url" type="url" stored="true" indexed="true"/>
    <field name="content" type="text" stored="true" indexed="true"/>
    <field name="title" type="text" stored="true" indexed="true"/>
    <field name="cache" type="string" stored="true" indexed="false"/>
    <field name="tstamp" type="date" stored="true" indexed="true"/>
    <field name="_version_" type="long" indexed="true" stored="true"/>

    <!-- fields for index-anchor plugin -->
    <field name="anchor" type="string" stored="true" indexed="true" multiValued="true"/>

    <!-- fields for index-more plugin -->
    <field name="type" type="string" stored="true" indexed="true" multiValued="true"/>
    <field name="contentLength" type="long" stored="true" indexed="false"/>
    <field name="lastModified" type="date" stored="true" indexed="false"/>
    <field name="date" type="date" stored="true" indexed="true"/>

    <!-- fields for languageidentifier plugin -->
    <field name="lang" type="string" stored="true" indexed="true"/>

    <!-- KS: metatag fields -->
    <field name="metatag.description" type="text" stored="true" indexed="true"/>
    <field name="metatag.keywords" type="text" stored="true" indexed="true"/>

    <!-- fields for subcollection plugin -->
    <field name="subcollection" type="string" stored="true" indexed="true" multiValued="true"/>

    <!-- fields for feed plugin (tag is also used by microformats-reltag)-->
    <field name="author" type="string" stored="true" indexed="true"/>
    <field name="tag" type="string" stored="true" indexed="true" multiValued="true"/>
    <field name="feed" type="string" stored="true" indexed="true"/>
    <field name="publishedDate" type="date" stored="true" indexed="true"/>
    <field name="updatedDate" type="date" stored="true" indexed="true"/>

    <!-- fields for creativecommons plugin -->
    <field name="cc" type="string" stored="true" indexed="true" multiValued="true"/>

    <!--
      This field has been added to implement the default search configuration.
      It is the target of the copyField rules below and of <defaultSearchField>.
      NOTE(review): norms are not omitted on this field, so any index-time
      boost is folded into its fieldNorm; a zero boost at index time would
      yield fieldNorm = 0 and therefore a score of 0.0. Confirm the boost
      values sent by the indexer before changing omitNorms here.
    -->
    <field name="text" type="fullText" indexed="true" stored="false" required="false" multiValued="true" />

    <!-- For Extract Handler -->

    <!-- copyField commands copy one field to another at the time a document
         is added to the index. It's used either to index the same field differently,
         or to add multiple fields to the same field for easier/faster searching. -->
    <copyField source="anchor" dest="text"/>
    <copyField source="title" dest="text"/>
    <copyField source="content" dest="text"/>
    <!-- <copyField source="description" dest="text"/>-->
    <!-- <copyField source="keywords" dest="text"/> -->

    <!-- For Spell Check -->
    <copyField source="anchor" dest="spell"/>
    <copyField source="title" dest="spell"/>
    <copyField source="content" dest="spell"/>

    <!-- For Sorting purpose -->
    <copyField source="title" dest="titleSort"/>

    <!-- Dynamic field definitions. If a field name is not found, dynamicFields
         will be used if the name matches any of the patterns.
         RESTRICTION: the glob-like pattern in the name attribute must have
         a "*" only at the start or the end.
         EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
         Longer patterns will be matched first. If equal size patterns
         both match, the first appearing in the schema will be used. -->
    <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
    <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
    <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
    <dynamicField name="*_t" type="text" indexed="true" stored="true"/>
    <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
    <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
    <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
    <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>

    <!-- some trie-coded dynamic fields for faster range queries -->
    <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
    <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
    <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
    <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
    <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>

    <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/>
    <dynamicField name="random_*" type="random" />
  </fields>

  <!-- Field that uniquely identifies a document -->
  <uniqueKey>id</uniqueKey>

  <!-- NOTE(review): defaultSearchField and solrQueryParser/defaultOperator are
       deprecated in Solr 4.x in favour of the "df" and "q.op" request parameters.
       They are kept here so that existing queries behave the same. -->
  <defaultSearchField>text</defaultSearchField>
  <solrQueryParser defaultOperator="OR"/>

  <!-- Global similarity used for scoring -->
  <similarity class="org.apache.lucene.search.similarities.DefaultSimilarity"/>
</schema>
Query : select?q=Menu&wt=xml&indent=true&debug=true&fl=*,score
This XML file does not appear to have any style information associated with it. The document tree is shown below.
−
<response>
−
<lst name="responseHeader">
<int name="status">0</int>
<int name="QTime">3</int>
−
<lst name="params">
<str name="fl">id,title,score</str>
<str name="indent">true</str>
<str name="q">Menu</str>
<str name="debug">true</str>
<str name="wt">xml</str>
</lst>
</lst>
−
<result name="response" numFound="6" start="0" maxScore="0.0">
−
<doc>
<str name="title">English :: McDonalds.ca</str>
<str name="id">http://www.mcdonalds.ca/</str>
<float name="score">0.0</float>
</doc>
−
<doc>
<str name="title">Corporate Opportunities :: McDonalds.ca</str>
−
<str name="id">
http://www.mcdonalds.ca/ca/en/careers/corp_opp/corporate_opportunities.html
</str>
<float name="score">0.0</float>
</doc>
−
<doc>
<str name="title">People :: McDonalds.ca</str>
−
<str name="id">
http://www.mcdonalds.ca/ca/en/careers/rest_opp/people.html
</str>
<float name="score">0.0</float>
</doc>
−
<doc>
<str name="title">Training, Education and Benefits :: McDonalds.ca</str>
−
<str name="id">
http://www.mcdonalds.ca/ca/en/careers/rest_opp/training_education_and_benefits.html
</str>
<float name="score">0.0</float>
</doc>
−
<doc>
<str name="title">Working Here :: McDonalds.ca</str>
−
<str name="id">
http://www.mcdonalds.ca/ca/en/careers/rest_opp/working_here.html
</str>
<float name="score">0.0</float>
</doc>
−
<doc>
<str name="title">Environment :: McDonalds.ca</str>
−
<str name="id">
http://www.mcdonalds.ca/ca/en/communities/environment.html
</str>
<float name="score">0.0</float>
</doc>
</result>
−
<lst name="spellcheck">
<lst name="suggestions"/>
</lst>
−
<lst name="debug">
−
<lst name="queryBoosting">
<str name="q">menu</str>
<null name="match"/>
</lst>
<str name="rawquerystring">Menu</str>
<str name="querystring">Menu</str>
<str name="parsedquery">text:menu</str>
<str name="parsedquery_toString">text:menu</str>
−
<lst name="explain">
−
<str name="http://www.mcdonalds.ca/">
0.0 = (MATCH) weight(text:menu in 0) [DefaultSimilarity], result of:
0.0 = fieldWeight in 0, product of:
1.4142135 = tf(freq=2.0), with freq of:
2.0 = termFreq=2.0
0.84584934 = idf(docFreq=6, maxDocs=6)
0.0 = fieldNorm(doc=0)
</str>
−
<str name="http://www.mcdonalds.ca/ca/en/careers/corp_opp/corporate_opportunities.html">
0.0 = (MATCH) weight(text:menu in 1) [DefaultSimilarity], result of:
0.0 = fieldWeight in 1, product of:
1.0 = tf(freq=1.0), with freq of:
1.0 = termFreq=1.0
0.84584934 = idf(docFreq=6, maxDocs=6)
0.0 = fieldNorm(doc=1)
</str>
−
<str name="http://www.mcdonalds.ca/ca/en/careers/rest_opp/people.html">
0.0 = (MATCH) weight(text:menu in 2) [DefaultSimilarity], result of:
0.0 = fieldWeight in 2, product of:
1.0 = tf(freq=1.0), with freq of:
1.0 = termFreq=1.0
0.84584934 = idf(docFreq=6, maxDocs=6)
0.0 = fieldNorm(doc=2)
</str>
−
<str name="http://www.mcdonalds.ca/ca/en/careers/rest_opp/training_education_and_benefits.html">
0.0 = (MATCH) weight(text:menu in 3) [DefaultSimilarity], result of:
0.0 = fieldWeight in 3, product of:
1.0 = tf(freq=1.0), with freq of:
1.0 = termFreq=1.0
0.84584934 = idf(docFreq=6, maxDocs=6)
0.0 = fieldNorm(doc=3)
</str>
−
<str name="http://www.mcdonalds.ca/ca/en/careers/rest_opp/working_here.html">
0.0 = (MATCH) weight(text:menu in 4) [DefaultSimilarity], result of:
0.0 = fieldWeight in 4, product of:
1.0 = tf(freq=1.0), with freq of:
1.0 = termFreq=1.0
0.84584934 = idf(docFreq=6, maxDocs=6)
0.0 = fieldNorm(doc=4)
</str>
−
<str name="http://www.mcdonalds.ca/ca/en/communities/environment.html">
0.0 = (MATCH) weight(text:menu in 5) [DefaultSimilarity], result of:
0.0 = fieldWeight in 5, product of:
1.0 = tf(freq=1.0), with freq of:
1.0 = termFreq=1.0
0.84584934 = idf(docFreq=6, maxDocs=6)
0.0 = fieldNorm(doc=5)
</str>
</lst>
<str name="QParser">LuceneQParser</str>
−
<lst name="timing">
<double name="time">3.0</double>
−
<lst name="prepare">
<double name="time">1.0</double>
−
<lst name="query">
<double name="time">0.0</double>
</lst>
−
<lst name="facet">
<double name="time">0.0</double>
</lst>
−
<lst name="mlt">
<double name="time">0.0</double>
</lst>
−
<lst name="highlight">
<double name="time">0.0</double>
</lst>
−
<lst name="stats">
<double name="time">0.0</double>
</lst>
−
<lst name="spellcheck">
<double name="time">0.0</double>
</lst>
−
<lst name="elevator">
<double name="time">1.0</double>
</lst>
−
<lst name="debug">
<double name="time">0.0</double>
</lst>
</lst>
−
<lst name="process">
<double name="time">2.0</double>
−
<lst name="query">
<double name="time">0.0</double>
</lst>
−
<lst name="facet">
<double name="time">0.0</double>
</lst>
−
<lst name="mlt">
<double name="time">0.0</double>
</lst>
−
<lst name="highlight">
<double name="time">0.0</double>
</lst>
−
<lst name="stats">
<double name="time">0.0</double>
</lst>
−
<lst name="spellcheck">
<double name="time">0.0</double>
</lst>
−
<lst name="elevator">
<double name="time">0.0</double>
</lst>
−
<lst name="debug">
<double name="time">2.0</double>
</lst>
</lst>
</lst>
</lst>
</response>
我的过滤器中配置的是 www.mcdonalds.ca 这个域名。由于该地址会被重定向到 www.mcdonalds.ca/ca/en.html,因此所有 URL 都没有得分。
我修改了顶层 URL:不再使用 www.mcdonalds.ca,而是直接指定 www.mcdonalds.ca/ca/en.html,这样就可以正常工作了。
请通过以下网址了解更多详情 -