我是 Solr 的新手。我正在使用 Spring 3.x 和 Solr 4.6。
下面是我的 schema.xml:
<!--
  Schema for the "customer_site_address" index.

  Every descriptive field is stored but not indexed; searching happens
  through the catch-all "text" field, which is fed by the copyField rules
  further down. "latlng" has no copyField rule, so it is stored only.
-->
<schema name="customer_site_address" version="1.5">

  <fields>
    <!-- Document identifier; declared as the uniqueKey below. -->
    <field name="id"        type="long"         indexed="true"  stored="true"  required="true" multiValued="false"/>

    <!-- Mandatory, stored-only values. -->
    <field name="name"      type="string"       indexed="false" stored="true"  required="true" multiValued="false"/>
    <field name="number"    type="string"       indexed="false" stored="true"  required="true" multiValued="false"/>
    <field name="address"   type="text_general" indexed="false" stored="true"  required="true" multiValued="false"/>

    <!-- Optional, stored-only values. -->
    <field name="city"      type="string"       indexed="false" stored="true"  multiValued="false"/>
    <field name="state"     type="string"       indexed="false" stored="true"  multiValued="false"/>
    <field name="zipcode"   type="string"       indexed="false" stored="true"  multiValued="false"/>
    <field name="country"   type="string"       indexed="false" stored="true"  multiValued="false"/>
    <field name="latlng"    type="string"       indexed="false" stored="true"  multiValued="false"/>

    <!-- Catch-all search field: indexed, not stored, populated via copyField. -->
    <field name="text"      type="text_general" indexed="true"  stored="false" multiValued="true"/>

    <!-- Internal version field maintained by Solr. -->
    <field name="_version_" type="long"         indexed="true"  stored="true"/>
  </fields>

  <!-- Configure unique key -->
  <uniqueKey>id</uniqueKey>

  <!-- Funnel the searchable values into the catch-all "text" field. -->
  <copyField source="name"    dest="text"/>
  <copyField source="number"  dest="text"/>
  <copyField source="address" dest="text"/>
  <copyField source="city"    dest="text"/>
  <copyField source="state"   dest="text"/>
  <copyField source="zipcode" dest="text"/>
  <copyField source="country" dest="text"/>

  <types>
    <!-- Long: precisionStep="0" indexes a single term per value. -->
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>

    <!-- String: indexed/stored verbatim, without analysis. -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>

    <!--
      Text: tokenized, lower-cased and edge-n-grammed (1 to 15 characters,
      from the front of each token) at both index and query time.

      NOTE(review): the "text" field uses this type, and the spellcheck
      component in solrconfig.xml reads its terms from "text". The indexed
      edge n-grams (for example "f", "fa", "far", "farg") therefore look
      like dictionary words to the spellchecker. This is presumably why
      prefixes appear as suggestions; consider a separate field without
      n-grams for spellchecking. Confirm before changing.
    -->
    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">

      <!-- Analysis chain applied when documents are indexed. -->
      <analyzer type="index">
        <!-- Splits text into words using the word break rules of the
             Unicode Text Segmentation algorithm. -->
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <!-- <tokenizer class="solr.KeywordTokenizerFactory"/> -->
        <!-- <tokenizer class="solr.WhitespaceTokenizerFactory"/> -->

        <!-- Drops words listed in stopwords.txt (case insensitive). -->
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>

        <!-- Removes every apostrophe from each token. -->
        <filter class="solr.PatternReplaceFilterFactory"
                pattern="'"
                replacement=""
                replace="all"/>

        <!-- Splits tokens on delimiters and case changes, and also emits
             the concatenated word and number runs. -->
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>

        <!-- Transforms text to lower case. -->
        <filter class="solr.LowerCaseFilterFactory"/>

        <!-- Emits the leading 1 to 15 character prefixes of each token. -->
        <filter class="solr.EdgeNGramFilterFactory"
                minGramSize="1"
                maxGramSize="15"
                side="front"/>
      </analyzer>

      <!-- Analysis chain applied to query text. -->
      <analyzer type="query">
        <!-- Splits text into words using the word break rules of the
             Unicode Text Segmentation algorithm. -->
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <!-- <tokenizer class="solr.KeywordTokenizerFactory"/> -->
        <!-- <tokenizer class="solr.WhitespaceTokenizerFactory"/> -->

        <!-- Drops words listed in stopwords.txt (case insensitive). -->
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>

        <!-- Applies synonyms found in synonyms.txt. -->
        <filter class="solr.SynonymFilterFactory"
                synonyms="synonyms.txt"
                ignoreCase="true"
                expand="true"/>

        <!-- Transforms text to lower case. -->
        <filter class="solr.LowerCaseFilterFactory"/>

        <!-- Emits the leading 1 to 15 character prefixes of each query token. -->
        <filter class="solr.EdgeNGramFilterFactory"
                minGramSize="1"
                maxGramSize="15"
                side="front"/>
      </analyzer>
    </fieldType>
  </types>

</schema>
下面是 solrconfig.xml 中的 SearchComponent:
<!--
  Spellcheck search component with two dictionaries, both built from the
  "text" field: "default" (term-level corrections) and "wordbreak"
  (splitting or joining words).
-->
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">

  <!-- Field type whose query analyzer tokenizes the spellcheck input. -->
  <str name="queryAnalyzerFieldType">text_general</str>

  <!-- Term-level spellchecker that works directly on the main index. -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">text</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <!-- Distance measure used to rank suggestions. -->
    <str name="distanceMeasure">internal</str>
    <!-- Minimum accuracy a candidate needs to be offered as a suggestion. -->
    <float name="accuracy">0.5</float>
    <!-- Maximum number of edits allowed between the input and a suggestion. -->
    <int name="maxEdits">2</int>
    <!-- Number of leading characters a suggestion must share with the input. -->
    <int name="minPrefix">1</int>
    <!-- Maximum number of candidates inspected per result. -->
    <int name="maxInspections">5</int>
    <!-- Input terms shorter than this are not corrected. -->
    <int name="minQueryLength">4</int>
    <!-- Input terms found in more than this fraction of documents are not corrected. -->
    <float name="maxQueryFrequency">0.01</float>
  </lst>

  <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
  <lst name="spellchecker">
    <str name="name">wordbreak</str>
    <str name="classname">solr.WordBreakSolrSpellChecker</str>
    <str name="field">text</str>
    <!-- Try joining adjacent input terms into one word. -->
    <str name="combineWords">true</str>
    <!-- Try splitting an input term into several words. -->
    <str name="breakWords">true</str>
    <!-- Upper bound on the number of break/combine changes attempted. -->
    <int name="maxChanges">10</int>
  </lst>

</searchComponent>
下面是 solrconfig.xml 中的 RequestHandler:
<!--
  "/spell" search handler: runs the "spellcheck" component after the
  standard components, with spellchecking switched on by default.
-->
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">

  <lst name="defaults">
    <!-- Default search field. -->
    <str name="df">text</str>

    <!-- Both dictionaries are consulted; repeating the parameter is intentional. -->
    <str name="spellcheck.dictionary">default</str>
    <str name="spellcheck.dictionary">wordbreak</str>

    <str name="spellcheck">on</str>
    <!-- Return extended details alongside each suggestion. -->
    <str name="spellcheck.extendedResults">true</str>
    <!-- Maximum number of suggestions returned per input term. -->
    <str name="spellcheck.count">10</str>
    <!-- Number of suggestions for input terms that already exist in the index. -->
    <str name="spellcheck.alternativeTermCount">5</str>
    <!-- Suggest only when the query returns at most this many hits. -->
    <str name="spellcheck.maxResultsForSuggest">5</str>

    <!-- Build whole-query rewrites (collations) from the per-term suggestions. -->
    <str name="spellcheck.collate">true</str>
    <str name="spellcheck.collateExtendedResults">true</str>
    <!-- Number of candidate collations tested against the index. -->
    <str name="spellcheck.maxCollationTries">10</str>
    <!-- Maximum number of collations returned. -->
    <str name="spellcheck.maxCollations">5</str>
  </lst>

  <!-- Components appended after the default search components. -->
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>

</requestHandler>
现在,当我针对城市名 frgo 调用 /spell 时,会生成以下查询字符串:
qt=/spell&spellcheck.q=frgo&spellcheck=true&mm=100%
我得到以下替代方案:(当前结果)
[f r, f r g, fargo, f r g o, farg]
它应该给我:(预期)
[fargo]
另一种情况:当我输入 wset frgo 时,会生成以下查询字符串:
qt=/spell&spellcheck.q=wset+frgo&spellcheck=true&mm=100%
我得到以下替代方案:(当前结果)
[w s, w s e, west, w s e t, wert, f r, f r g, fargo, f r g o, farg]
它应该给我:(预期)
[West, West Fargo]
我尝试过一些通过谷歌找到的解决方案,但我认为我在配置时犯了错误。我也尝试过在 index 和 query analyzer 中使用 solr.KeywordTokenizerFactory 和 solr.WhitespaceTokenizerFactory。
请指导我。
编辑:
我删除了下面的过滤器,之后在某些情况下可以正常工作。
<filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front"/>
现在,如果我输入 wset frgo,会生成下面的查询字符串:
qt=/spell&spellcheck.q=wset+frgo&spellcheck=true
我得到以下替代方案:(当前结果)
[west, fargo, farg]
它应该给我:(预期)
[west, fargo, west fargo]
但仍然没有返回 West Fargo。
请指导我
谢谢