我正在尝试将termFreq()
函数与短语一起使用,例如
termfreq(field, "cool phrase")
我正在使用shingleFilter
它可以把相邻的单词组合在一起，使其像单个词项（term）一样被检索。当我这样调用该函数时：
termfreq(field,phrase)
参数 phrase 这个词项会经过为该字段配置的所有过滤器处理，例如
stemming
lowercase
stopwords
- 等等
但当我传入一个短语时，除非我给出完全精确的词项（大小写等都必须一致），否则它不起作用。例如：
termFreq(field, "cool phrase") -> x
termFreq(field, "cooL PHRASE") -> y
这是一个 bug 吗？
编辑:
我的 schema.xml
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="acordaoDocumentSchema" version="1.5">
<fields>
  <!--
    Field declarations. Attributes are listed in a fixed order
    (name, type, indexed, stored, then any extras) so rows can be compared at a glance.
  -->

  <!-- Version and key fields; "chave" is the one named by <uniqueKey>. -->
  <!-- NOTE(review): stock Solr schemas declare _version_ with a long type; confirm that "string" is intended. -->
  <field name="_version_" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="chave" type="string" indexed="true" stored="true"/>
  <field name="cdAcordao" type="string" indexed="true" stored="true"/>

  <!-- Analyzed text fields (texto_indexado). -->
  <field name="nuRegistro" type="texto_indexado" indexed="true" stored="true"/>
  <!-- Indexed only (not stored), multi-valued, with term vectors, positions and offsets;
       also the destination of the nuRegistro, nuProcOrigem and nuProcesso copyFields. -->
  <field name="deInteiroTeor" type="texto_indexado" indexed="true" stored="false"
         multiValued="true"
         termVectors="true" termPositions="true" termOffsets="true"/>
  <field name="deEmenta" type="texto_indexado" indexed="true" stored="true"/>
  <!-- Indexed-only copy of deEmenta (filled through copyField), analyzed with texto_para_facetas. -->
  <field name="deEmentaParaFacetas" type="texto_para_facetas" indexed="true" stored="false"/>

  <!-- Date fields. dtAtualizacao sets no explicit "indexed" attribute. -->
  <field name="dtJulgamento" type="date" indexed="true" stored="true"/>
  <field name="dtRegistro" type="date" indexed="true" stored="true"/>
  <field name="dtAtualizacao" type="date" stored="true"/>

  <!-- "fl*" string fields: indexed but not stored. -->
  <field name="flJurisprudencia" type="string" indexed="true" stored="false"/>
  <field name="flSegredoJustica" type="string" indexed="true" stored="false"/>
  <field name="flMostraInternet" type="string" indexed="true" stored="false"/>
  <field name="flAtivo" type="string" indexed="true" stored="false"/>
  <field name="flTpDecisao" type="string" indexed="true" stored="false"/>

  <!-- "cd*" string fields: indexed and stored. -->
  <field name="cdAgente" type="string" indexed="true" stored="true"/>
  <field name="cdAgenteForo" type="string" indexed="true" stored="true"/>
  <field name="cdJuizProlator" type="string" indexed="true" stored="true"/>
  <field name="cdComarca" type="string" indexed="true" stored="true"/>
  <field name="cdOrgaoJulgador" type="string" indexed="true" stored="true"/>
  <field name="cdForo" type="string" indexed="true" stored="true"/>
  <field name="cdVara" type="string" indexed="true" stored="true"/>
  <field name="cdClasse" type="string" indexed="true" stored="true"/>
  <field name="cdAssuntoPrinc" type="string" indexed="true" stored="true"/>

  <!-- Fields analyzed with texto_numero_processo. -->
  <field name="nuProcOrigem" type="texto_numero_processo" indexed="true" stored="true"/>
  <field name="nuProcesso" type="texto_numero_processo" indexed="true" stored="true"/>
</fields>
<!-- Document key: the "chave" field, declared in <fields> as an indexed, stored string. -->
<uniqueKey>chave</uniqueKey>
<!-- Fill deEmentaParaFacetas (type texto_para_facetas, indexed only) from deEmenta. -->
<copyField source="deEmenta" dest="deEmentaParaFacetas" />
<!-- Append nuRegistro, nuProcOrigem and nuProcesso to the multiValued deInteiroTeor field,
     so their values are also analyzed with that field's texto_indexado type. -->
<copyField source="nuRegistro" dest="deInteiroTeor" />
<copyField source="nuProcOrigem" dest="deInteiroTeor" />
<copyField source="nuProcesso" dest="deInteiroTeor" />
<types>
<!--
  texto_indexado: analyzed text type with norms kept (omitNorms="false").

  Both analyzers replace CR/LF with spaces, strip HTML, tokenize with the standard
  tokenizer, lowercase and apply the Portuguese light stemmer. The index analyzer
  additionally runs StandardFilterFactory and emits word shingles.

  NOTE(review): only the index analyzer emits shingles; the query analyzer stops after
  stemming and therefore produces single-word tokens only. Presumably this asymmetry is
  why a multi-word argument such as termfreq(field, "cool phrase") is not normalized at
  query time; confirm against the Solr version in use before changing the query chain.
-->
<fieldType name="texto_indexado" class="solr.TextField" omitNorms="false">
  <analyzer type="index">
    <!-- Every CR and every LF becomes one space. This single character-class filter
         replaces the former (\r), (\r\n), (\n) chain: the (\r\n) pattern could never
         match because the (\r) filter ahead of it had already rewritten every CR, so
         the filtered text is identical while the input is scanned once instead of
         three times. -->
    <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="[\r\n]" replacement=" "/>
    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- Present in the index chain only; the query chain has no StandardFilterFactory. -->
    <filter class="solr.StandardFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.PortugueseLightStemFilterFactory"/>
    <!-- Emit shingles of up to 4 tokens, keeping the single-token terms as well. -->
    <filter class="solr.ShingleFilterFactory" maxShingleSize="4" outputUnigrams="true"/>
  </analyzer>
  <analyzer type="query">
    <!-- Same CR/LF normalization as the index chain (see the pattern note there:
         one [\r\n] filter is equivalent to the former three-filter chain). -->
    <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="[\r\n]" replacement=" "/>
    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.PortugueseLightStemFilterFactory"/>
  </analyzer>
</fieldType>
<!-- texto_numero_processo: text type with one analyzer and no index/query split. -->
<fieldType name="texto_numero_processo" class="solr.TextField">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- Inside each token, delete every character that is not an ASCII letter,
         an ASCII digit or a space (all occurrences, replaced by the empty string). -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="[^a-zA-Z0-9 ]"
            replacement=""
            replace="all"/>
  </analyzer>
</fieldType>
<!--
  texto_para_facetas: text type with one analyzer and no index/query split.
  The chain contains no lowercasing filter.
-->
<fieldType name="texto_para_facetas" class="solr.TextField">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!-- Case-insensitive stop-word removal; the word list is read in snowball format. -->
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="../../conf/stopwords.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <!-- Every split* and generate* option is switched off; only the catenate* options are on. -->
    <filter class="solr.WordDelimiterFilterFactory"
            splitOnCaseChange="0"
            splitOnNumerics="0"
            generateWordParts="0"
            generateNumberParts="0"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="1"/>
    <!-- Length filter configured with a minimum of 4 and a maximum of 200. -->
    <filter class="solr.LengthFilterFactory" min="4" max="200"/>
  </analyzer>
</fieldType>
<!-- Plain string type backed by solr.StrField (no analyzer is configured); sortMissingLast is enabled. -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
<!-- Date type backed by solr.DateField, with norms omitted and sortMissingLast enabled.
     NOTE(review): DateField looks like the legacy date class that later Solr releases
     replaced with Trie/Point date types; confirm before upgrading (a change needs a reindex). -->
<fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
</types>