0

我无法从 solr 索引中搜索名为 doc_id 的特定字段。我得到的数据来自 DB2,而 doc_id 是表中的 PK。

以下是数据导入配置(data-config.xml)和 schema.xml:

请帮忙

<!--
  Solr DataImportHandler configuration: builds one Solr document per
  DOC_DETAILS row (joined to AUTHOR_DETAILS) and enriches it with values
  fetched by the nested entities below.
-->
<dataConfig>
  <!-- NOTE(review): the JDBC user and password are stored here in plain text; consider externalising them. -->
  <dataSource driver="com.ibm.db2.jcc.DB2Driver" url="jdbc:db2://infbz1051.in.ibm.com:50000/SCION" user="db2admin" password="db2admin" batchSize="10000" readOnly="true" autoCommit="true" transactionIsolation="TRANSACTION_READ_UNCOMMITTED" connectionTimeout="5000000" />
  <document name="socialpost">
    <!--
      Root entity.
      pk is written as DOC_ID (upper case) so that it matches the column label
      returned by deltaQuery and the ${post.DOC_ID} / ${dataimporter.delta.DOC_ID}
      references used throughout this file. DB2 folds unquoted identifiers to
      upper case, and the delta import looks the pk up by that label.
      query            : full import.
      deltaQuery       : ids of rows whose lastupdatedtime is newer than the last index time.
      deltaImportQuery : re-reads a single changed row; selects the same columns
                         and aliases as the full-import query.
    -->
    <entity name="post"
            pk="DOC_ID"
            query="select d.DOC_ID,d.SRC_DOC_ID,d.PARENT_ID,d.SRC_PARENT_ID,d.AUTHOR_ID,d.FORUM,d.CUSTOMER as customer,d.TEXT as post_text,d.text as raw_text,d.URL as doc_url,d.LIKES,d.FWD,d.COMMENTS,a.AUTHOR_ID,a.AUTHOR_NAME,a.EMAIL as author_email,a.AGE as author_age,a.GENDER as author_gender from DOC_DETAILS d,AUTHOR_DETAILS a where d.AUTHOR_ID=a.AUTHOR_ID"
            deltaImportQuery="select d.DOC_ID,d.SRC_DOC_ID,d.PARENT_ID,d.SRC_PARENT_ID,d.AUTHOR_ID,d.FORUM,d.CUSTOMER as customer,d.TEXT as post_text,d.text as raw_text,d.URL as doc_url,d.LIKES,d.FWD,d.COMMENTS,a.AUTHOR_ID,a.AUTHOR_NAME,a.EMAIL as author_email,a.AGE as author_age,a.GENDER as author_gender from DOC_DETAILS d,AUTHOR_DETAILS a where  d.AUTHOR_ID=a.AUTHOR_ID AND d.doc_id='${dataimporter.delta.DOC_ID}'  with ur"
            deltaQuery="select DOC_ID from doc_details where lastupdatedtime &gt; '${dataimporter.last_index_time}' with ur">
       <!-- Each nested entity below is filtered by the parent row's ${post.DOC_ID}. -->
       <!-- Concepts stored for the post in SENTIMENT_AND_CONCEPTS. -->
       <entity name="concept" query="SELECT S.concept as concept FROM SENTIMENT_AND_CONCEPTS S where S.DOC_ID='${post.DOC_ID}' with ur" >
       </entity>
       <!-- Annotation values written by annotator 125, exposed as "category". -->
       <entity name="category" query="SELECT ann.doc_id, ann.ANNOTATIONMETAVALUES as category FROM annotations ann where ann.annotator_id=125  and ann.doc_id='${post.DOC_ID}' with ur" >
       </entity>
       <!-- Annotation values written by annotator 129, exposed as "hcategory". -->
       <entity name="hcategory" query="SELECT ann.doc_id, ann.ANNOTATIONMETAVALUES as hcategory FROM annotations ann where ann.annotator_id=129  and ann.doc_id='${post.DOC_ID}' with ur" >
       </entity>
       <!-- Annotation values written by annotator 127, exposed as "tcategory". -->
       <entity name="tcategory" query="SELECT ann.doc_id, ann.ANNOTATIONMETAVALUES as tcategory FROM annotations ann where ann.annotator_id=127  and ann.doc_id='${post.DOC_ID}' with ur" >
       </entity>
       <!-- Counts DOC_DETAILS rows for this id whose parent_id equals their own doc_id. -->
       <entity name="isparent" query="SELECT count(*) as isparent  FROM db2admin.doc_details where doc_id='${post.DOC_ID}' and parent_id=doc_id with ur" >
       </entity>
       <!-- Rank score of each annotator-125 category assigned to the post. -->
       <entity name="rankscore" query="select c.CATEGORY_RANK_SCORE as rankscore from CATEGORY_ASSIGNMENT_TABLE c,ANNOTATIONS A where a.doc_id=c.doc_id AND a.ANNOTATIONMETAVALUES =c.category_name AND a.ANNOTATOR_ID=125 and  a.doc_id='${post.DOC_ID}' with ur" >
       </entity>
       <!-- Annotation values written by annotator 35, exposed as "posttype". -->
       <entity name="posttype" query="SELECT ann.doc_id, ann.ANNOTATIONMETAVALUES as posttype FROM annotations ann where ann.annotator_id=35  and ann.doc_id='${post.DOC_ID}' with ur" >
       </entity>
       <!-- Sentiment of each annotator-125 category assigned to the post. -->
       <entity name="sentimentinfo" query="select c.CATEGORY_SENTIMENT as sentimentinfo from CATEGORY_ASSIGNMENT_TABLE c,ANNOTATIONS A where a.doc_id=c.doc_id AND a.ANNOTATIONMETAVALUES =c.category_name AND a.ANNOTATOR_ID=125 and  a.doc_id='${post.DOC_ID}' with ur" >
       </entity>
    </entity>
  </document>
</dataConfig>

schema.xml

<!-- Excerpt of the Solr schema: field declarations for the social-post documents. -->
<!-- doc_id is the only field declared required in this excerpt. -->
<field name="doc_id" type="string" indexed="true" stored="true" required="true" multiValued="false"/> 
<field name="customer" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="src_doc_id" type="string" indexed="true" stored="true" omitNorms="true" multiValued="false"/>
<!-- NOTE(review): the import query selects PARENT_ID, which does not match the name "parentid"; confirm how this field is populated. -->
<field name="parentid" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="src_parent_id" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="author_id" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="forum" type="string" indexed="true" stored="true" multiValued="false"/>
<!-- NOTE(review): no column named timeposted is selected by the import query shown with this schema; confirm the source. -->
<field name="timeposted" type="date" indexed="true" stored="true" multiValued="false"/>
<field name="post_text" type="string" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" multiValued="false"/>
<!-- Raw text copy for retrieval convenience --> 
<field name="raw_text" type="string" indexed="true" stored="true" omitNorms="true" termVectors="true" termPositions="true" termOffsets="true" multiValued="true"/>
<!--
  NOTE(review): Solr's documented directive is <copyField source="..." dest="..."/>.
  This element is spelled "copyfield", uses "src", and targets "text_raw", a field
  that is not declared in this excerpt (the field above is "raw_text"). It is
  presumably ignored as written; confirm the intent before correcting it, because
  raw_text is already filled by the import query from the same column.
-->
<copyfield src="post_text" dest="text_raw" />
<!-- NOTE(review): the import query aliases d.URL as doc_url, not url; confirm how this field is populated. -->
<field name="url" type="string" indexed="true" stored="true" omitNorms="true" multiValued="false"/>
<!-- NOTE(review): the import query selects LIKES, FWD and COMMENTS as separate columns; nothing shown produces likes_fwd_comments. -->
<field name="likes_fwd_comments" type="int" indexed="true" stored="true" multiValued="false"/>
<!-- lastupdatedtime is used in the deltaQuery WHERE clause but is not in the select list of the import queries shown. -->
<field name="lastupdatedtime" type="date" indexed="true" stored="true" omitNorms="true" multiValued="false"/>
4

1 回答 1

0

SQL 的结果是完全按字面处理的——各个查询返回的结果集中,列名会是“d.DOC_ID”或“ann.doc_id”这样的形式,而不是您在 Solr schema 中定义的“doc_id”。

您的部分 SQL 列确实使用了类似“as customer”的别名,这意味着只要整个文档能够通过验证(即标记为 required 或 uniqueKey 的字段都存在,等等),这些特定字段就能正常工作。

您可能需要在您的选择语句中为每个字段添加“AS xxx”。

如果您搜索的是使用了“as”语法的字段,请注意它们大多被设置为“string”类型。在示例 schema 中,“string”类型是 StrField,它不做任何分析(analysis),因此只能进行整个字段的精确匹配或通配符匹配,无法在 StrField 类型的多词字符串中搜索其中的单个词。

于 2013-09-26T20:59:13.697 回答