0

我刚开始学习solr。我已经安装了 apache tomcat 服务器和 solr 3.5。我已成功配置 solr 以从 oracle 数据库中搜索一个实体的数据。当我在 data-config.xml 中添加两个实体并在 solr schema.xml 中添加字段时遇到问题。我像这样配置了我的 data-config.xml...

<dataConfig>
    <dataSource name="JdbcDataSource" 
        driver="oracle.jdbc.driver.OracleDriver"
        url="jdbc:oracle:thin:@//192.168.1.3:1521/orcl" 
        user="SSOHANI" 
        password="Ssohani123"/>

    <document name="doc">
        <entity name="PROJECTS" 
                query="select PROJECTS.ID, PROJECTS.BATCH_ID, PROJECTS.OPERATION, PROJECTS.NAME,
                PROJECTS.DESCRIPTION, PROJECTS.ESTIMATED_COST, PROJECTS.GRANTOR_AGENCY_ID,
                PROJECTS.GRANTEE_AGENCY_ID, PROJECTS.PROJECT_STATUS_ID,
                PROJECTS.PROJECT_TYPE_ID, PROJECTS.START_DATE, PROJECTS.END_DATE,
                NVL(PROJECTS.TRACS_PARENT_PROJECT_ID,0) TRACS_PARENT_PROJECT_ID,
                NVL(PROJECTS.STATE_PARENT_PROJECT_ID,0) STATE_PARENT_PROJECT_ID,
                NVL(PROJECTS.PLAN_ID,0) PLAN_ID,
                NVL(PROJECTS.PLAN_ID_TYPE,0) PLAN_ID_TYPE,
                NVL(PROJECTS.TRACS_ID,0) TRACS_ID,
                NVL(PROJECTS.STATE_ID,0) STATE_ID,
                PROJECTS.VALID, PROJECTS.APPLIED,
                NVL(PROJECTS.COMMENTS,'NULL') COMMENTS,
                PROJECTS.GENERATED_PLAN_ID, PROJECTS.TRACS_PROJECT_ID,
                PROJECTS.STATE_PLAN_ID from SSOHANI.PROJECTS" >

        <field column="ID" name="projects_id" />
        <field column="BATCH_ID" name="projects_batch_id" />
        <field column="OPERATION" name="projects_operation" />
        <field column="NAME" name="projects_name" />
        <field column="DESCRIPTION" name="projects_description" />
        <field column="ESTIMATED_COST" name="projects_estimated_cost" />
        <field column="GRANTOR_AGENCY_ID" name="projects_grantor_agency_id" />
        <field column="GRANTEE_AGENCY_ID" name="projects_grantee_agency_id" />
        <field column="PROJECT_STATUS_ID" name="projects_project_status_id" />
        <field column="PROJECT_TYPE_ID" name="projects_project_type_id" />
        <field column="START_DATE" name="projects_start_date" />
        <field column="END_DATE" name="projects_end_date" />
        <field column="TRACS_PARENT_PROJECT_ID" name="projects_tracs_parent_project_id" />
        <field column="STATE_PARENT_PROJECT_ID" name="projects_state_parent_project_id" />
        <field column="PLAN_ID" name="projects_plan_id" />
        <field column="PLAN_ID_TYPE" name="projects_plan_id_type" />
        <field column="TRACS_ID" name="projects_tracs_id" />
        <field column="STATE_ID" name="projects_state_id" />
        <field column="VALID" name="projects_valid" />
        <field column="APPLIED" name="projects_applied" />
        <field column="COMMENTS" name="projects_comments" />
        <field column="GENERATED_PLAN_ID" name="projects_generated_plan_id" />
        <field column="TRACS_PROJECT_ID" name="projects_tracs_project_id" />
        <field column="STATE_PLAN_ID" name="projects_state_plan_id" />

        </entity>

    <entity name="PLANS" 
          query="select PLANS.ID, PLANS.BATCH_ID, PLANS.OPERATION, PLANS.NAME, PLANS.DESCRIPTION,
               PLANS.CONTACT_ID, PLANS.PLAN_TYPE_ID, PLANS.AGENCY_ID, PLANS.START_DATE,
               NVL(PLANS.END_DATE,0) END_DATE,
               NVL(PLANS.TRACS_PARENT_PLAN_ID,0) TRACS_PARENT_PLAN_ID,
               NVL(PLANS.STATE_PARENT_PLAN_ID,0) STATE_PARENT_PLAN_ID,
               NVL(PLANS.TRACS_ID,0) TRACS_ID,
               NVL(PLANS.STATE_ID,0) STATE_ID,          
               PLANS.VALID, PLANS.APPLIED,
               NVL(PLANS.COMMENTS,'NULL') COMMENTS from SSOHANI.PLANS" >

        <field column="ID" name="plans_id" />
        <field column="BATCH_ID" name="plans_batch_id" />
        <field column="OPERATION" name="plans_operation" />
        <field column="NAME" name="plans_name" />
        <field column="DESCRIPTION" name="plans_description" />
        <field column="CONTACT_ID" name="plans_contact_id" />
        <field column="PLAN_TYPE_ID" name="plans_plan_type_id" />       
        <field column="AGENCY_ID" name="plans_agency_id" />
        <field column="START_DATE" name="plans_start_date" />
        <field column="END_DATE" name="plans_end_date" />
        <field column="TRACS_PARENT_PLAN_ID" name="plans_tracs_parent_plan_id" />
        <field column="STATE_PARENT_PLAN_ID" name="plans_state_parent_plan_id" />
        <field column="TRACS_ID" name="plans_tracs_id" />
        <field column="STATE_ID" name="plans_state_id" />
        <field column="VALID" name="plans_valid" />
        <field column="APPLIED" name="plans_applied" />
        <field column="COMMENTS" name="plans_comments" />       
    </entity>

</document>   
</dataConfig>

我像这样配置了我的 schema.xml ......

<schema>
<fields>

        <field name="projects_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_batch_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_operation" type="string" indexed="true" stored="true" required="true"/>
        <field name="projects_name" type="string" indexed="true" stored="true" required="true"/>
        <field name="projects_description" type="string" indexed="true" stored="true" required="true"/>
        <field name="projects_estimated_cost" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_grantor_agency_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_grantee_agency_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_project_status_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_project_type_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_start_date" type="date" indexed="true" stored="true" required="true"/>
        <field name="projects_end_date" type="date" indexed="true" stored="true" required="true"/>      
        <field name="projects_tracs_parent_project_id" type="long" indexed="true" stored="true" required="true"/>       
        <field name="projects_state_parent_project_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_plan_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_plan_id_type" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_tracs_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_state_id" type="long" indexed="true" stored="true" required="true"/>      
        <field name="projects_valid" type="string" indexed="true" stored="true" required="true"/>
        <field name="projects_applied" type="string" indexed="true" stored="true" required="true"/>
        <field name="projects_comments" type="string" indexed="true" stored="true" required="true"/>
        <field name="projects_generated_plan_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_tracs_project_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_state_plan_id" type="long" indexed="true" stored="true" required="true"/>

                <!--         fields for plan enity -->

        <field name="plans_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_batch_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_operation" type="string" indexed="true" stored="true" required="true"/>
        <field name="plans_name" type="string" indexed="true" stored="true" required="true"/>
        <field name="plans_description" type="string" indexed="true" stored="true" required="true"/>
        <field name="plans_contact_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_plan_type_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_agency_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_start_date" type="date" indexed="true" stored="true" required="true"/>
        <field name="plans_end_date" type="date" indexed="true" stored="true" required="true"/>
        <field name="plans_tracs_parent_plan_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_state_parent_plan_id" type="string" indexed="true" stored="true" required="true"/>       
        <field name="plans_tracs_id" type="long" indexed="true" stored="true" required="true"/>     
        <field name="plans_state_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_valid" type="string" indexed="true" stored="true" required="true"/>
        <field name="plans_applied" type="string" indexed="true" stored="true" required="true"/>
        <field name="plans_comments" type="string" indexed="true" stored="true" required="true"/>

    </fields>

    <uniqueKey>projects_id</uniqueKey>
    <uniqueKey>plans_id</uniqueKey>
    <defaultSearchField>projects_id</defaultSearchField>
</schema>

我的 solrconfig.xml 是...

<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
      <lst name="defaults">
          <str name="config">/opt/solr/core0/conf/data-config.xml</str>
      </lst>
  </requestHandler>

现在,当我运行完全导入命令时,出现以下错误..

Apr 16, 2012 4:11:46 PM org.apache.solr.handler.dataimport.SolrWriter upload
WARNING: Error creating document : SolrInputDocument[{projects_tracs_id=projects_tracs_id(1.0)={0}, projects_name=projects_name(1.0)={Minnesota Firearms Safety Training Program}, projects_description=projects_description(1.0)={To train 17,500 students and 425 new instructors at 45 recruiting workshops. Hold one statwide training academy. Award 2,650 recognition awards for length of service.}, projects_comments=projects_comments(1.0)={NULL}, projects_plan_id=projects_plan_id(1.0)={0}, projects_end_date=projects_end_date(1.0)={2002-12-31 00:00:00.0}, projects_tracs_parent_project_id=projects_tracs_parent_project_id(1.0)={0}, projects_plan_id_type=projects_plan_id_type(1.0)={0}, projects_project_status_id=projects_project_status_id(1.0)={4}, projects_state_plan_id=projects_state_plan_id(1.0)={1126}, projects_estimated_cost=projects_estimated_cost(1.0)={600000}, projects_valid=projects_valid(1.0)={N}, projects_grantor_agency_id=projects_grantor_agency_id(1.0)={1154}, projects_start_date=projects_start_date(1.0)={2001-12-31 00:00:00.0}, projects_applied=projects_applied(1.0)={N}, projects_state_id=projects_state_id(1.0)={0}, projects_batch_id=projects_batch_id(1.0)={1433468017}, projects_generated_plan_id=projects_generated_plan_id(1.0)={2050667163}, projects_id=projects_id(1.0)={2009553709}, projects_operation=projects_operation(1.0)={INSERT}, projects_state_parent_project_id=projects_state_parent_project_id(1.0)={0}, projects_grantee_agency_id=projects_grantee_agency_id(1.0)={1235}, projects_tracs_project_id=projects_tracs_project_id(1.0)={1123}, projects_project_type_id=projects_project_type_id(1.0)={3}}]
org.apache.solr.common.SolrException: [doc=2009553709] missing required field: plans_applied
    at org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:346)
    at org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:60)
    at org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:115)
    at org.apache.solr.handler.dataimport.SolrWriter.upload(SolrWriter.java:73)
    at org.apache.solr.handler.dataimport.DataImportHandler$1.upload(DataImportHandler.java:293)
    at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:636)
    at org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:268)
    at org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:187)
    at org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:359)
    at org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:427)
    at org.apache.solr.handler.dataimport.DataImporter$1.run(DataImporter.java:408)

solr 无法读取第二个实体的任何字段。谁能帮我解决这个问题..?请告诉我在配置 data-config.xml 或 schema.xml 或这两个文件时我犯了什么错误..

4

1 回答 1

0

尽管从堆栈跟踪看起来您的一个计划文档在必需的计划应用字段中缺少值,但我认为您需要注意的第一件事是数据不应该在 Solr 中进行规范化。它应该在进入索引之前被展平。

因此,您应该在这两个表之间创建一个连接(直接在 data-config.xml 中,而不是您的查询),而不是将这两个表作为单独的实体进行索引,以便每个结果(连接的)表行成为一个 Solr 文档。

这样,当您想要获取有关单个项目的所有数据时,它们都将位于一个文档中 - 这种用例无需连接。

在 Solr 中,您应该接受冗余,而不是关系和约束。

说得通?

于 2012-04-16T13:00:08.750 回答