0

这是我第一次尝试使用 terms 聚合(terms aggregation),我所用的自定义 pattern 分词器(tokenizer)似乎存在问题。

这是映射:

{
  "mappings": {
    "properties": {
      "contentItemType": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        },
        "analyzer": "patternAnalyzer"
      },
      "theme": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        },
        "analyzer": "patternAnalyzer"
      }
    }
  },
  "settings": {
    "analysis": {
      "analyzer": {
        "patternAnalyzer": {
          "tokenizer": "patternTokenizer"
        }
      },
      "tokenizer": {
        "patternTokenizer": {
          "type": "pattern",
          "pattern": ";"
        }
      }
    }
  }
}

当我通过 http://my_server/index_name/_search 调用聚合 API 进行搜索时,结果如下:

{
  "aggregations": {
    "group_by_contentItemType": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "Correspondence; Reports",
          "doc_count": 3
        },
        {
          "key": "Correspondence",
          "doc_count": 2
        },
        {
          "key": "Meeting Minutes; Administrative Records; Reports",
          "doc_count": 2
        },
        {
          "key": "Correspondence; Legal and Treaty Material; Reports",
          "doc_count": 1
        },
        {
          "key": "Correspondence; Memoranda",
          "doc_count": 1
        },
        {
          "key": "Memoranda",
          "doc_count": 1
        },
        {
          "key": "Reports",
          "doc_count": 1
        }
      ]
    },
    "group_by_theme": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "International Relations",
          "doc_count": 2
        },
        {
          "key": "Key Events; Dissent; Dissent; Resistance; Human Rights",
          "doc_count": 2
        },
        {
          "key": "Border Security and Migration; Key Events",
          "doc_count": 1
        },
        {
          "key": "Border Security and Migration; Second World War Aftermath",
          "doc_count": 1
        },
        {
          "key": "Domestic Politics",
          "doc_count": 1
        },
        {
          "key": "Domestic Politics; Border Security and Migration",
          "doc_count": 1
        },
        {
          "key": "Economics and Trade; International Relations",
          "doc_count": 1
        },
        {
          "key": "Embassy and Consulate Administration; Industry and Agriculture; International Relations",
          "doc_count": 1
        },
        {
          "key": "Populations and Social Policy; Second World War Aftermath; International Relations",
          "doc_count": 1
        }
      ]
    }
  }
}

正如您所看到的,聚合结果有问题:桶的键仍然是包含分号的完整字段值,而没有按分号拆分成单独的词条。我已经被这个问题困扰了好几天了。我看过很多例子,但仍然无法解决。请帮忙。提前致谢!

编辑!!! 这是@CatalinM 回答后的完整映射:

{
    "local_cwee": {
        "mappings": {
            "dynamic": "false",
            "properties": {
                "author": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "commentaries": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "contentDateEndMonth": {
                    "type": "integer"
                },
                "contentDateEndSpecified": {
                    "type": "boolean"
                },
                "contentDateEndYear": {
                    "type": "integer"
                },
                "contentDateMonth": {
                    "type": "integer"
                },
                "contentDateMonthSpecified": {
                    "type": "boolean"
                },
                "contentDateStartMonth": {
                    "type": "integer"
                },
                "contentDateStartSpecified": {
                    "type": "boolean"
                },
                "contentDateStartYear": {
                    "type": "integer"
                },
                "contentDateYear": {
                    "type": "integer"
                },
                "contentDoi": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "contentItemType": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "contentItemTypeFacets": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "contentTitle": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "copyrightNotices": {
                    "type": "nested",
                    "properties": {
                        "imageName": {
                            "type": "text",
                            "fields": {
                                "keyword": {
                                    "type": "keyword",
                                    "ignore_above": 256
                                }
                            }
                        },
                        "text": {
                            "type": "text",
                            "fields": {
                                "keyword": {
                                    "type": "keyword",
                                    "ignore_above": 256
                                }
                            }
                        }
                    }
                },
                "countries": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "country": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "coverDateEndMonth": {
                    "type": "integer"
                },
                "coverDateEndSpecified": {
                    "type": "boolean"
                },
                "coverDateEndYear": {
                    "type": "integer"
                },
                "coverDateMonth": {
                    "type": "integer"
                },
                "coverDateMonthSpecified": {
                    "type": "boolean"
                },
                "coverDateStartMonth": {
                    "type": "integer"
                },
                "coverDateStartSpecified": {
                    "type": "boolean"
                },
                "coverDateStartYear": {
                    "type": "integer"
                },
                "coverDateYear": {
                    "type": "integer"
                },
                "displayName": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "documentDoi": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "documentLevel": {
                    "type": "integer"
                },                
                "keyEvents": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "language": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "languageFacets": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "languages": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "languagesFacets": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "moduleNumber": {
                    "type": "integer"
                },
                "notes": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "pageTranscript": {
                    "type": "text",
                    "term_vector": "with_positions",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    },
                    "analyzer": "whiteSpaceAnalyzer"
                },
                "people": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "publicationDate": {
                    "type": "integer"
                },
                "publicationDateEndMonth": {
                    "type": "integer"
                },
                "publicationDateEndSpecified": {
                    "type": "boolean"
                },
                "publicationDateEndYear": {
                    "type": "integer"
                },
                "publicationDateMonth": {
                    "type": "integer"
                },
                "publicationDateMonthSpecified": {
                    "type": "boolean"
                },
                "publicationDateStartMonth": {
                    "type": "integer"
                },
                "publicationDateStartSpecified": {
                    "type": "boolean"
                },
                "publicationDateStartYear": {
                    "type": "integer"
                },
                "publicationDateYear": {
                    "type": "integer"
                },
                "publicationDoi": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "publicationId": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "publicationIdFacet": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "publicationTitle": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "publicationType": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "publicationTypeFacets": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "publicationYear": {
                    "type": "integer"
                },
                "publisherName": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "publisherNameFacet": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "subject": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "subjectAreas": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "subjectAreasFacets": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "subjectCountries": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "subjectCountriesFacets": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "subjectKeyword": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "subjectKeywordFacets": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "subthemeFacets": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "subthemes": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "theme": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "themeFacets": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                },
                "themes": {
                    "type": "text",
                    "analyzer": "patternAnalyzer",
                    "fielddata": true
                }
            }
        }
    }
}
4

1 回答 1

2

使用您的自定义分词器后,text 字段中的词条(token)就是 "Correspondence"、"Meeting Minutes"、"Administrative Records" 等。所以我认为您不需要 keyword 子字段。

要使聚合在 text 字段上起作用,您必须在映射中添加 "fielddata": true。该功能默认是禁用的,因为通常不需要对大型文本字段进行聚合;但在您的情况下,词条正是您想要聚合的值。

这是简化的配置

{
  "mappings": {
    "properties": {
      "contentItemType": {
        "type": "text",
        "fielddata": true,
        "analyzer": "patternAnalyzer"
      }
    }
  },
  "settings": {
    "analysis": {
      "analyzer": {
        "patternAnalyzer": {
          "tokenizer": "patternTokenizer"
        }
      },
      "tokenizer": {
        "patternTokenizer": {
          "type": "pattern",
          "pattern": ";"
        }
      }
    }
  }
}

查询:

{
  "aggregations" : {
      "test" : {
          "terms" : { "field" : "contentItemType" }
      }
  }
}

结果(注意:由于分隔模式只有 ";",分号后面的空格会保留在词条开头,例如 " Reports";如需去除,可将 pattern 改为 ";\\s*"):

"aggregations": {
    "test": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
            {
                "key": " Administrative Records",
                "doc_count": 1
            },
            {
                "key": "Meeting Minutes",
                "doc_count": 1
            },
            {
                "key": " Reports",
                "doc_count": 1
            }
        ]
    }
}
于 2020-02-18T23:19:33.687 回答