请参阅下面的更新。
我正在努力在 Ruby on Rails 应用程序(Rails:5.2.6,Ruby:2.5.8)中将我们的 ES 从 5.6 升级到 6.8。我所做的一个重大改变是将主题索引上的多类型映射(类型:主题、帖子)合并为单个映射类型(主题)。除此之外,我使用新的连接字段(parent_mapping)和自定义类型字段更新了旧的父/子关系,以模仿旧的行为(如 ES 文档中所建议的那样)。
这是带有自定义类型字段和连接字段的主题索引的当前映射:
{
"topics" : {
"mappings" : {
"topic" : {
"properties" : {
"answered_post_id" : {
"type" : "integer"
},
"author_host" : {
"type" : "keyword"
},
"author_name" : {
"type" : "keyword"
},
"awaiting_moderation" : {
"type" : "boolean"
},
"boosted" : {
"type" : "boolean"
},
"by_employee" : {
"type" : "boolean"
},
"content_updated_at" : {
"type" : "date"
},
"created_at" : {
"type" : "date"
},
"deleted" : {
"type" : "integer"
},
"deletion_reason_id" : {
"type" : "integer"
},
"engagement_count" : {
"type" : "integer"
},
"first_post_text" : {
"type" : "text",
"fields" : {
"letters" : {
"type" : "text",
"analyzer" : "letters"
},
"standard" : {
"type" : "text",
"analyzer" : "standard_no_html",
"search_analyzer" : "standard"
},
"synonym" : {
"type" : "text",
"analyzer" : "synonym_snowball"
}
},
"analyzer" : "snowball"
},
"forum_id" : {
"type" : "integer"
},
"forum_tags" : {
"type" : "integer"
},
"forum_type" : {
"type" : "keyword"
},
"from_vendor_page" : {
"type" : "boolean"
},
"has_poll" : {
"type" : "boolean"
},
"hotness" : {
"type" : "float"
},
"id" : {
"type" : "integer"
},
"is_best_answer" : {
"type" : "boolean"
},
"is_denied_forum" : {
"type" : "boolean"
},
"is_helpful_post" : {
"type" : "boolean"
},
"last_editor_id" : {
"type" : "integer"
},
"last_post_author" : {
"type" : "keyword"
},
"last_post_created_at" : {
"type" : "date"
},
"last_post_text" : {
"type" : "text",
"fields" : {
"letters" : {
"type" : "text",
"analyzer" : "letters"
},
"standard" : {
"type" : "text",
"analyzer" : "standard_no_html",
"search_analyzer" : "standard"
},
"synonym" : {
"type" : "text",
"analyzer" : "synonym_snowball"
}
},
"analyzer" : "snowball"
},
"last_post_user_id" : {
"type" : "integer"
},
"locked" : {
"type" : "boolean"
},
"muted" : {
"type" : "boolean"
},
"needs_answer" : {
"type" : "boolean"
},
"non_it" : {
"type" : "boolean"
},
"not_a_vendor" : {
"type" : "boolean"
},
"parent_id" : {
"type" : "integer"
},
"parent_mapping" : {
"type" : "join",
"eager_global_ordinals" : true,
"relations" : {
"topic" : "post"
}
},
"percolator_query" : {
"type" : "percolator"
},
"post_counter" : {
"type" : "integer"
},
"post_method" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"post_votes_count" : {
"type" : "integer"
},
"primary_text" : {
"type" : "text",
"fields" : {
"autocomplete" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "standard"
},
"english" : {
"type" : "text",
"analyzer" : "folded_english"
}
},
"analyzer" : "standard"
},
"private" : {
"type" : "boolean"
},
"ranking" : {
"type" : "integer"
},
"resource_type" : {
"type" : "text"
},
"root_post" : {
"type" : "boolean"
},
"root_post_id" : {
"type" : "integer"
},
"searchable" : {
"type" : "boolean"
},
"secondary_text" : {
"type" : "text",
"fields" : {
"english" : {
"type" : "text",
"analyzer" : "folded_english"
}
},
"analyzer" : "standard"
},
"spotlight" : {
"type" : "boolean"
},
"stripped_text" : {
"type" : "text",
"fields" : {
"letters" : {
"type" : "text",
"analyzer" : "letters"
},
"standard" : {
"type" : "text",
"analyzer" : "standard_no_html",
"search_analyzer" : "standard"
},
"synonym" : {
"type" : "text",
"analyzer" : "synonym_snowball"
}
},
"analyzer" : "snowball"
},
"subject" : {
"type" : "text",
"fields" : {
"autocomplete" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "standard"
},
"letters" : {
"type" : "text",
"analyzer" : "letters"
},
"standard" : {
"type" : "text",
"analyzer" : "standard_no_html",
"search_analyzer" : "standard"
},
"synonym" : {
"type" : "text",
"analyzer" : "synonym_snowball"
}
},
"analyzer" : "snowball"
},
"text" : {
"type" : "text",
"fields" : {
"letters" : {
"type" : "text",
"analyzer" : "letters"
},
"standard" : {
"type" : "text",
"analyzer" : "standard_no_html",
"search_analyzer" : "standard"
},
"synonym" : {
"type" : "text",
"analyzer" : "synonym_snowball"
}
},
"analyzer" : "snowball"
},
"topic_id" : {
"type" : "integer"
},
"type" : {
"type" : "keyword"
},
"unanswered" : {
"type" : "boolean"
},
"updated_at" : {
"type" : "date"
},
"user_id" : {
"type" : "integer"
},
"version" : {
"type" : "integer"
}
}
}
}
}
}
该索引有一个用于警报的 percolator 字段。我遇到的问题是:当文档被索引到主题索引(自定义类型:post,parent_mapping:{name: post, parent: id})时,还会通过 elasticsearch-api gem(版本 6.0.3)执行一次 percolate 搜索。.search 方法的有效载荷看起来像这样:
{
index: topics,
body: {
query: {
percolate: {
field: 'percolator_query',
index: 'topics',
type: 'topic',
id: 123,
}
}
}
}
此时,索引中有一个文档,其中包含要在索引新文档时匹配的查询,如下所示:
{
"_index" : "topics",
"_type" : "topic",
"_id" : "Alert:49",
"_score" : 1.0,
"_source" : {
"percolator_query" : {
"bool" : {
"should" : [
{
"query_string" : {
"boost" : 2,
"fields" : [
"stripped_text.standard"
],
"query" : "capybara"
}
},
{
"query_string" : {
"analyzer" : "standard",
"fields" : [
"stripped_text"
],
"query" : "capybara"
}
},
{
"bool" : {
"minimum_should_match" : 1,
"must" : [
{
"term" : {
"root_post" : true
}
}
],
"should" : [
{
"query_string" : {
"boost" : 2,
"fields" : [
"subject.standard"
],
"query" : "capybara"
}
},
{
"query_string" : {
"analyzer" : "standard",
"fields" : [
"subject"
],
"query" : "capybara"
}
}
]
}
}
],
"filter" : {
"bool" : {
"must" : [
{
"term" : {
"private" : false
}
},
{
"term" : {
"deleted" : 0
}
}
],
"must_not" : [
{
"term" : {
"awaiting_moderation" : true
}
},
{
"term" : {
"user_id" : 52342
}
}
]
}
}
}
}
}
}
因此,当我调用 es.client.search { ... percolate: { field: ... } 方法时,出现以下错误:{"type":"illegal_argument_exception","reason":"[routing] is missing for join field [parent_mapping]"}。
这是 ES 抛出的完整的、有点未格式化的错误:
Elasticsearch::Transport::Transport::Errors::BadRequest: [400] {
"error":{
"root_cause":[
{
"type":"mapper_parsing_exception",
"reason":"failed to parse"
}
],
"type":"search_phase_execution_exception",
"reason":"all shards failed",
"phase":"query",
"grouped":true,
"failed_shards":[
{
"shard":0,
"index":"2021-08-24-ae9a89e1ab242572_topics",
"node":"uHaQHJi0QcCSocaTGwH44w",
"reason":{
"type":"query_shard_exception",
"reason":"failed to create query: {\n \"percolate\" : {\n \"document_type\" : null,\n \"field\" : \"percolator_query\",\n \"documents\" : [\n {\n \"id\" : 1054172433,\n \"created_at\" : \"2021-08-24T18:42:23Z\",\n \"updated_at\" : \"2021-08-24T18:44:56Z\",\n \"deleted\" : 0,\n \"author_host\" : null,\n \"subject\" : \"The topic of discussion 1\",\n \"text\" : \"The topic of discussion 1\",\n \"post_method\" : \"web\",\n \"post_votes_count\" : 0,\n \"moderation_status\" : null,\n \"content_updated_at\" : null,\n \"version\" : 1,\n \"muted\" : false,\n \"forum_type\" : \"GroupForum\",\n \"forum_id\" : 1073075633,\n \"is_denied_forum\" : false,\n \"private\" : false,\n \"type\" : \"post\",\n \"awaiting_moderation\" : false,\n \"root_post\" : true,\n \"author_name\" : \"user1\",\n \"not_a_vendor\" : true,\n \"stripped_text\" : \"The topic of discussion 1\",\n \"is_helpful_post\" : false,\n \"is_best_answer\" : false,\n \"parent_mapping\" : {\n \"name\" : \"post\",\n \"parent\" : 1017419981\n },\n \"topic_id\" : 1017419981,\n \"parent_id\" : null,\n \"user_id\" : 1059399168,\n \"last_editor_id\" : null,\n \"deletion_reason_id\" : null\n }\n ],\n \"boost\" : 1.0\n }\n}",
"index_uuid":"viXlvCPzRGOdjIS8y6NBHg",
"index":"2021-08-24-ae9a89e1ab242572_topics",
"caused_by":{
"type":"mapper_parsing_exception",
"reason":"failed to parse",
"caused_by":{
"type":"illegal_argument_exception",
"reason":"[routing] is missing for join field [parent_mapping]"
}
}
}
}
],
"caused_by":{
"type":"mapper_parsing_exception",
"reason":"failed to parse",
"caused_by":{
"type":"illegal_argument_exception",
"reason":"[routing] is missing for join field [parent_mapping]"
}
}
},
"status":400
}
我已经尝试在 .search 调用中添加带有父文档 ID 的路由键,并在负载内部添加类似 body: ... { document: {name: 'post', parent: parent_id} 的内容,但仍然不断收到相同的错误。我正在浏览代码并阅读过滤器的变化,但我已经快没有思路了。这部分代码库对我来说是新的,ES 对我来说也相当新,所以我确信我遗漏了一些东西。我还没有找到一个很好的例子来说明渗透器(percolator)如何处理使用连接字段来创建父/子关系的索引,所以如果存在这样的例子,一个指向它的链接肯定会有所帮助。
提前感谢您的任何建议或帮助。如果需要,我很乐意提供更多信息和上下文。
更新:
我能够在一个更简单的例子中缩小问题的范围。当我进行渗透(percolate)搜索时,我还通过添加 "document": { "type": "post" } 解决了路由问题。出现路由字段缺失消息的原因是索引中的父文档没有“_routing”字段,因为这些文档在创建时没有路由到任何地方(只有子文档被路由到与其父文档相同的分片中)。现在我遇到的问题是渗透查询没有找到我期望它找到的文档。下面是我用来直接在 ES 6.8 上重现问题的当前设置。
# create index with percolator and join field
PUT /perc-index?include_type_name=true
{
"mappings": {
"perc" : {
"properties": {
"type": { "type": "keyword" },
"message": { "type": "text" },
"id": { "type": "integer" },
"percolator_query": { "type": "percolator" },
"parent_mapping": {
"type": "join",
"relations": {
"perc": "perc_child"
}
}
}
}
}
}
PUT /perc-index/perc/alert:1
{
"percolator_query": {
"bool": {
"filter": [
{ "match": {
"message": { "query": "capybara" }
}
}
]
}
}
}
# index parent document
PUT /perc-index/perc/1?refresh=true
{
"id": 1,
"type": "perc",
"message": "perc message",
"parent_mapping": "perc"
}
# index child document
PUT /perc-index/perc/80?routing=1
{
"id": 80,
"type": "perc_child",
"message": "perc child capybara",
"parent_mapping": {
"name": "perc_child",
"parent": 1
}
}
GET /perc-index/_search
{
"query": {
"match_all": {}
}
}
#send a percolate through the search api
GET /perc-index/_search
{
"query": {
"percolate": {
"field": "percolator_query",
"type": "perc",
"routing": "1",
"id": "80",
"document": {
"type": "perc_child"
}
}
}
}
因此,在上面的示例中,我创建了一个新索引,然后使用 id alert:1 存储了一个查询。之后,我创建了一个父文档和一个子文档(使用连接字段和自定义类型来模仿旧的多类型映射)。最后,我使用 percolate 字段进行搜索,并期望它返回 id 为 80 的子文档,但我没有得到任何结果。
所以目前我不知道是我遗漏了什么、做错了什么,还是 ES 6.8 中确实存在与这种设置相关的 bug。