我们有按年份划分的索引,例如:
items-2019
items-2020
考虑以下数据:
POST items-2019/_doc
{
"@timestamp": "2019-01-01"
}
POST items-2020/_doc
{
"@timestamp": "2020-01-01"
}
POST /_aliases
{
"actions": [
{
"add": {
"index": "items-*",
"alias": "items"
}
}
]
}
现在,当我通过别名 items 查询数据并显式按 @timestamp 排序时,items-2020 的分片会被跳过(skipped):
GET items/_search
{
"query": {
"range": {
"@timestamp": {
"lt": "2020-01-01"
}
}
},
"sort": {
"@timestamp": "desc"
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 2,
"successful" : 2,
"skipped" : 1, <--- skipped
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "items-2019",
"_type" : "_doc",
"_id" : "BTdSb3UBRFH0Yqe1vm_W",
"_score" : null,
"_source" : {
"@timestamp" : "2019-01-01"
},
"sort" : [
1546300800000
]
}
]
}
}
但是,当我不显式指定排序时,没有任何分片被跳过,而 profile 输出显示 ES 在 items-2020 分片上执行的是 MatchNoDocsQuery:
GET items/_search
{
"profile": "true",
"query": {
"range": {
"@timestamp": {
"lt": "2020-01-01"
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 2,
"successful" : 2,
"skipped" : 0, <--- nothing skipped
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "items-2019",
"_type" : "_doc",
"_id" : "BTdSb3UBRFH0Yqe1vm_W",
"_score" : 1.0,
"_source" : {
"@timestamp" : "2019-01-01"
}
}
]
},
"profile" : {
"shards" : [
{
"id" : "[Axyv60mYQEGAREa2TwbgMQ][items-2019][0]",
"searches" : [
{
"query" : [
{
"type" : "ConstantScoreQuery",
"description" : "ConstantScore(DocValuesFieldExistsQuery [field=@timestamp])",
"time_in_nanos" : 69525,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 3766,
"match" : 0,
"next_doc_count" : 1,
"score_count" : 1,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 4123,
"advance_count" : 1,
"score" : 1123,
"build_scorer_count" : 2,
"create_weight" : 29745,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 30768
},
"children" : [
{
"type" : "DocValuesFieldExistsQuery",
"description" : "DocValuesFieldExistsQuery [field=@timestamp]",
"time_in_nanos" : 18317,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 1474,
"match" : 0,
"next_doc_count" : 1,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 1541,
"advance_count" : 1,
"score" : 0,
"build_scorer_count" : 2,
"create_weight" : 1184,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 14118
}
}
]
}
],
"rewrite_time" : 4660,
"collector" : [
{
"name" : "SimpleTopScoreDocCollector",
"reason" : "search_top_hits",
"time_in_nanos" : 22374
}
]
}
],
"aggregations" : [ ]
},
{
"id" : "[Axyv60mYQEGAREa2TwbgMQ][items-2020][0]",
"searches" : [
{
"query" : [
{
"type" : "MatchNoDocsQuery",
"description" : """MatchNoDocsQuery("User requested "match_none" query.")""", <-- here
"time_in_nanos" : 4166,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 0,
"match" : 0,
"next_doc_count" : 0,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 0,
"advance_count" : 0,
"score" : 0,
"build_scorer_count" : 1,
"create_weight" : 1791,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 2375
}
}
],
"rewrite_time" : 4353,
"collector" : [
{
"name" : "SimpleTopScoreDocCollector",
"reason" : "search_top_hits",
"time_in_nanos" : 12887
}
]
}
],
"aggregations" : [ ]
}
]
}
}
所以这里有几个问题:
- 响应中的 skipped 是否意味着该分片真的被跳过了?
- 跳过的分片和 MatchNoDocsQuery 有何不同?
- MatchNoDocsQuery 的成本是多少?
- 排序如何允许跳过分片?
- 如果我们对结果进行排序,这些分片真的会被完全跳过,在搜索过程中根本不会被访问吗?