1

我有一个只有一个分片的 Elasticsearch 索引,并在其上运行一个 [dis_max][4] 查询:给定一些用户详细信息

(名字、姓氏、出生日期、地址、电话、用户名、电子邮件等),

通过组合一组条件/匹配子句,从索引中查找对应的用户。

例如

  • 匹配用户名([fuzzy][1],提升 2 倍)
  • 应该匹配名字和姓氏([bool][3] 结合对 FN 和 LN 的 [match-term][2] 查询,提升 1.1 倍)
  • 必须匹配 FN、LN 和 DOB([bool][3] 结合对 FN 和 LN 的 [fuzzy][1] 查询以及对 DOB 的 [match-term][2] 查询,提升 3 倍)
  • 匹配电话([match-term][2],提升 2 倍)

等等

请参阅下面的查询(输入数据已做遮盖处理):

{
   "from":0,
   "size":100,
   "explain": true,
   "query":{
      "dis_max":{
         "tie_breaker":0.5,
         "queries":[
            {
               "fuzzy":{
                  "username":{
                     "value":"xxx",
                     "fuzziness":"AUTO",
                     "prefix_length":0,
                     "max_expansions":50,
                     "transpositions":false,
                     "boost":2.0
                  }
               }
            },
            {
               "term":{
                  "email":{
                     "value":"gmail.com",
                     "boost":1.0
                  }
               }
            },
            {
               "fuzzy":{
                  "email":{
                     "value":"xxx",
                     "fuzziness":"AUTO",
                     "prefix_length":0,
                     "max_expansions":50,
                     "transpositions":false,
                     "boost":1.0
                  }
               }
            },
            {
               "term":{
                  "password-hash":{
                     "value":"xxx",
                     "boost":1.0
                  }
               }
            },
            {
               "term":{
                  "currency-code":{
                     "value":"xxx",
                     "boost":0.5
                  }
               }
            },
            {
               "match":{
                  "first-name":{
                     "query":"xxx",
                     "operator":"OR",
                     "prefix_length":0,
                     "max_expansions":50,
                     "fuzzy_transpositions":true,
                     "lenient":false,
                     "zero_terms_query":"NONE",
                     "auto_generate_synonyms_phrase_query":true,
                     "boost":1.0
                  }
               }
            },
            {
               "match":{
                  "last-name":{
                     "query":"xxx",
                     "operator":"OR",
                     "prefix_length":0,
                     "max_expansions":50,
                     "fuzzy_transpositions":true,
                     "lenient":false,
                     "zero_terms_query":"NONE",
                     "auto_generate_synonyms_phrase_query":true,
                     "boost":1.0
                  }
               }
            },
            {
               "dis_max":{
                  "queries":[
                     {
                        "match":{
                           "first-name":{
                              "query":"xxx",
                              "operator":"OR",
                              "prefix_length":0,
                              "max_expansions":50,
                              "fuzzy_transpositions":true,
                              "lenient":false,
                              "zero_terms_query":"NONE",
                              "auto_generate_synonyms_phrase_query":true,
                              "boost":1.0
                           }
                        }
                     },
                     {
                        "match":{
                           "last-name":{
                              "query":"xxx",
                              "operator":"OR",
                              "prefix_length":0,
                              "max_expansions":50,
                              "fuzzy_transpositions":true,
                              "lenient":false,
                              "zero_terms_query":"NONE",
                              "auto_generate_synonyms_phrase_query":true,
                              "boost":1.0
                           }
                        }
                     }
                  ],
                  "boost":1.1
               }
            },
            {
               "match":{
                  "date-of-birth":{
                     "query":"xxxx-xx-xx",
                     "operator":"OR",
                     "prefix_length":0,
                     "max_expansions":50,
                     "fuzzy_transpositions":true,
                     "lenient":false,
                     "zero_terms_query":"NONE",
                     "auto_generate_synonyms_phrase_query":true,
                     "boost":1.0
                  }
               }
            },
            {
               "bool":{
                  "must":[
                     {
                    "fuzzy" : {
                        "first-name" : {
                        "value" : "xxx",
                        "fuzziness" : "AUTO",
                        "prefix_length" : 0,
                        "max_expansions" : 50,
                        "transpositions" : true,
                        "boost" : 1.5
                        }
                    }
                     },
                     {
                        "fuzzy" : {
                            "last-name" : {
                            "value" : "xxx",
                            "fuzziness" : "AUTO",
                            "prefix_length" : 0,
                            "max_expansions" : 50,
                            "transpositions" : true,
                            "boost" : 1.5
                            }
                        }
                     },
                     {
                        "match":{
                           "date-of-birth":{
                              "query":"xxxx-xx-xx",
                              "operator":"OR",
                              "prefix_length":0,
                              "max_expansions":50,
                              "fuzzy_transpositions":true,
                              "lenient":false,
                              "zero_terms_query":"NONE",
                              "auto_generate_synonyms_phrase_query":true,
                              "boost":1.0
                           }
                        }
                     }
                  ],
                  "adjust_pure_negative":true,
                  "boost":3.0
               }
            },
            {
               "match":{
                  "address":{
                     "query":"xxx",
                     "operator":"OR",
                     "analyzer":"whitespace",
                     "fuzziness":"AUTO",
                     "prefix_length":0,
                     "max_expansions":50,
                     "minimum_should_match":"60%",
                     "fuzzy_transpositions":true,
                     "lenient":false,
                     "zero_terms_query":"NONE",
                     "auto_generate_synonyms_phrase_query":true,
                     "boost":1.1
                  }
               }
            },
            {
               "match":{
                  "city":{
                     "query":"xxx",
                     "operator":"OR",
                     "prefix_length":0,
                     "max_expansions":50,
                     "fuzzy_transpositions":true,
                     "lenient":false,
                     "zero_terms_query":"NONE",
                     "auto_generate_synonyms_phrase_query":true,
                     "boost":1.0
                  }
               }
            },
            {
               "term":{
                  "postal-code":{
                     "value":"xxx ",
                     "boost":1.5
                  }
               }
            },
            {
               "boosting":{
                  "positive":{
                     "match":{
                        "address":{
                           "query":"xxx",
                           "operator":"OR",
                           "analyzer":"whitespace",
                           "fuzziness":"AUTO",
                           "prefix_length":0,
                           "max_expansions":50,
                           "minimum_should_match":"60%",
                           "fuzzy_transpositions":true,
                           "lenient":false,
                           "zero_terms_query":"NONE",
                           "auto_generate_synonyms_phrase_query":true,
                           "boost":1.1
                        }
                     }
                  },
                  "negative":{
                     "bool":{
                        "must_not":[
                           {
                              "term":{
                                 "postal-code":{
                                    "value":"xxx ",
                                    "boost":1.5
                                 }
                              }
                           }
                        ],
                        "adjust_pure_negative":true,
                        "boost":1.0
                     }
                  },
                  "negative_boost":0.7,
                  "boost":1.0
               }
            },
            {
               "term":{
                  "country-code":{
                     "value":"xxx",
                     "boost":1.0
                  }
               }
            },
            {
               "match":{
                  "phone":{
                     "query":"xxx",
                     "operator":"OR",
                     "prefix_length":0,
                     "max_expansions":50,
                     "fuzzy_transpositions":true,
                     "lenient":false,
                     "zero_terms_query":"NONE",
                     "auto_generate_synonyms_phrase_query":true,
                     "boost":2.0
                  }
               }
            },
            {
               "match":{
                  "security-question":{
                     "query":"xxx?",
                     "operator":"OR",
                     "prefix_length":0,
                     "max_expansions":50,
                     "fuzzy_transpositions":true,
                     "lenient":false,
                     "zero_terms_query":"NONE",
                     "auto_generate_synonyms_phrase_query":true,
                     "boost":1.0
                  }
               }
            },
            {
               "match":{
                  "security-answer":{
                     "query":"xxx ",
                     "operator":"OR",
                     "prefix_length":0,
                     "max_expansions":50,
                     "fuzzy_transpositions":true,
                     "lenient":false,
                     "zero_terms_query":"NONE",
                     "auto_generate_synonyms_phrase_query":true,
                     "boost":1.0
                  }
               }
            }
         ],
         "boost":1.0
      }
   }
}

每个条件都会产生一个分数,我把 tie_breaker 设置为 0.5,这样结果的分数就是所有子句分数中的最大值,再加上其余子句分数的 0.5 倍。

我用少量几组输入组合执行了这个查询:

  • 在某些情况下,我得到了很高的分数,匹配效果也很好;
  • 在其他情况下,即使预期分数应该相同或足够高,我得到的分数却非常低,因为一些最相关的匹配子句似乎被跳过了。

事实上,我已经用 "explain": true 调试了查询的执行,在返回的解释中:

  • 第一个结果在所有查询子句上都得到了较高的分数,
  • 第二个结果(从数据来看本应得到足够高的分数)得分却很低,并且某些子句没有出现在解释中,就好像它们被排除/忽略了一样。

我想了解为什么在某些情况下这些子句会被忽略/跳过。有人知道这是否可能是 ES 在 Solr 中构建查询的方式所导致的问题吗?

请参见下面的结果示例(所有数据都已被遮盖,但两条结果在各个字段上的取值非常接近)。

{
    "took": 312,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 10000,
            "relation": "gte"
        },
        "max_score": 164.04868,
        "hits": [
            {
                "_shard": "[my-shard][0]",
                "_node": "acccSjFfQnOffqbiHV5nAg",
                "_index": "my-index",
                "_type": "_doc",
                "_id": "360086",
                "_score": 164.04868,
                "_source": {
                    "id": "360086",
                    "user-id": 389872,
                    "username": "xxx",
                    "email": "xxx@gmail.com",
                    "password-hash": "xxx",
                    "currency-code": "xxx",
                    "first-name": "xxx",
                    "last-name": "xxx",
                    "date-of-birth": "xxxx-xx-xx",
                    "address": "xxx",
                    "city": "N/A",
                    "postal-code": "xxx",
                    "country-code": "xxx",
                    "phone": "xxx",
                    "security-question": "xxx?",
                    "security-answer": "xxx "
                },
                "_explanation": {
                    "value": 164.04868,
                    "description": "max plus 0.5 times others of:",
                    "details": [
                        {
                            "value": 22.275639,
                            "description": "weight(username:xxx in 3223) [PerFieldSimilarity], result of:",
                            "details": [...]
                        },
                        {
                            "value": 0.88973737,
                            "description": "weight(email:gmail.com in 3223) [PerFieldSimilarity], result of:",
                            "details": [...]
                        },
                        {
                            "value": 12.44133,
                            "description": "weight(email:XXX in 3223) [PerFieldSimilarity], result of:",
                            "details": [...]
                        },
                        {
                            "value": 11.066888,
                            "description": "weight(password-hash:XXX in 3223) [PerFieldSimilarity], result of:",
                            "details": [...]
                        },
                        {
                            "value": 12.547058,
                            "description": "weight(first-name:XXX in 3223) [PerFieldSimilarity], result of:",
                            "details": [...]
                        },
                        {
                            "value": 12.619294,
                            "description": "weight(last-name:XXX in 3223) [PerFieldSimilarity], result of:",
                            "details": [...]
                        },
                        {
                            "value": 13.881224,
                            "description": "max of:",
                            "details": [
                                {
                                    "value": 13.801764,
                                    "description": "weight(first-name:xxx in 3223) [PerFieldSimilarity], result of:",
                                    "details": [...]
                                },
                                {
                                    "value": 13.881224,
                                    "description": "weight(last-name:xxx in 3223) [PerFieldSimilarity], result of:",
                                    "details": [...]
                                }
                            ]
                        },
                        {
                            "value": 1.0,
                            "description": "date-of-birth:[XXX TO XXX]",
                            "details": []
                        },
                        {
                            "value": 92.82605,
                            "description": "sum of:",
                            "details": [
                                {
                                    "value": 46.42945,
                                    "description": "sum of:",
                                    "details": [
                                        {
                                            "value": 46.42945,
                                            "description": "weight(first-name:XXX in 3223) [PerFieldSimilarity], result of:",
                                            "details": [...]
                                        }
                                    ]
                                },
                                {
                                    "value": 43.3966,
                                    "description": "sum of:",
                                    "details": [
                                        {
                                            "value": 43.3966,
                                            "description": "weight(last-name:XXX in 3223) [PerFieldSimilarity], result of:",
                                            "details": [...]
                                        }
                                    ]
                                },
                                {
                                    "value": 3.0,
                                    "description": "date-of-birth:[XXX TO XXX]^3.0",
                                    "details": []
                                }
                            ]
                        },
                        {
                            "value": 17.182709,
                            "description": "weight(postal-code:XXX  in 3223) [PerFieldSimilarity], result of:",
                            "details": []
                        },
                        {
                            "value": 0.6058445,
                            "description": "weight(country-code:XXX in 3223) [PerFieldSimilarity], result of:",
                            "details": []
                        },
                        {
                            "value": 24.692732,
                            "description": "weight(phone:XXX in 3223) [PerFieldSimilarity], result of:",
                            "details": [...]
                        },
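                        {
                            "value": 4.2287235,
                            "description": "sum of:",
                            "details": [
                                {
                                    "value": 1.5324217E-6,
                                    "description": "weight(security-question:XXX in 3223) [PerFieldSimilarity], result of:",
                                    "details": [...]
                                },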
                                {
                                    "value": 0.7475863,
                                    "description": "weight(security-question:XXX in 3223) [PerFieldSimilarity], result of:",
                                    "details": [...]
                                },
                                {
                                    "value": 1.7405679,
                                    "description": "weight(security-question:XXX in 3223) [PerFieldSimilarity], result of:",
                                    "details": [...]
                                },
                                {
                                    "value": 1.7405679,
                                    "description": "weight(security-question:XXX in 3223) [PerFieldSimilarity], result of:",
                                    "details": [...]
                                }
                            ]
                        },
                        {
                            "value": 9.014062,
                            "description": "weight(security-answer:XXX in 3223) [PerFieldSimilarity], result of:",
                            "details": [...]
                        }
                    ]
                }
            },
            {
                "_shard": "[my-shard][0]",
                "_node": "acccSjFfQnOffqbiHV5nAg",
                "_index": "my-index",
                "_type": "_doc",
                "_id": "359895",
                "_score": 40.7084,
                "_source": {
                    "id": "359895",
                    "user-id": 389681,
                    "username": "XXX",
                    "email": "XXX@icloud.com",
                    "password-hash": "XXX",
                    "currency-code": "XXX",
                    "first-name": "XXX",
                    "last-name": "XXX",
                    "date-of-birth": "1973-03-01",
                    "address": "XXX",
                    "city": "N/A",
                    "postal-code": "XXX ",
                    "country-code": "XXX",
                    "phone": "XXX",
                    "security-question": "XXX?",
                    "security-answer": "XXX"
                },
                "_explanation": {
                    "value": 40.7084,
                    "description": "max plus 0.5 times others of:",
                    "details": [
                        {
                            "value": 1.0,
                            "description": "date-of-birth:[XXX TO XXX]",
                            "details": []
                        },
                        {
                            "value": 17.182709,
                            "description": "weight(postal-code:XXX  in 3183) [PerFieldSimilarity], result of:",
                            "details": [...]
                        },
                        {
                            "value": 0.6058445,
                            "description": "weight(country-code:XXX in 3183) [PerFieldSimilarity], result of:",
                            "details": [...]
                        },
                        {
                            "value": 24.692732,
                            "description": "weight(phone:XXX in 3183) [PerFieldSimilarity], result of:",
                            "details": [...]
                        },
                        {
                            "value": 4.2287235,
                            "description": "sum of:",
                            "details": [
                                {
                                    "value": 1.5324217E-6,
                                    "description": "weight(security-question:XXX in 3183) [PerFieldSimilarity], result of:",
                                    "details": [...]
                                },
                                {
                                    "value": 0.7475863,
                                    "description": "weight(security-question:XXX in 3183) [PerFieldSimilarity], result of:",
                                    "details": [...]
                                },
                                {
                                    "value": 1.7405679,
                                    "description": "weight(security-question:XXX in 3183) [PerFieldSimilarity], result of:",
                                    "details": [...]
                                },
                                {
                                    "value": 1.7405679,
                                    "description": "weight(security-question:XXX in 3183) [PerFieldSimilarity], result of:",
                                    "details": [...]
                                }
                            ]
                        },
                        {
                            "value": 9.014062,
                            "description": "weight(security-answer:XXX in 3183) [PerFieldSimilarity], result of:",
                            "details": [...]
                        }
                    ]
                }
            }
        ]
    }
}

4

0 回答 0