1

这个查询是否可能没有得到很好的优化?有人能提示我如何改进它吗?目前它会运行好几个小时,直到我手动将其终止。

数字:

  • 少于 200 万条小记录的集合(需要 2 GB RAM)
  • 有 16 个索引(需要 1.2 GB RAM)

arangosh [SES]> db.AuditsSearch.figures()

{
  "alive" : {
    "count" : 1940004,
    "size" : 2052009624
  },
  "dead" : {
    "count" : 397017,
"size" : 431456792,
"deletion" : 52950
  },
  "datafiles" : {
    "count" : 20,
    "fileSize" : 2132549880
  },
  "journals" : {
    "count" : 1,
    "fileSize" : 67108864
  },
  "compactors" : {
    "count" : 1,
    "fileSize" : 256528080
  },
  "shapefiles" : {
    "count" : 0,
    "fileSize" : 0
  },
  "shapes" : {
    "count" : 1004,
    "size" : 1310704
  },
  "attributes" : {
    "count" : 65,
    "size" : 3408
  },
  "indexes" : {
    "count" : 16,
    "size" : 1198718256
  },
  "lastTick" : "14686717826252",
  "uncollectedLogfileEntries" : 0
}

查询:

// Backfill query: for documents in AuditsSearch that pass the IS_NULL(a.analytics)
// filter, compute derived values and write them back under an "analytics" attribute.
FOR a IN AuditsSearch

  // split the task as it currently is not possible to execute at once:
  // Only documents for which IS_NULL(a.analytics) is true are processed.
  FILTER IS_NULL(a.analytics)
  // LIMIT offset, count: skip the first 200000 matching documents, then take up to 200000.
  // NOTE(review): the FILTER above presumably stops matching documents once they have
  // been updated, so a non-zero offset on later runs may skip documents that still
  // need processing — confirm whether the offset should stay at 0 for every batch.
  LIMIT 200000, 200000
  // end of split ... which also does not work

    // Derived values, each computed from one attribute of the current document:
    //   utcTimestamp  <- DATE_TIMESTAMP of a.timestamp
    //   intNumResults <- TO_NUMBER of a.resultcount
    //   intDuration   <- TO_NUMBER of a.duration
    //   url           <- element at index 1 of a.docid split on "|"
    LET utcTimestamp = DATE_TIMESTAMP(a.timestamp)
    LET intNumResults = TO_NUMBER(a.resultcount)
    LET intDuration = TO_NUMBER(a.duration)
    LET url = SPLIT(a.docid, "|")[1]

// Write the derived values back to the same document under "analytics".
// NOTE(review): the collection has indexes on analytics.utcTimestamp, analytics.duration,
// analytics.numResults and analytics.url, so each update presumably touches those
// indexes as well — verify the cost of this on the target ArangoDB version.
UPDATE a WITH { "analytics": { "utcTimestamp": utcTimestamp, "duration": intDuration, "numResults": intNumResults, "url": url } } IN AuditsSearch

索引:

[
{"id":"AuditsSearch/0","type":"primary","unique":true,"fields":["_key"]},
{"id":"AuditsSearch/13943073289094","type":"hash","unique":false,"fields":["eventtype"]},
{"id":"AuditsSearch/13943144067974","type":"hash","unique":false,"fields":["profile"]},
{"id":"AuditsSearch/13943163138950","type":"hash","unique":false,"fields":["sessionid"]},
{"id":"AuditsSearch/13943169299334","type":"hash","unique":false,"fields":["resultid"]},
{"id":"AuditsSearch/13943195644806","type":"skiplist","unique":false,"fields":["duration"]},
{"id":"AuditsSearch/13947101328262","type":"skiplist","unique":false,"fields":["timestamp"]},
{"id":"AuditsSearch/14023678636934","type":"skiplist","unique":false,"fields":["analytics.utcTimestamp"]},
{"id":"AuditsSearch/14064254132425","type":"skiplist","unique":false,"fields":["resultcount"]},
{"id":"AuditsSearch/14101960466633","type":"skiplist","unique":false,"fields":["analytics.duration"]},
{"id":"AuditsSearch/14101968134345","type":"skiplist","unique":false,"fields":["analytics.numResults"]},
{"id":"AuditsSearch/14140104909001","type":"hash","unique":false,"fields":["analytics.url"]},
{"id":"AuditsSearch/14168504672457","type":"skiplist","unique":false,"fields":["sessionid"]},
{"id":"AuditsSearch/14168754823369","type":"skiplist","unique":false,"fields":["eventtype"]},
{"id":"AuditsSearch/14169726263497","type":"hash","unique":false,"fields":["isadmin"]},
{"id":"AuditsSearch/14169732554953","type":"hash","unique":false,"fields":["isdelegatedadmin"]}
]

执行计划:

{
  "plan": {
    "nodes": [{
      "type": "SingletonNode",
      "dependencies": [],
      "id": 1,
      "estimatedCost": 1,
      "estimatedNrItems": 1
    },
    {
      "type": "EnumerateCollectionNode",
      "dependencies": [1],
      "id": 2,
      "estimatedCost": 1704564,
      "estimatedNrItems": 1704563,
      "database": "SES",
      "collection": "AuditsSearch",
      "outVariable": {
        "id": 0,
        "name": "a"
      },
      "random": false
    },
    {
      "type": "CalculationNode",
      "dependencies": [2],
      "id": 3,
      "estimatedCost": 3409127,
      "estimatedNrItems": 1704563,
      "expression": {
        "type": "function call",
        "name": "IS_NULL",
        "subNodes": [{
          "type": "array",
          "subNodes": [{
            "type": "attribute access",
            "name": "analytics",
            "subNodes": [{
              "type": "reference",
              "name": "a",
              "id": 0
            }]
          }]
        }]
      },
      "outVariable": {
        "id": 5,
        "name": "5"
      },
      "canThrow": false
    },
    {
      "type": "FilterNode",
      "dependencies": [3],
      "id": 4,
      "estimatedCost": 5113690,
      "estimatedNrItems": 1704563,
      "inVariable": {
        "id": 5,
        "name": "5"
      }
    },
    {
      "type": "CalculationNode",
      "dependencies": [4],
      "id": 6,
      "estimatedCost": 6818253,
      "estimatedNrItems": 1704563,
      "expression": {
        "type": "function call",
        "name": "DATE_TIMESTAMP",
        "subNodes": [{
          "type": "array",
          "subNodes": [{
            "type": "attribute access",
            "name": "timestamp",
            "subNodes": [{
              "type": "reference",
              "name": "a",
              "id": 0
            }]
          }]
        }]
      },
      "outVariable": {
        "id": 1,
        "name": "utcTimestamp"
      },
      "canThrow": false
    },
    {
      "type": "CalculationNode",
      "dependencies": [6],
      "id": 7,
      "estimatedCost": 8522816,
      "estimatedNrItems": 1704563,
      "expression": {
        "type": "function call",
        "name": "TO_NUMBER",
        "subNodes": [{
          "type": "array",
          "subNodes": [{
            "type": "attribute access",
            "name": "resultcount",
            "subNodes": [{
              "type": "reference",
              "name": "a",
              "id": 0
            }]
          }]
        }]
      },
      "outVariable": {
        "id": 2,
        "name": "intNumResults"
      },
      "canThrow": false
    },
    {
      "type": "CalculationNode",
      "dependencies": [7],
      "id": 8,
      "estimatedCost": 10227379,
      "estimatedNrItems": 1704563,
      "expression": {
        "type": "function call",
        "name": "TO_NUMBER",
        "subNodes": [{
          "type": "array",
          "subNodes": [{
            "type": "attribute access",
            "name": "duration",
            "subNodes": [{
              "type": "reference",
              "name": "a",
              "id": 0
            }]
          }]
        }]
      },
      "outVariable": {
        "id": 3,
        "name": "intDuration"
      },
      "canThrow": false
    },
    {
      "type": "CalculationNode",
      "dependencies": [8],
      "id": 9,
      "estimatedCost": 11931942,
      "estimatedNrItems": 1704563,
      "expression": {
        "type": "indexed access",
        "subNodes": [{
          "type": "function call",
          "name": "SPLIT",
          "subNodes": [{
            "type": "array",
            "subNodes": [{
              "type": "attribute access",
              "name": "docid",
              "subNodes": [{
                "type": "reference",
                "name": "a",
                "id": 0
              }]
            },
            {
              "type": "value",
              "value": "|"
            }]
          }]
        },
        {
          "type": "value",
          "value": 1
        }]
      },
      "outVariable": {
        "id": 4,
        "name": "url"
      },
      "canThrow": false
    },
    {
      "type": "CalculationNode",
      "dependencies": [9],
      "id": 10,
      "estimatedCost": 13636505,
      "estimatedNrItems": 1704563,
      "expression": {
        "type": "object",
        "subNodes": [{
          "type": "object element",
          "name": "analytics",
          "subNodes": [{
            "type": "object",
            "subNodes": [{
              "type": "object element",
              "name": "utcTimestamp",
              "subNodes": [{
                "type": "reference",
                "name": "utcTimestamp",
                "id": 1
              }]
            },
            {
              "type": "object element",
              "name": "duration",
              "subNodes": [{
                "type": "reference",
                "name": "intDuration",
                "id": 3
              }]
            },
            {
              "type": "object element",
              "name": "numResults",
              "subNodes": [{
                "type": "reference",
                "name": "intNumResults",
                "id": 2
              }]
            },
            {
              "type": "object element",
              "name": "url",
              "subNodes": [{
                "type": "reference",
                "name": "url",
                "id": 4
              }]
            }]
          }]
        }]
      },
      "outVariable": {
        "id": 6,
        "name": "6"
      },
      "canThrow": false
    },
    {
      "type": "LimitNode",
      "dependencies": [10],
      "id": 5,
      "estimatedCost": 13836505,
      "estimatedNrItems": 200000,
      "offset": 0,
      "limit": 200000,
      "fullCount": false
    },
    {
      "type": "UpdateNode",
      "dependencies": [5],
      "id": 11,
      "estimatedCost": 14036505,
      "estimatedNrItems": 0,
      "inDocVariable": {
        "id": 6,
        "name": "6"
      },
      "database": "SES",
      "collection": "AuditsSearch",
      "modificationFlags": {
        "ignoreErrors": false,
        "waitForSync": false,
        "nullMeansRemove": false,
        "mergeObjects": true,
        "ignoreDocumentNotFound": false
      },
      "inKeyVariable": {
        "id": 0,
        "name": "a"
      }
    }],
    "rules": ["move-calculations-up",
    "move-filters-up",
    "move-calculations-up-2",
    "move-filters-up-2"],
    "collections": [{
      "name": "AuditsSearch",
      "type": "write"
    }],
    "variables": [{
      "id": 0,
      "name": "a"
    },
    {
      "id": 1,
      "name": "utcTimestamp"
    },
    {
      "id": 4,
      "name": "url"
    },
    {
      "id": 2,
      "name": "intNumResults"
    },
    {
      "id": 3,
      "name": "intDuration"
    },
    {
      "id": 6,
      "name": "6"
    },
    {
      "id": 5,
      "name": "5"
    }],
    "estimatedCost": 14036505,
    "estimatedNrItems": 0
  },
  "warnings": [],
  "stats": {
    "rulesExecuted": 19,
    "rulesSkipped": 0,
    "plansCreated": 1
  }
}
4

1 回答 1

1

总结上述讨论的主要内容:

现在你应该使用 db._explain(<your aql query goes here>) 来查看执行计划,而不是获取原始执行计划。这里遇到的一些行为在较新的 ArangoDB 版本中已经得到了改进。

  • 优化器的决策并不总是完美的,但它在不断改进。这里遇到的把 LET 计算提前到过滤器(FILTER)之前的问题,现在优化器已经能够检测并避免。因此,ArangoDB 团队一直很乐于了解您的查询。
  • 如果您想知道复杂查询的哪些部分导致了较高的资源占用,把查询拆分成若干部分并分别测量各部分的耗时可能会很有帮助。
  • 有时使用临时 volatile 集合是有意义的。
  • 索引维护成本很高。如果您计划移动大量文档,则删除索引并在之后重新创建它们可能是有意义的。
  • 如果选择性较差,非唯一(non-unique)哈希索引可能会消耗大量性能;也就是说,如果存在大量哈希冲突或重复键,索引在插入和查询时的使用成本都会增加。
  • 非唯一哈希索引的插入成本确实为 O(1)(因此索引创建仍然很快),但对于更新/删除,它需要在具有相同键的元素中找到“正确的”元素。当有许多索引条目具有相同的键时,找到“正确”的条目需要进行 n 次比较(其中 n 是具有相同键的条目数量)。唯一哈希索引当然没有这个问题,但对于包含大量相同键的非唯一索引,这个问题就会显现出来。
  • 索引越多,上述索引维护的开销就会累加得越多。
  • truncate()涉及索引维护。您可能想要删除索引并重新创建它。
于 2016-03-31T14:27:31.007 回答