有时我发现索引已自动用于查询,即使操作员$not
加入了操作。它让我想起了这个问题,这个问题也让我困惑了很长时间。我尝试了新的线索并发现了一些不同的东西。我想我终于找到了答案。欢迎大家在这里发表评论,如果发现其他不同之处。
在 mongo shell V2.6.4 上运行
初始化数据如下:
> db.a.drop();
false
> db.a.insert({_id:1, a:[1,2,3], b:[{x:1, y:2}, {x:4, y:4}], c:1});
WriteResult({ "nInserted" : 1 })
> db.a.insert({_id:2, a:[4,2,3], b:[{x:1, y:2}, {x:4, y:4}], c:1});
WriteResult({ "nInserted" : 1 })
> db.a.ensureIndex({a:1}, {name:"a"});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 1,
"numIndexesAfter" : 2,
"ok" : 1
}
> db.a.ensureIndex({"b.x":1}, {name:"bx"});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 2,
"numIndexesAfter" : 3,
"ok" : 1
}
> db.a.ensureIndex({c:1}, {name:"c"});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 3,
"numIndexesAfter" : 4,
"ok" : 1
}
> db.a.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.a"
},
{
"v" : 1,
"key" : {
"a" : 1
},
"name" : "a",
"ns" : "test.a"
},
{
"v" : 1,
"key" : {
"b.x" : 1
},
"name" : "bx",
"ns" : "test.a"
},
{
"v" : 1,
"key" : {
"c" : 1
},
"name" : "c",
"ns" : "test.a"
}
]
> db.a.find();
{ "_id" : 1, "a" : [ 1, 2, 3 ], "b" : [ { "x" : 1, "y" : 2 }, { "x" : 2, "y" : 3 } ], "c" : 1 }
{ "_id" : 2, "a" : [ 4, 2, 3 ], "b" : [ { "x" : 1, "y" : 2 }, { "x" : 4, "y" : 4 } ], "c" : 1 }
该块只是简单地证明即使$not
加入查询操作,索引也会自动正确使用。
> db.a.find({c:{$not:{$gte:1}}}).explain();
{
"cursor" : "BtreeCursor c",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 0,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 0,
"nscannedAllPlans" : 1,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"c" : [
[
{
"$minElement" : 1
},
1
],
[
Infinity,
{
"$maxElement" : 1
}
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
这是原始问题提到的样式。索引已被自动使用。
> db.a.find({b:{$elemMatch:{x:{$gte:1}}}}).explain();
{
"cursor" : "BtreeCursor bx", // attention on this line
"isMultiKey" : true,
"n" : 2,
"nscannedObjects" : 2,
"nscanned" : 4,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 4,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 9,
"indexBounds" : {
"b.x" : [
[
1,
Infinity
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
$not
使用前置运算符时索引不起作用$elemMatch
。这是这个问题的核心。
> db.a.find({b:{$not:{$elemMatch:{x:{$gte:1}}}}}).explain();
{
"cursor" : "BasicCursor", // attention on this line
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : 2,
"nscanned" : 2,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"server" : "Duke-PC:27017",
"filterSet" : false
}
这个块:找到一些方法来解释数组字段索引的机制。
总共两个文件,但是nscanned: 6
. 这告诉我们索引是如何根据数组类型构建的。也就是说,索引节点位于数组的每个元素上,而不是数组本身。我想象字段上的索引结构a
是这样的:
BTree: Node(value:1, entry:[entry({_id:1})]), Node(value:2, entry:[entry({_id:1}), entry({_id:2})]), ...
当然,这只是我的想象来解释。:)
> db.a.find({a:{$gte:1}}).explain();
{
"cursor" : "BtreeCursor a",
"isMultiKey" : true,
"n" : 2,
"nscannedObjects" : 2,
"nscanned" : 6, // attention on this line
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 6,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"a" : [
[
1,
Infinity
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
使用运算符 $not 时,已自动采用相关索引。“indexBounds”字段告诉我们如何$not
处理查询。
> db.a.find({a:{$not:{$gte:2}}},{_id:0,a:1}).explain();
{
"cursor" : "BtreeCursor a",
"isMultiKey" : true,
"n" : 0,
"nscannedObjects" : 1, // attention on this field
"nscanned" : 2, // attention on this field
"nscannedObjectsAllPlans" : 1,
"nscannedAllPlans" : 2,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : { // attention on this field
"a" : [
[
{
"$minElement" : 1
},
2
],
[
Infinity,
{
"$maxElement" : 1
}
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
插入一个具有相同字段名称a
但不是数组的新文档。
> db.a.insert({a:1});
WriteResult({ "nInserted" : 1 })
> db.a.find();
{ "_id" : 1, "a" : [ 1, 2, 3 ], "b" : [ { "x" : 1, "y" : 2 }, { "x" : 2, "y" : 3 } ], "c" : 1 }
{ "_id" : 2, "a" : [ 4, 2, 3 ], "b" : [ { "x" : 1, "y" : 2 }, { "x" : 4, "y" : 4 } ], "c" : 1 }
{ "_id" : ObjectId("541e4fcbb65042180c128280"), "a" : 1 }
请阅读此块与上述内容进行比较。
> db.a.find({a:{$not:{$gte:2}}},{_id:0,a:1}).explain();
{
"cursor" : "BtreeCursor a",
"isMultiKey" : true, // This tells engine there are repeated array elements on index.
"n" : 1,
"nscannedObjects" : 2, // The third document should only access the index to fetch data
// since it has enough information.
// But here engine still read from the collection. My unstanding is the engine
// can not distinguish whether this index field is an array element or not,
// so it has to access the collection to find more information.
"nscanned" : 3,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 3,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 25,
"indexBounds" : {
"a" : [
[
{
"$minElement" : 1
},
2
],
[
Infinity,
{
"$maxElement" : 1
}
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}
结论:
elemMatch
很特别:
$elemMatch
明确告诉字段“b”是一个数组。
- 并且根据该运算符上的查询定义,找到与查询匹配的任何元素
true
都可以立即返回。但是只有完成对数组的所有元素的扫描,没有找到任何满意的元素,然后false
才能返回。
But index structure (think about my imagination above) on array can not support this kind of operation because engine can not determine which nodes on index are exactly from a certain array, if only by index. This is the most important point to explain this question.
- 其他算子从自己的查询定义中没有这个限制,比如$gte, $lt, ...,因为只有一个匹配可以判断是否匹配,可以直接通过索引来满足。
最后,有一种方法可以解决原始问题,但并不完美,因为必须提供整个元素。
数组字段上的索引,而不是元素上的索引。
> db.a.ensureIndex({b:1});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 4,
"numIndexesAfter" : 5,
"ok" : 1
}
> db.a.find({b:{$ne:{x:2, y:3}}}).explain();
{
"cursor" : "BtreeCursor b_1",
"isMultiKey" : true,
"n" : 1,
"nscannedObjects" : 2,
"nscanned" : 4,
"nscannedObjectsAllPlans" : 2,
"nscannedAllPlans" : 4,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 33,
"indexBounds" : {
"b" : [
[
{
"$minElement" : 1
},
{
"x" : 2,
"y" : 3
}
],
[
{
"x" : 2,
"y" : 3
},
{
"$maxElement" : 1
}
]
]
},
"server" : "Duke-PC:27017",
"filterSet" : false
}