I am trying to perform regex query on an array of strings in MongoDB collection. I could only find this limitation in the docs:
$regex can only use an index efficiently when the regular expression has an anchor for the beginning (i.e. ^) of a string and is a case-sensitive match.
Let's make a test:
> for (var i=0; i<100000; i++) db.test.insert({f: ['a_0_'+i, 'a_1_2']})
> db.test.count()
100000
> db.test.ensureIndex({f: 1})
> db.test.find({f: /^a_(0)?_12$/ })
{ "_id" : ObjectId("514ac59886f004fe03ef2a96"), "f" : [ "a_0_12", "a_1_2" ] }
> db.test.find({f: /^a_(0)?_12$/ }).explain()
{
"cursor" : "BtreeCursor f_1 multi",
"isMultiKey" : true,
"n" : 1,
"nscannedObjects" : 200000,
"nscanned" : 200000,
"nscannedObjectsAllPlans" : 200000,
"nscannedAllPlans" : 200000,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 482,
"indexBounds" : {
"f" : [
[
"a_",
"a`"
],
[
/^a_(0)?_12$/,
/^a_(0)?_12$/
]
]
},
"server" : "someserver:27017"
}
The query is sloooow. On the other hand, this query is optimal: (but doesn't suit my use case)
> db.test.find({f: 'a_0_12' }).explain()
{
"cursor" : "BtreeCursor f_1",
"isMultiKey" : true,
"n" : 1,
"nscannedObjects" : 1,
"nscanned" : 1,
"nscannedObjectsAllPlans" : 1,
"nscannedAllPlans" : 1,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
"indexBounds" : {
"f" : [
[
"a_0_12",
"a_0_12"
]
]
},
"server" : "someserver:27017"
}
Why is regex query scanning all (sub)records when it has an index? What am I missing?