0

我在 Mongo 有一个集合users,我执行这个 map reduce,我相信它相当于 COUNT(*) GROUP BY origin

> m = function() { for (i in this.membership) {
... emit( this.membership[i].platform_profile.origin, 1 );
... }  }
function () {
    for (i in this.membership) {
        emit(this.membership[i].platform_profile.origin, 1);
    }
}
> r = function( id, values ) { var result = 0; 
... for ( var i = 0; i < values.length; i ++ ) { result += values[i];  }
... return result; }
function (id, values) {
    var result = 0;
    for (var i = 0; i < values.length; i++) {
        result += values[i];
    }
    return result;
}
> db.users.mapReduce(m, r, {out : { inline: 1}});
{
    "results" : [
        {
            "_id" : 0,
            "value" : 15
        },
        {
            "_id" : 1,
            "value" : 449
        },
    ...
}

但是,如果我尝试计算有多少文档将此字段设置为特定值,例如1,我得到的结果会更少:

db.users.count({"membership.platform_profile.origin": 1});

424

我错过了什么?

4

3 回答 3

2

您的计数查询是否恰好使用了稀疏索引?我唯一的猜测是,某些其他查询条件导致索引中缺失的文档在计数时被忽略了。

我使用一些固定数据重新创建了您的架构,并且 map/reduce 和简单计数查询之间的结果是一致的:

// Drop the `users` collection first so that repeated runs of this script do
// not accumulate documents from earlier runs.
db.users.drop();

/**
 * Map step: emits (origin, 1) once for every entry of `this.membership`.
 *
 * Runs with `this` bound to the document being mapped and relies on the
 * `emit` function supplied by the map/reduce runtime. Because it emits per
 * membership entry, a document holding several entries with the same origin
 * contributes several counts for that origin.
 */
var map = function() {
    // `i` is declared locally: the undeclared form created an implicit global
    // shared by every invocation (and is a ReferenceError in strict mode).
    for (var i in this.membership) {
        emit(this.membership[i].platform_profile.origin, 1);
    }
};

/**
 * Reduce step: adds up all partial counts emitted (or previously reduced)
 * for one key.
 *
 * @param id     the key being reduced (unused by the computation)
 * @param values array of numeric counts for that key
 * @returns the sum of `values`, 0 for an empty array
 */
var reduce = function(id, values) {
    var total = 0;
    var idx = 0;
    while (idx < values.length) {
        total += values[idx];
        idx += 1;
    }
    return total;
};

// Fixture definition: each key is a divisor and each value is the origin
// label attached to every document whose index is a multiple of that divisor.
var origins = {1: "a", 2: "b", 3: "c", 4: "d"};

// Insert 1000 documents; document i gets one membership entry per divisor
// that divides i evenly.
var i = 0;
while (i < 1000) {
    var membership = [];

    for (var divisor in origins) {
        // for...in yields the keys as strings; `%` coerces them to numbers.
        if (i % divisor !== 0) {
            continue;
        }
        membership.push({ platform_profile: { origin: origins[divisor] }});
    }

    db.users.save({ membership: membership });
    ++i;
}

// Run the map/reduce with inline output and print one "origin: count" line
// per result group.
var inlineOutput = db.users.mapReduce(map, reduce, {out: {inline: 1}});
var groups = inlineOutput.results;
for (var g = 0; g < groups.length; g++) {
    print(groups[g]._id + ": " + groups[g].value);
}

// Print the plain count() of documents matching each origin, in the same
// "origin: count" format, so the two listings can be compared.
for (var key in origins) {
    var label = origins[key];
    var matching = db.users.count({"membership.platform_profile.origin": label});
    print(label + ": " + matching);
}

这是输出:

$ mongo --quiet mr_count.js 
a: 1000
b: 500
c: 334
d: 250
a: 1000
b: 500
c: 334
d: 250
于 2012-07-10T19:51:00.080 回答
1

您可以使用以下 map/reduce,它等效于 COUNT(*) GROUP BY origin:

map/reduce 函数:

/**
 * Map step: emits (origin, 1) for every membership entry that has a
 * platform_profile with an origin set.
 *
 * Runs with `this` bound to the document being mapped and relies on the
 * `emit` function supplied by the map/reduce runtime. Documents without a
 * `membership` field emit nothing. Because it emits per membership entry, a
 * document with several entries sharing an origin is counted several times.
 */
var map = function() {
    if (!this.membership) return;

    for (var i in this.membership) {
        var profile = this.membership[i].platform_profile;

        // Skip only this entry and keep scanning: returning here would drop
        // every later membership of the same document.
        // Test for null/undefined explicitly so that falsy but valid origins
        // such as 0 are still counted.
        if (!profile || profile.origin === undefined || profile.origin === null) {
            continue;
        }
        emit(profile.origin, 1);
    }
};

/**
 * Reduce step: sums the partial counts for one key.
 *
 * Summing the values (rather than taking `values.length`) keeps the result
 * correct when the function is fed its own earlier outputs.
 *
 * @param key    the key being reduced (unused by the computation)
 * @param values array of numeric counts for that key
 * @returns the sum of `values`, 0 for an empty array
 */
var reduce = function(key, values) {
    var count = 0;

    // Declared index loop: the previous `for (v in values)` leaked `v` as an
    // implicit global and walked the array with for...in.
    for (var i = 0; i < values.length; i++) {
        count += values[i];
    }
    return count;
};

// Run the map/reduce over the `users` collection through the generic command
// interface, passing the `map` and `reduce` functions defined earlier.
// The command's return value is kept in `result`.
// NOTE(review): MongoDB reads the first key of a command document as the
// command name, so "mapreduce" should stay first — confirm against the docs.
// NOTE(review): a string "out" value presumably names the collection that
// receives the output (here `users_count`) — confirm against the docs.
result = db.runCommand({
        "mapreduce" : "users", 
        "map" : map,
        "reduce" : reduce,
        "out" : "users_count"
});
于 2012-07-09T08:48:45.633 回答
1

我遇到过同样的问题。我把 reduce 函数中的 x.length 替换成了 Array.sum(x)(假设你在 map 函数中发出的是 1),这样就可以正常工作了。我同意 x.length 按理也应该可以工作,但我无法解释它为什么不行。

于 2014-11-05T16:09:41.417 回答