0

我有一个数据集如下:

    { "_id" : "279771168740729_161573583988659_462046", "user_likes" : false, "message" : "good morning ICICI Bank have a great day...waiting for today surprise", "like_count" : 0, "message_tags" : [ { "id" : "279771168740729", "name" : "ICICI Bank", "length" : 10, "offset" : 13, "type" : "page" } ], "page_username" : "icicibank", "page_id" : "279771168740729", "can_remove" : false, "from" : { "id" : "100002801855936", "name" : "Kowshik Krankz" }, "page_name" : "ICICI Bank", "post_id" : "279771168740729_161573583988659", "created_time" : "2012-11-03T04:10:31+0000" }
    { "_id" : "279771168740729_203743029752972", "icon" : "http://static.ak.fbcdn.net/rsrc.php/v2/yj/r/v2OnaTyTQZE.gif", "link" : "http://youtu.be/eKxIbLVRHRE", "page_username" : "icicibank", "caption" : "www.youtube.com", "from" : { "id" : "279771168740729", "category" : "Bank/financial institution", "name" : "ICICI Bank" }, "type" : "video", "updated_time" : "2012-07-18T04:32:24+0000", "shares" : { "count" : 40 }, "id" : "279771168740729_203743029752972", "message" : "Like Raghu, you too could be at the wrong place at the wrong time. But would you be able to clear your unpaid bills like Raghu did? Now you can! To know how, check out this video. For more details, visit http://bit.ly/NsoCY3", "picture" : "http://external.ak.fbcdn.net/safe_image.php?d=AQADR4-ELAVCbuSI&w=130&h=130&url=http%3A%2F%2Fi2.ytimg.com%2Fvi%2FeKxIbLVRHRE%2Fmqdefault.jpg", "source" : "http://www.youtube.com/v/eKxIbLVRHRE?version=3&autohide=1&autoplay=1", "status_type" : "shared_story", "likes" : { "count" : 643, "data" : [ { "id" : "100002247030669", "name" : "Angel Zoya" }, { "id" : "100002257585478", "name" : "Rakesh Kumar" }, { "id" : "100002062205767", "name" : "P.k. Choudhury" }, { "id" : "100000484071154", "name" : "Balaji Jadhvar" } ] }, "name" : "ICICI Bank", "page_id" : "279771168740729", "page_name" : "ICICI Bank", "created_time" : "2012-07-18T04:32:24+0000", "comments" : { "count" : 48 }, "actions" : [ { "link" : "http://www.facebook.com/279771168740729/posts/203743029752972", "name" : "Comment" }, { "link" : "http://www.facebook.com/279771168740729/posts/203743029752972", "name" : "Like" } ] }
    { "_id" : "279771168740729_203743029752972_572142", "user_likes" : false, "message" : ":-)", "like_count" : 4, "page_username" : "icicibank", "page_id" : "279771168740729", "can_remove" : false, "from" : { "id" : "1060073189", "name" : "Raja Bhowmik" }, "page_name" : "ICICI Bank", "post_id" : "279771168740729_203743029752972", "created_time" : "2012-07-18T04:33:57+0000" }
    { "_id" : "279771168740729_203743029752972_572155", "user_likes" : false, "message" : "@?", "like_count" : 4, "page_username" : "icicibank", "page_id" : "279771168740729", "can_remove" : false, "from" : { "id" : "100001965306815", "name" : "Akhil Pandit" }, "page_name" : "ICICI Bank", "post_id" : "279771168740729_203743029752972", "created_time" : "2012-07-18T04:39:55+0000" }
    { "_id" : "279771168740729_203743029752972_572157", "user_likes" : false, "message" : "This ad is in very bad taste given the timing of it's release and the passing away of Satwik in the Bannerghata forests in Bangalore. Maybe there is no relation, but the similarity of the situation is uncanny.", "like_count" : 4, "page_username" : "icicibank", "page_id" : "279771168740729", "can_remove" : false, "from" : { "id" : "588391958", "name" : "Vijay Alphonse" }, "page_name" : "ICICI Bank", "post_id" : "279771168740729_203743029752972", "created_time" : "2012-07-18T04:41:05+0000" }
    { "_id" : "279771168740729_203743029752972_572182", "user_likes" : false, "message" : "Lv 2 do job in a bank", "like_count" : 6, "page_username" : "icicibank", "page_id" : "279771168740729", "can_remove" : false, "from" : { "id" : "100002492179903", "name" : "Monica Chandwani" }, "page_name" : "ICICI Bank", "post_id" : "279771168740729_203743029752972", "created_time" : "2012-07-18T04:48:51+0000" }

{ "_id" : "279771168740729_203743029752972_572228", "user_likes" : false, "message" : "R u working in ici bnk", "like_count" : 4, "page_username" : "icicibank", "page_id" : "279771168740729", "can_remove" : false, "from" : { "id" : "100002412887446", "name" : "Brijesh Gaur" }, "page_name" : "ICICI Bank", "post_id" : "279771168740729_203743029752972", "created_time" : "2012-07-18T05:10:06+0000" }

在这里,我需要根据点赞数(like_count 键的值)显示排名前 2 的帖子。因此,ID 为 279771168740729_203743029752972_572182 的帖子将排在第一位(6 是最高的点赞数),ID 为 279771168740729_203743029752972_572142 的帖子排在第二位(4 是次高的点赞数),依此类推。

我想出了两个步骤:

  1. 发出 likeCount 和 postId
  2. 对 likeCount 进行降序排序并显示前两个条目

因此 :

/**
 * Map step: emits one (like_count, _id) pair for the document bound to `this`.
 * Nothing is emitted when either field is null or undefined.
 */
var mapFunction = function() {
    var doc = this;
    var likes = doc.like_count;
    var id = doc._id;

    // Loose comparison on purpose: `== null` matches both null and undefined.
    if (id == null || likes == null) {
        return;
    }

    emit(likes, id);
};

/**
 * Reduce step placeholder. The body is empty, so as written it returns
 * undefined for every key.
 * NOTE: the identifier is spelled `reduceFuntion` (missing "c") here.
 *
 * @param likeCount        the key handed to the reducer (the map step above emits like_count as the key)
 * @param postIdCollection the values grouped under that key (the map step above emits _id as the value)
 */
var reduceFuntion = function(likeCount, postIdCollection) {
/*How to maintain a single sorted list of likeCount and show the corresponding post?*/

};

我查阅了 MongoDB 文档,但对其中的排序功能仍感到困惑——请参考这篇文章。

4

1 回答 1

1

除非你确实打算用 MapReduce 做其他事情,否则最好只使用普通的 Mongo 查询。最佳选择是直接使用一个 find 查询:

// Sort every document by like_count in descending order and keep only the first two.
db.collectionName.find().sort({ like_count: -1 }).limit(2);

如果您要处理大量数据,我还建议为 like_count 字段建立索引:

// Build a descending index on like_count so sorts on that field can use it.
// createIndex is used because ensureIndex is deprecated and unavailable in current shells.
db.collectionName.createIndex({ like_count: -1 });

如果你确实想用 MapReduce 来做,那么可以在 mapReduce 命令上使用 sort 和 limit 选项:

// sort and limit are sibling options: a limit nested inside the sort document
// would be read as a second sort key rather than as a cap on the input documents.
// out is a required option; { inline: 1 } returns the results directly instead of
// writing them to a collection.
db.collectionName.mapReduce(mapFunction, reduceFunction, { out: { inline: 1 }, sort: { like_count: -1 }, limit: 2 });

它本质上是在输入阶段对数据集执行相同的查询(先排序、再截取),然后才把结果交给 map 和 reduce;但这也意味着 MapReduce 步骤对您的作用并不大。

如果你想尝试用纯 MapReduce 来做,那么你需要一种完全不同的方法来处理你的 map 和 reduce 函数。MapReduce 进程在它的键上有一个隐式排序,这意味着,你可以运行这样的东西:

/**
 * Map step: emits (-like_count, _id) for the document bound to `this`.
 * The key is negated so that an ascending ordering of keys lists the
 * highest like counts first.
 * Nothing is emitted when _id or like_count is null or undefined.
 */
var mapFunction = function() {
    var likes = this.like_count;
    var postId = this._id;

    // Test the raw field before negating it: -undefined is NaN and -null is -0,
    // and neither of those compares equal to null, so a check made after the
    // negation would let documents without a like_count through.
    if (postId == null || likes == null) {
        return;
    }

    emit(-likes, postId);
};

/**
 * Reduce step: collapses all values emitted under one key into a single
 * comma-separated string.
 *
 * @param a the key being reduced (not used in the result)
 * @param b array of values emitted for that key
 * @returns {string} the elements of `b` joined with ","
 */
var reduceFunction = function(a, b) {
    // Only the joined string is returned; the key is not part of the result.
    return b.join();
};

// The limit option is deliberately not used: mapReduce applies it to the documents
// fed into the map function, not to the reduced output, so it would only truncate
// the input. The keys are negated like counts and the output is ordered by key
// (ascending), so the first two inline results hold the two highest like_count values.
db.test.mapReduce(mapFunction, reduceFunction, { out: { inline: 1 } }).results.slice(0, 2);

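然后处理结果集:键按升序排列,因此点赞数最高的条目排在最前面;从这些条目中取出对应的帖子即可,不过您需要先把每个条目里用逗号连接的帖子 ID 拆开,结果才便于使用。请注意,由于隐式排序是升序的,我们发出的是 like_count 的负值而不是正值,这样最高的点赞数会最先出现。还要注意,mapReduce 的 limit 选项限制的是输入到 map 函数的文档数,而不是输出的键数;要得到真正的前两个 like_count 值,应当对输出结果取前两项。这得到的并不是严格意义上的前两个帖子,而是前两个 like_count 值以及与它们关联的所有帖子,因此您仍然需要进行一些后期处理。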

当然,如果您想尝试另一种方法,也可以使用聚合框架:

// Aggregation pipeline: order by like_count descending, then keep the first two documents.
db.collectionName.aggregate([
    { $sort: { like_count: -1 } },
    { $limit: 2 }
]);
于 2013-08-17T09:59:34.850 回答