假设我有来自 Twitter API 的流数据,并且我将数据作为文档存储在 MongoDB 中。我试图找到的是screen_name
under的计数entities.user_mentions
。
{
"_id" : ObjectId("50657d5844956d06fb5b36c7"),
"contributors" : null,
"text" : "",
"entities" : {
"urls" : [ ],
"hashtags" : [
{
"text" : "",
"indices" : [
26,
30
]
},
{
"text" : "",
"indices" : []
}
],
"user_mentions" : [
{
"name":"Twitter API",
"indices":[4,15],
"screen_name":"twitterapi",
"id":6253282, "id_str":"6253282"
}]
},
...
我尝试使用 map reduce:
map = function() {
if (!this.entities.user_mentions.screen_name) {
return;
}
for (index in this.entities.user_mentions.screen_name) {
emit(this.entities.user_mentions.screen_name[index], 1);
}
}
reduce = function(previous, current) {
var count = 0;
for (index in current) {
count += current[index];
}
return count;
}
result = db.runCommand({
"mapreduce" : "twitter_sample",
"map" : map,
"reduce" : reduce,
"out" : "user_mentions"
});
但它并不完全工作......