我在这里做了几个假设:
- 城市可以有多个邮政编码
- 邮政编码是唯一的
- 你不是想得到M101P第 5 周问题的答案!
与其一次性计算城市,不如在地图阶段建立一个城市/邮编对象列表,然后在地图阶段将其简化为邮编和独特城市的列表。然后您可以使用 finalize 阶段来计算平均值。
注意:如果数据集很大,您可能需要考虑使用聚合框架,这在 map/reduce 示例之后显示
db.kodypocztowe.drop();
db.result.drop();
db.kodypocztowe.insert([
{"miejscowosci_str":"OneCity", "wojewodztwo":"FirstRegionName", "ZIP-Code" : "1"},
{"miejscowosci_str":"TwoCity", "wojewodztwo":"FirstRegionName", "ZIP-Code" : "2"},
{"miejscowosci_str":"ThreeCity", "wojewodztwo":"SecondRegionName", "ZIP-Code" : "3"},
{"miejscowosci_str":"FourCity", "wojewodztwo":"SecondRegionName", "ZIP-Code" : "4"},
{"miejscowosci_str":"FourCity", "wojewodztwo":"SecondRegionName", "ZIP-Code" : "5"},
]);
// map the data to { region : [{citiy : name , zip : code }] }
// Note : a city can be in multiple zips but zips are assumed to be unique
var map = function() {
emit(this.wojewodztwo, {city:this.miejscowosci_str, zip:this['ZIP-Code']});
};
//
// convert the data to :
//
// {region : {cities: [], zips : []}}
//
// note : always add zips
// note : only add cities if they are not already there
//
var reduce = function(key, val) {
var res = {zips:[], cities:[]}
for(i in val) {
var city = val[i].city;
res.zips.push(val[i].zip);
if(res.cities.indexOf(city) == -1) {
res.cities.push(city);
}
}
return res;
};
//
// finalize the data to get the average number of zips / region
var finalize = function(key, res) {
res.average = res.zips.length / res.cities.length;
delete res.cities;
delete res.zips;
return res;
}
print("==============");
print(" map/reduce")
print("==============");
db.kodypocztowe.mapReduce(
map,
reduce,
{ out : "result" , finalize:finalize}
);
db.result.find().pretty()
print("==============");
print(" aggregation")
print("==============");
db.kodypocztowe.aggregate( [
// get the number of zips / [region,city]
{ "$group" :
{
_id : {"region" : "$wojewodztwo", city : "$miejscowosci_str"},
zips:{$sum:1}
}
},
// get the number of cities per region and sum the number of zips
{ "$group" :
{
_id : "$_id.region" ,
cities:{$sum:1},
zips:{$sum:"$zips"},
}
},
// project the data into the same format that map/reduce generated
{ "$project" :
{
"value.average":{$divide: ["$zips","$cities"]}
}
}
]);
我希望这会有所帮助。