2

我有一组实体,代表一棵树。每个实体都有一个包含属性数组的属性。

例如:

{
    "_id" : 1,
    "parent_id" : null,
    "attributes" : [ "A", "B", "C" ]
}

我想使用 MapReduce 生成另一个与原始集合类似的集合,但其中每个文档不仅包含与该实体直接关联的属性,还包含其所有祖先的属性,一直追溯到层次结构的根节点。

所以给定以下实体:

{
    "_id" : 1,
    "parent_id" : null,
    "attributes" : [ "A", "B", "C" ]
}

{
    "_id" : 2,
    "parent_id" : 1,
    "attributes" : [ "D", "E", "F" ]
}

{
    "_id" : 3,
    "parent_id" : 2,
    "attributes" : [ "G", "H", "I" ]
}

MapReduce 作业的结果如下:

{
    "_id" : 1,
    "attributes" : [ "A", "B", "C" ]
}

{
    "_id" : 2,
    "attributes" : [ "A", "B", "C", "D", "E", "F" ]
}

{
    "_id" : 3,
    "attributes" : [ "A", "B", "C", "D", "E", "F", "G", "H", "I" ]
}

我已经成功写出了一些处理简单任务的 MapReduce 作业,比如统计每个实体的属性数量,但我想不明白该如何处理层次结构。我对其他数据存储方式持开放态度,但不想把整个层次结构存储在单个文档中。

在 MongoDB 中使用 MapReduce 是否可以实现这类处理,还是我只是以错误的方式思考问题?

4

1 回答 1

5

好的,我认为这种做法的效率和可扩展性都不会很好,因为必须从每个子节点出发,递归地逐级查找其父节点。但是,它确实能给出您想要的输出。

/**
 * Map step: emits the attributes of the current document and of each of its
 * ancestors, all keyed by the id of the document the walk started from.
 *
 * When mapReduce invokes this function it passes no arguments, so the current
 * document is taken from `this`. The two parameters are only supplied by the
 * function's own recursive calls while it climbs the parent chain.
 *
 * The walk stops at the first document whose `parent_id` is null/undefined,
 * or when a `parent_id` refers to a document that cannot be found.
 *
 * @param {Object} [doc] - On recursive calls, the document emitted by the
 *   previous step (its parent is looked up next). Omitted by mapReduce.
 * @param {*} [id] - On recursive calls, the `_id` of the starting document,
 *   used as the emit key. Omitted by mapReduce.
 */
var mapFunc = function(doc, id) {
  if (doc == null) {
    // Invoked by mapReduce itself: start from the current document.
    doc = this;
    id = this._id;
  } else if (doc.parent_id != null) {
    // Recursive call: step up to the parent of the previously emitted document.
    doc = db.test.findOne({_id: doc.parent_id});
    if (doc == null) {
      // Dangling parent_id (parent is missing): end the walk here instead of
      // throwing on `doc.attributes` and aborting the job.
      return;
    }
  }
  // The key is always the starting (child) id. Default to an empty array so
  // every emitted value has the shape the reduce step concatenates.
  emit(id, {attributes: doc.attributes || []});
  if (doc.parent_id != null) {
    // Keep climbing; `doc` and `id` are the hidden recursion parameters.
    mapFunc(doc, id);
  }
};
// mapFunc calls itself by name while running inside mapReduce, so it is stored
// in system.js under the same name to make that recursive call resolvable.
db.system.js.save({
  "_id" : "mapFunc",
  "value" : mapFunc
});

/**
 * Reduce step: merges all attribute lists emitted for one key into one list.
 *
 * Each value's attributes are placed in front of what has been merged so far,
 * so the last entry of `values` ends up first in the result. The original
 * author noted that the order of `values` may not be guaranteed.
 *
 * @param {*} key - The emit key (not used by the merge itself).
 * @param {Array} values - Objects of the form `{attributes: [...]}`.
 * @returns {{attributes: Array}} The merged attribute list, same shape as the inputs.
 */
var reduceFunc = function(key, values) {
  var merged = [];
  for (var i = 0; i < values.length; i++) {
    // Prepend: later values go in front of earlier ones.
    merged = values[i].attributes.concat(merged);
  }
  return {attributes: merged};
};

// Finalize step: unwraps the reduced value so each result holds the attribute
// array directly instead of an object with an `attributes` field.
var finalize = function(key, value) {
  var attributes = value.attributes;
  return attributes;
};

// Quick check: run the job over db.test with inline output and the finalize step.
db.test.mapReduce(mapFunc, reduceFunc, {
  out: {inline: 1},
  finalize: finalize
});

输出结果:

"results" : [
    {
        "_id" : 1,
        "value" : [
            "A",
            "B",
            "C"
        ]
    },
    {
        "_id" : 2,
        "value" : [
            "A",
            "B",
            "C",
            "D",
            "E",
            "F"
        ]
    },
    {
        "_id" : 3,
        "value" : [
            "A",
            "B",
            "C",
            "D",
            "E",
            "F",
            "G",
            "H",
            "I"
        ]
    }
],
"timeMillis" : 2,
"counts" : {
    "input" : 3,
    "emit" : 6,
    "reduce" : 2,
    "output" : 3
},
"ok" : 1,
}
于 2012-07-06T16:51:41.423 回答