作为对 @Michael Korbakov 答案的补充,我用 mongo shell 脚本实现了他的步骤
(参见 MongoDB 参考手册中关于为 mongo shell 编写脚本的章节)。
重要提示:如 MongoDB 参考手册中所述,在 mongo shell 中运行脚本
有助于提高性能,因为它减少了每次批量获取和批量执行的连接延迟。
需要考虑的一个缺点是:mongo shell 命令总是同步执行的;
不过批量执行已经替我们处理了(每个块内部的)并行性,所以这种方式很适合这个用例。
代码:
// Copies every document of sourceDb.sourceColl into destdb.destColl, adding
// `newProperty` to each copy. Documents are queued into an unordered bulk
// operation and flushed every `bulkWriteChunckSize` inserts.
//
// Meant to be run inside the mongo shell, which supplies the `db`, `sleep`
// and `print` globals. Top-level names stay `var` because re-declaring
// top-level `const`/`let` in the same shell session would throw.

// constants
var sourceDbName = 'sourceDb';
var sourceCollectionName = 'sourceColl';
var destDbName = 'destdb';
var destCollectionName = 'destColl';
// number of inserts queued before each bulk.execute()
var bulkWriteChunckSize = 1000;
// for fetching, I figured 1000 for current bulkWrite, and +1000 ready for next bulkWrite
var batchSize = 2000;

var sourceDb = db.getSiblingDB(sourceDbName);
var destDb = db.getSiblingDB(destDbName);

var start = new Date();
// noCursorTimeout() disables the server-side idle timeout, so this cursor
// must be closed explicitly (see the `finally` below).
var cursor = sourceDb[sourceCollectionName].find({}).noCursorTimeout().batchSize(batchSize);
var currChunkSize = 0;
var bulk = destDb[destCollectionName].initializeUnorderedBulkOp();

try {
  cursor.forEach(function(doc) {
    currChunkSize++;
    bulk.insert({
      ...doc,
      newProperty: 'hello!',
    }); // can be changed for your need, if you want update instead

    if (currChunkSize === bulkWriteChunckSize) {
      bulk.execute();
      // each bulk.execute took ~130ms in the author's run, so pause for the
      // same amount of time before queueing the next chunk
      sleep(130);
      currChunkSize = 0;
      // start a fresh bulk operation for the next chunk
      bulk = destDb[destCollectionName].initializeUnorderedBulkOp();
    }
  });

  // flush the last, partially filled chunk
  if (currChunkSize > 0) {
    bulk.execute();
    currChunkSize = 0;
  }

  var end = new Date();
  // elapsed time in milliseconds
  print(end - start);
} finally {
  // Always release the no-timeout cursor, even when an insert or
  // bulk.execute() throws part-way through the copy.
  cursor.close();
}