1

I'm inserting a lot of test records in a mongodb instance, via a mongo shell script.

I use batch inserts for performance db.messages.save(messagesBatch);

However, mongo upsert or update my data instead of inserting it !

After cleaning the collection, I run a loop for 200 inserts, via batches of 50. I end up with 51 (??) records after 4 batches, with following reports from db.getLastErrorObj():

/* 0 */
{
"n" : 0,
"connectionId" : 166,
"err" : null,
"ok" : 1
}

/* 1 */
{
"updatedExisting" : false,
"upserted" : ObjectId("527141c72a1ae75210d3a705"),
"n" : 1,
"connectionId" : 166,
"err" : null,
"ok" : 1
}

/* 2 */
{
"updatedExisting" : true,
"n" : 1,
"connectionId" : 166,
"err" : null,
"ok" : 1
}

/* 3 */
{
"updatedExisting" : true,
"n" : 1,
"connectionId" : 166,
"err" : null,
"ok" : 1
}

my insertion code is the following :

var batchLimit = 50;
var messagesBatch = [];

function flushMessages() {
print("* flushing... (" + messagesBatch.length + ")");
var inserted = false; // so far
do {
    db.messages.save(messagesBatch);
    var errObj = db.getLastErrorObj();
    print(errObj);
    if(errObj.ok && errObj.err === null) {
        // no error, fine
        inserted = true;
        messagesBatch.length = 0;
        print("* flushed. (" + messagesBatch.length + ")");
    }
    else {
        // insertion error !
        failedInsertions++;
        print(errObj);
    }
} while(!inserted);
}

function addMessage(message) {
messagesBatch.push(message);
if(messagesBatch.length >= batchLimit) {
    flushMessages();
}
msgGenerated++;
if(msgGenerated % 100000 == 0)
    print("* " + msgGenerated);
}

Can someone see why this code is upserting instead of inserting ? What am I doing wrong ?

Note : of course, the documents I'm inserting don't have an _id field.

4

1 回答 1

0

它似乎来自使用messagesBatch.length = 0;清空数组以准备下一批的技术。相反,当通过创建一个新数组来“重置”(某种程度)时,messagesBatch = [];它会按预期工作。

我猜插入是异步的,并且直接在数组 ref 上工作,似乎等待 getLastErrorObj() 不足以确保所有数据都已写入。这似乎是错误的。

第 51 条空记录来自脚本末尾对空数组的系统性“安全”刷新,与问题无关。

于 2014-03-14T09:59:14.273 回答