当 csv 文件上传到我的 s3 存储桶时,我的 lambda 将被触发以将我的数据插入 DynamoDB。我需要一个流,因为文件太大而无法作为完整对象下载。
const batchWrite = async (clientDynamoDB, itemsToProcess) => {
const ri = {};
ri[TABLE_DYNAMO] = itemsToProcess.map((itm) => toPutRequest(itm));
const params = { RequestItems: ri };
await clientDynamoDB.batchWriteItem(params).promise();
};
function runStreamPromiseAsync(stream, clientDynamoDB) {
return new Promise((resolve, reject) => {
const sizeChunk = 25;
let itemsToProcess = [];
stream
.pipe(fastCsv.parse({headers: Object.keys(schemaGeData), trim: true}))
.on("data", (row) => {
stream.pause();
itemsToProcess.push(row);
if (itemsToProcess.length === sizeChunk) {
batchWrite(clientDynamoDB, itemsToProcess).finally(() => {
stream.resume();
});
itemsToProcess = [];
}
})
.on("error", (err) => {
console.log(err);
reject("Error");
})
.on("end", () => {
stream.pause();
console.log("end");
batchWrite(clientDynamoDB, itemsToProcess).finally(() => {
resolve("OK");
});
});
});
}
module.exports.main = async (event, context, callback) => {
context.callbackWaitsForEmptyEventLoop = false;
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const object = event.Records[0].s3;
const bucket = object.bucket.name;
const file = object.object.key;
const agent = new https.Agent({
keepAlive: true
});
const client = new AWS.DynamoDB({
httpOptions: {
agent
}
});
try {
//get Stream csv data
const stream = s3
.getObject({
Bucket: bucket,
Key: file
})
.createReadStream()
.on('error', (e) => {
console.log(e);
});
await runStreamPromiseAsync(stream, client);
} catch (e) {
console.log(e);
}
};
当我的文件是 1000 行时,所有内容都被插入,但是当我有 5000 行时,我的函数只插入大约 3000 行,这个数字是随机的......有时更多有时更少......
所以我想了解我在这里缺少什么?
我也读过这篇文章,但老实说,即使你暂停第二个流,第一个流仍在运行。所以如果有人对如何做到这一点有任何想法,将不胜感激!
谢谢