2

我正在尝试创建一个 CSV 文件。由于数据量很大,我希望把数据持续追加到文件中,而不是先把所有记录存进数组、最后再一次性写入 CSV。所以我写了下面这段代码来模拟我想做的事情:每隔约一秒生成一条随机记录并追加到文件中,大约 50 条后结束。代码可以正常运行,但问题出在最终生成的 CSV 上。它如下所示:

"id","value""value","id"
3226,"3653aab688be4934""value","id"
4070,"9de2be11958fa207""value","id"
2061,"b754b9164146d37f""value","id"
6216,"ac85aa653bfc845d""value","id"
48,"caf5f55c49fde7bf""value","id"
4330,"2c33ae658de7a3eb""value","id"
1997,"34caef7b4ae96edd""value","id"

我不明白这是为什么。我还阅读了相关的SO 帖子,但这也无济于事。

const json2csv = require('json2csv').parse;
const fs = require('fs');

Promise = require('bluebird');

// Promises returned by append(), one per queued write; awaited together at shutdown.
const plist = [];
// Number of timer ticks handled so far.
let count = 0;
// Handle returned by setInterval, kept so the timer can be cleared later.
let intvl = null;

// Column names used when writing the header-only file.
const fields = ['id', 'value'];

/**
 * Interval tick: builds one random record, converts it to CSV with json2csv
 * and queues an append of that text to the output file, storing the returned
 * promise in `plist`.
 *
 * Once `count` has exceeded 50 it additionally waits for every promise in
 * `plist`, then clears the timer and exits the process (0 on success, -1 when
 * any append rejected).
 */
function start() {

    // Post-increment: the comparison sees the value `count` had before this call.
    if(count++ > 50) {
            Promise.all(plist)
                    .then(r => {
                            clearInterval(intvl);
                            console.log('file created');
                            process.exit(0);
                    })
                    .catch(err => {
                            console.log(err);
                            process.exit(-1);
                    })
    }
    // NOTE(review): the branch above does not return, so the code below still
    // runs on that tick and its append is queued after Promise.all(plist) was
    // already called.

    // `value` holds the random integer and `id` the hex string; the properties
    // are listed as value, id whereas `fields` is declared as ['id', 'value'].
    let data = [{
            value: Math.floor(Math.random() * 9999),
            id: require('crypto').randomBytes(8).toString('hex')
    }];

    // json2csv is called without any options here (no `fields`, no header setting).
    // NOTE(review): presumably json2csv emits a header line by default and no
    // trailing line break, which would explain the repeated "value","id" glued
    // to each row in the output — confirm against the json2csv docs.
    plist.push(append(json2csv(data)));

}


/**
 * Appends `data` to './stream.csv'.
 *
 * @param {string} data - Text to write at the end of the file.
 * @returns {Promise} Resolves with no value once fs.appendFile reports
 *     success; rejects with the error it reports otherwise.
 */
function append(data) {
    const executor = (resolve, reject) => {
        const onWritten = (err) => {
            if (err) {
                reject(err);
                return;
            }
            resolve();
        };
        fs.appendFile('./stream.csv', data, onWritten);
    };

    return new Promise(executor);
}


/**
 * Makes sure './stream.csv' is present and then starts the timer.
 *
 * When fs.stat reports an error, a file containing only the output of
 * json2csv([], {fields}) is written synchronously. In either case start() is
 * then scheduled every 1100 ms and the timer handle is stored in `intvl`.
 */
function init() {
    const csvPath = './stream.csv';
    const tickMs = 1100;

    const onStat = (statErr) => {
        if (statErr) {
            // Any stat failure is handled the same way: write a header-only file.
            const headerOnly = json2csv([], {fields});
            fs.writeFileSync(csvPath, headerOnly);
        }
        intvl = setInterval(() => start(), tickMs);
    };

    fs.stat(csvPath, onStat);
}

// Entry point: run init() once when the script is loaded.
init();

我遗漏了什么?代码一开始会检查文件是否已经存在;如果不存在,就先创建一个只包含标题行的文件,然后进入常规的写入流程。我尝试过去掉只写入标题的那一部分,这样可以去掉顶部重复的标题,但每一行末尾仍会重复出现标题。怎样才能避免这种情况?

4

1 回答 1

1

我认为问题出在代码的多次循环(定时器的每次触发)上。每次循环中,json2csv(data) 的返回值都以一个标头行 ("value","id") 开头;由于上一次写入的内容末尾没有换行符,这个标头就被追加到了文件最后一行的末尾,因此:

第一次循环之后,文件内容为:

"id","value""value","id"
 3405,"6874eb66f714e717"

第二次循环之后:

 "id","value""value","id"
3405,"6874eb66f714e717""value","id" <-- "value","id" added
1436,"c91056b1207598bb"

依此类推。您应该只在首次创建文件时用 fs.writeFileSync('./stream.csv', json2csv([], {fields})); 写入一次标头,并去掉 json2csv 每次返回结果中多余的标头,只保留 data 对应的数据行。

目前似乎无法通过 json2csv(data, {}) 这样传入空选项的调用让 json2csv 不输出标头。(补充:json2csv 的选项中提供了 header: false,也可以用它直接关闭标头输出。)

这里有一个例子:

const json2csv = require('json2csv').parse;
const fs = require('fs');

Promise = require('bluebird');

// Pending write promises, one per record handed to append().
const plist = [];
// How many times start() has been invoked by the timer.
let count = 0;
// setInterval handle; null until init() has scheduled the timer.
let intvl = null;

// CSV column names, in output order.
const fields = ['id', 'value'];

/**
 * Interval tick: appends one random record to the CSV as a header-less row.
 *
 * For the first 51 ticks a record `{ id, value }` is generated, serialised
 * with json2csv and queued for appending; the promise of each write is kept
 * in `plist`. On the following tick no new record is produced: the timer is
 * stopped, all queued writes are awaited, and the process exits (0 on
 * success, -1 if any write failed).
 */
function start() {

    // Post-increment: the comparison sees the value `count` had before this call.
    if (count++ > 50) {
        // Stop ticking right away so no further appends are queued while waiting.
        clearInterval(intvl);

        Promise.all(plist)
            .then(() => {
                console.log('file created');
                process.exit(0);
            })
            .catch(err => {
                console.log(err);
                process.exit(-1);
            });

        // Do not fall through: a record appended here would not be covered by
        // the Promise.all above and could be cut off by process.exit().
        return;
    }

    const data = [{
        id: Math.floor(Math.random() * 9999), //id should be set in this way
        value: require('crypto').randomBytes(8).toString('hex')
    }];

    // `header: false` makes json2csv emit only the data row, and `fields`
    // pins the column order instead of relying on property order.
    const row = json2csv(data, { fields, header: false });

    // The header-only file is written without a trailing line break, so each
    // row is written with a leading one.
    plist.push(append(require('os').EOL + row));

}

/**
 * Queues `data` to be written at the end of './stream.csv'.
 *
 * @param {string} data - Text to append.
 * @returns {Promise} Settles when fs.appendFile calls back: fulfilled with no
 *     value on success, rejected with the reported error on failure.
 */
function append(data) {
    const target = './stream.csv';

    return new Promise((resolve, reject) => {
        const settle = (err) => (err ? reject(err) : resolve());
        fs.appendFile(target, data, settle);
    });
}

/**
 * Prepares './stream.csv' and starts the periodic writer.
 *
 * If the file does not exist yet (fs.stat fails with ENOENT) it is created
 * containing only the header produced by json2csv([], {fields}). Any other
 * stat failure is logged and the process exits with -1 instead of being
 * mistaken for a missing file. Afterwards start() is scheduled every 1100 ms
 * and the timer handle is stored in `intvl`.
 */
function init() {
    const csvPath = './stream.csv';

    fs.stat(csvPath, (err) => {
        if (err) {
            if (err.code !== 'ENOENT') {
                // stat failed for a reason other than "no such file";
                // overwriting the path blindly could destroy existing data.
                console.log(err);
                process.exit(-1);
                return;
            }
            // First run: header only, written without a trailing line break.
            fs.writeFileSync(csvPath, json2csv([], {fields}));
        }

        intvl = setInterval(() => {
            start();
        }, 1100);
    });
}

// Entry point: run init() once when the script is loaded.
init();

以及生成的输出文件stream.csv

"id","value"
2462,"7c9197ae6c101f27"
7714,"e1bbfa2dc9adba7a"
2728,"3ff6673cd22bb00b"
8686,"c1f61c138e7b9fdc"
6687,"01d006f74412459a"
7888,"7ccf8e40b9cc4192"
2892,"1672a034573d1be3"
6228,"d8d004148c59134b"
2273,"5028b14b40029d4c"
5114,"1e282fd1c9a84e25"
3636,"c2b7d2250e6fad1e"
8096,"9fb35e54f749417f"
8955,"f2ccc57eab5438a0"
3957,"b323e7addc967d29"
于 2018-06-08T14:35:11.010 回答