1
  • Node version: v10.19.0
  • Npm version: 6.13.4
  • Npm package csvtojson Package Link

    csvtojson({
     "delimiter": ";",
     "fork": true
    })
    .fromStream(fileReadStream)
    .subscribe((dataObj) => {
    console.log(dataObj);
    }, (err) => {
    console.error(err);
    }, (success) => {
    console.log(success);
    });
    

While trying to handle large CSV file (about 1.3 million records) I face error "CSV Parse Error: Error: unclosed_quote." after certain records(e.g. after 400+ records) being processed successfully. From the CSV file i don't see any problems with data formatting there, however the parser might be raising this error because of "\n" character being found inside the column/field value.

  • Is there a solution already available with this package? or
  • is there a workaround to handle this error? or
  • is there a way to skip such CSV rows having any sort of errors not only this one, to let the entire CSV to JSON parsing work without the processing getting stuck?

Any help will be much appreciated.

4

2 回答 2

2

I've played about with this, and it's possible to hook into this using a CSV File Line Hook, csv-file-line-hook, you can check for invalid lines and either repair or simply invalidate them.

The example below will simply skip the invalid lines (missing end quotes)

example.js

const fs = require("fs");

let fileReadStream = fs.createReadStream("test.csv");
let invalidLineCount = 0;

const csvtojson = require("csvtojson");
csvtojson({ "delimiter": ";", "fork": true })
.preFileLine((fileLineString, lineIdx)=> {
    let invalidLinePattern = /^['"].*[^"'];/;
    if (invalidLinePattern.test(fileLineString)) {
        console.log(`Line #${lineIdx + 1} is invalid, skipping:`, fileLineString);
        fileLineString = "";
        invalidLineCount++;
    }
    return fileLineString
})
.fromStream(fileReadStream) 
.subscribe((dataObj) => { 
    console.log(dataObj);
}, 
(err) => { 
    console.error("Error:", err); 
}, 
(success) => {
    console.log("Skipped lines:", invalidLineCount);
    console.log("Success"); 
});

test.csv

Name;Age;Profession
Bob;34;"Sales,Marketing"
Sarah;31;"Software Engineer"
James;45;Driver
"Billy, ;35;Manager
"Timothy;23;"QA
于 2020-04-02T07:24:17.693 回答
0

This regex works better

/^(?:[^"\]|\.|"(?:\.|[^"\])")$/g

Here is a more complex working script for big files by reading each line

import csv from 'csvtojson'
import fs from 'fs-extra'
import lineReader from 'line-reader'

import { __dirname } from '../../../utils.js'

const CSV2JSON = async(dumb, editDumb, headers, {
    options = {
        trim: true,
        delimiter: '|',
        quote: '"',
        escape: '"',
        fork: true,
        headers: headers
    }
} = {}) => {
    try {
        log(`\n\nStarting CSV2JSON - Current directory: ${__dirname()} - Please wait..`)

        await new Promise((resolve, reject) => {
            let firstLine, counter = 0
            lineReader.eachLine(dumb, async(line, last) => {
                counter++

                // log(`line before convert: ${line}`)
                let json = (
                    await csv(options).fromString(headers + '\n\r' + line)
                        .preFileLine((fileLineString, lineIdx) => {
                            // if it its not the first line
                                // eslint-disable-next-line max-len
                                if (counter !== 1 && !fileLineString.match(/^(?:[^"\\]|\\.|"(?:\\.|[^"\\])*")*$/g)) {
                                    // eslint-disable-next-line max-len
                                    console.log(`Line #${lineIdx + 1} is invalid. It has unescaped quotes. We will skip this line.. Invalid Line: ${fileLineString}`)
                                    fileLineString = ''
                                }

                            return fileLineString
                        })
                        .on('error', e => {
                            e = `Error while converting CSV to JSON.
                            Line before convert: ${line}
                            Error: ${e}`
                            throw new BaseError(e)
                        })
                )[0]

                // log(`line after convert: ${json}`)

                if (json) {
                    json = JSON.stringify(json).replace(/\\"/g, '')

                    if (json.match(/^(?:[^"\\]|\\.|"(?:\\.|[^"\\])*")*$/g)) {
                        await fs.appendFile(editDumb, json)
                    }
                }

                if (last) {
                    resolve()
                }
            })
        })
    } catch (e) {
        throw new BaseError(`Error while converting CSV to JSON - Error: ${e}`)
    }
}

export { CSV2JSON }
于 2021-11-13T20:40:54.123 回答