现在 Deno 1.0 已经发布,我回过头来补充一下这个问题,以防其他人也有兴趣做类似的事情。我拼凑出了一个适用于我的用例的小类。它不像 stream-json 包之类的东西那么健壮,但可以很好地处理大型 JSON 数组。
import { EventEmitter } from "https://deno.land/std/node/events.ts";
/**
 * Streams a file that contains a JSON array of objects and emits every
 * top-level object as soon as its closing brace has been read, so the whole
 * file never has to be held in memory at once.
 *
 * Events:
 *  - 'object': emitted with the parsed value of each top-level `{ ... }`.
 *  - 'error':  emitted when opening, reading or parsing fails during the run
 *              started by the constructor.
 *
 * Reading starts from the constructor. The first read is awaited, so
 * listeners attached synchronously after `new JSONStream(...)` do not miss
 * any event.
 */
export class JSONStream extends EventEmitter {
  /** Nesting depth of `{` seen so far; 0 means "between top-level objects". */
  private openBraceCount = 0;
  /** Raw bytes of the object currently being collected. */
  private tempUint8Array: number[] = [];
  private decoder = new TextDecoder();
  /** True while the scanner is inside a JSON string literal. */
  private inString = false;
  /** True when the previous byte inside a string was an unconsumed backslash. */
  private escaped = false;

  constructor(private filepath: string) {
    super();
    // Surface failures as an 'error' event instead of silently dropping the
    // promise. With no 'error' listener the emitter rethrows, which keeps the
    // failure visible as an unhandled rejection.
    this.stream().catch((err: unknown) => {
      this.emit("error", err);
    });
  }

  /**
   * Reads the file chunk by chunk and emits 'object' for every complete
   * top-level object found.
   *
   * @returns a promise that resolves once the file has been fully read and
   *          closed; it rejects if opening, reading or `JSON.parse` fails.
   */
  async stream() {
    console.time("Run Time");
    // Start from a clean scanner state so a repeated call is not affected by
    // a previous (possibly aborted) run.
    this.openBraceCount = 0;
    this.tempUint8Array = [];
    this.inString = false;
    this.escaped = false;

    const file = await Deno.open(this.filepath);
    try {
      // creates iterator from reader, default buffer size is 32kb
      for await (const buffer of Deno.iter(file)) {
        this.consume(buffer);
      }
    } finally {
      // Always release the file handle, even when parsing throws.
      file.close();
      console.timeEnd("Run Time");
    }
  }

  /**
   * Scans one chunk of bytes, carrying string/brace state across chunks.
   * Scanning at byte level is safe for UTF-8 because every byte of a
   * multi-byte sequence is >= 0x80 and can never equal an ASCII delimiter.
   */
  private consume(buffer: Uint8Array): void {
    const QUOTE = 34; // "
    const BACKSLASH = 92; // \
    const OPEN_BRACE = 123; // {
    const CLOSE_BRACE = 125; // }

    for (const byte of buffer) {
      const buffering = this.openBraceCount > 0;

      if (this.inString) {
        // Inside a string every byte is content: keep whitespace and do not
        // treat braces as structural.
        if (buffering) this.tempUint8Array.push(byte);
        if (this.escaped) {
          this.escaped = false;
        } else if (byte === BACKSLASH) {
          this.escaped = true;
        } else if (byte === QUOTE) {
          this.inString = false;
        }
        continue;
      }

      if (byte === QUOTE) {
        this.inString = true;
        if (buffering) this.tempUint8Array.push(byte);
        continue;
      }

      if (byte === OPEN_BRACE) {
        if (!buffering) this.tempUint8Array = [];
        this.openBraceCount++;
        this.tempUint8Array.push(byte);
        continue;
      }

      // Anything else between top-level objects (array brackets, commas,
      // whitespace, a stray closing brace) is irrelevant.
      if (!buffering) continue;

      // Insignificant whitespace outside strings: tab, LF, CR, space.
      if (byte === 9 || byte === 10 || byte === 13 || byte === 32) continue;

      this.tempUint8Array.push(byte);

      if (byte === CLOSE_BRACE) {
        this.openBraceCount--;
        if (this.openBraceCount === 0) this.flush();
      }
    }
  }

  /** Decodes the collected bytes, parses them and emits the result. */
  private flush(): void {
    const jsonString = this.decoder.decode(new Uint8Array(this.tempUint8Array));
    this.tempUint8Array = [];
    this.emit("object", JSON.parse(jsonString));
  }
}
示例用法
const stream = new JSONStream('test.json');
// The payload is whatever JSON.parse produced, so it is typed `unknown`:
// narrow or validate it before reading any properties.
stream.on('object', (object: unknown) => {
  // do something with each object
});
用它处理一个约 4.8 MB、包含约 20,000 个如下所示小对象的 JSON 文件:
[
{
"id": 1,
"title": "in voluptate sit officia non nesciunt quis",
"urls": {
"main": "https://www.placeholder.com/600/1b9d08",
"thumbnail": "https://www.placeholder.com/150/1b9d08"
}
},
{
"id": 2,
"title": "error quasi sunt cupiditate voluptate ea odit beatae",
"urls": {
"main": "https://www.placeholder.com/600/1b9d08",
"thumbnail": "https://www.placeholder.com/150/1b9d08"
}
}
...
]
耗时 127 毫秒。
❯ deno run -A parser.ts
Run Time: 127ms