0
  /**
   * Read a '^'-delimited text file and project selected fields out of
   * every line.
   *
   * The file is read synchronously in one go and split on '\n'. Each line
   * is split with splitLines() and projected immediately, so the full
   * per-line field array can be discarded as soon as its projection has
   * been built, instead of keeping the split form of the whole file alive
   * in an intermediate array.
   *
   * @param {string} filename - path of the text file to read.
   * @param {Object} fieldMap - collection exposing forEach(value, key).
   *     One output entry is produced per visited element, taken from the
   *     line's field at position `key`; `value` itself is not used.
   *     NOTE(review): selecting by key rather than by value is kept
   *     exactly as in the original - confirm that this is intended.
   * @returns {Array<Array>} one array of selected fields per line. A
   *     selected position the line does not have yields undefined, and a
   *     trailing newline in the file yields one extra entry for the empty
   *     last line.
   */
  var parseFile = function(filename, fieldMap) {
      var lines = fs.readFileSync(filename).toString().split('\n');

      return lines.map(function(line) {
          var fields = splitLines(line);
          var selected = [];
          fieldMap.forEach(function(value, key) {
              selected.push(fields[key]);
          });
          return selected;
      });
  };

  // Break one line of text into its fields; '^' is the field delimiter.
  var splitLines = function(line) {
      var fields = line.split('^');
      return fields;
  };

  // Driver: for every key in requiredFieldMap, parse "<key>.txt" with that
  // key's field map and write the result to "<key>.json" as JSON.
  (function () {
      var baseName;
      var fieldMap;
      var rows;

      for (baseName in requiredFieldMap) {
          fieldMap = requiredFieldMap[baseName];
          rows = parseFile(baseName + '.txt', fieldMap);
          fs.writeFileSync(baseName + '.json', JSON.stringify(rows));
          console.log('done file : ' + baseName);
      }
  })();

上面的代码用来处理一个 34 MB 的文件。我这样运行它:

node des.js,结果进程被杀死(输出 Killed)。

我是在 VirtualBox 虚拟机里运行的,系统是 Ubuntu Server,内存为 512 MB。

我是 JavaScript 新手。是算法太糟糕了,还是别的什么原因?我该如何改进它?谢谢。

4

1 回答 1

0

正如您所怀疑的,您的程序失败是因为内存不足。我不能确定您用 fieldMap 具体在做什么,但我写了一个简单的脚本:把 32 MB 的文本按换行符拆分,再把每一行按每 5 个字符拆分,在我的 Mac 上就需要 560 MB 内存,所以您的虚拟机无法处理它。

您需要逐行读取该文件,并在生成输出的同时把输出写出去。fs 模块中的异步方法是一个不错的起点,但我认为您真正需要的可能是 node-lazy。可以先看看 Lazy 的 lines 方法。

你可能会像这样实现它。

var fs = require('fs');

// we use modules lazy and temp, so you have 
// to run "npm install lazy temp" for this script
// to work.
var Lazy = require('lazy');
var tempfile = require('temp');

// Build a splitter bound to one delimiter: the returned function takes a
// line and returns the array of pieces found between occurrences of `char`.
function lineSplitter(char){
  function splitOnDelimiter(rawLine){
    // Lines may arrive as Buffers rather than strings (lazy appears to
    // emit Buffers), so normalise to a string before splitting.
    var text = rawLine.toString();
    return text.split(char);
  }
  return splitOnDelimiter;
}

// Stream `filename` through Lazy line by line: every line is split on '^'
// and the resulting pieces are handed to lineHandler; `done` is passed to
// join() at the end of the chain so it runs once the input is exhausted.
// NOTE(review): join() presumably collects every element before invoking
// its callback - confirm against the node-lazy docs if memory is tight.
function parseWholeFile(filename, lineHandler, done){
  var input = fs.createReadStream(filename);
  var lines = Lazy(input).lines;
  var rows = lines.map(lineSplitter('^'));

  rows.forEach(lineHandler).join(done);
}

/**
 * Persist one parsed line by appending it to /tmp/out.json as a single
 * JSON document followed by a newline, i.e. one record per line.
 *
 * The append is synchronous on purpose. Firing one un-awaited async append
 * per input line gives no ordering guarantee between records and lets
 * pending writes (and their data) pile up while the input is still being
 * read. The trailing '\n' keeps consecutive records separable.
 *
 * @param {*} parts - value to serialise; parseWholeFile passes the array
 *     of fields produced for one line.
 * @throws {Error} any error raised by fs.appendFileSync (for example a
 *     missing directory or a full disk). Write failures are surfaced
 *     instead of being silently dropped.
 */
function lineHandler(parts){
  fs.appendFileSync('/tmp/out.json', JSON.stringify(parts) + '\n');
}

// Entry point: the body only runs when this file is executed directly
// with node, not when it is loaded through require().
if(require.main === module){
  // The input path comes from the first command line argument and falls
  // back to /tmp/bigish (in the author's setup: a 32MB file whose ~1kb
  // lines are made of 64 byte parts separated by '^').
  var filename = process.argv[2] || '/tmp/bigish';
  var started = new Date();

  // Report the elapsed wall-clock time once the whole file is processed.
  var reportElapsed = function(){
    var elapsedMs = new Date() - started;
    console.log("took " + elapsedMs + "ms");
  };

  parseWholeFile(filename, lineHandler, reportElapsed);
}
于 2013-08-31T03:29:28.990 回答