以下程序的目的是抓取 CNN 网站,并将其所有文本写入单个文件(使用了几个第三方库)。
运行时我遇到了以下错误:
RangeError: Maximum call stack size exceeded
如何解决这个问题?或者该如何绕过它?有没有办法可以“释放”内存?如果有,该怎么做?
//----------Configuration--------------
// URL the crawl starts from; keep the http/https or www prefix.
var startingUrl = "http://cnn.com";
// Value passed as the crawler's `depth` option. Stored as a number (not the
// numeric string "50") so it does not rely on implicit string-to-number coercion.
// NOTE(review): 50 is a very deep crawl for a large site — confirm this is intended.
var crawlingDepth = 50;
// File that the text of every crawled page is appended to.
var outputFileName = "cnn.txt";
//-------------------------------------
var Crawler = require("js-crawler");
var sanitizeHtml = require('sanitize-html');
var htmlToText = require('html-to-text');
var fs = require('fs');
// Number of pages delivered to the success callback so far.
var index = 0;

// Crawl from startingUrl down to crawlingDepth and append the plain-text
// rendering of every successfully fetched page to outputFileName.
new Crawler().configure({depth: crawlingDepth})
  .crawl(startingUrl, function onSuccess(page) {
    // Convert the page's HTML body to text; the options turn off word wrapping
    // and tell html-to-text to ignore hrefs and images.
    var text = htmlToText.fromString(page.body, {
      wordwrap: false,
      hideLinkHrefIfSameAsText: true,
      ignoreHref: true,
      ignoreImage: true
    });
    index++;
    console.log(index + " pages were crawled");
    fs.appendFile(outputFileName, text, function (err) {
      if (err) {
        // Log the failure and stop here so a failed write is not
        // followed by the success message below.
        console.log(err);
        return;
      }
      console.log('It\'s saved! in same location.');
    });
  });