我正在使用 PhantomJS 加载第三方 URL,并通过一个使用“phantom”模块的 Node + Express 服务器把 PhantomJS 得到的 HTML 返回出来。
这段代码在我的 Mac 上运行正常,但当我尝试在 VPS 上运行它时,Node 抛出了以下错误:
RangeError: Maximum call stack size exceeded(超出最大调用堆栈大小)
function scrape(url, func){
var phantom = require('phantom');
phantom.create('--load-images=no', function(ph){
return ph.createPage(function(page){
page.set('settings.loadImages', false) ;
return page.open(url, function(status){
//page.injectJs('http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js', function() {
return page.evaluate((function(){
var scripts = document.getElementsByTagName('script'),
links = document.getElementsByTagName('link'),
images = document.getElementsByTagName('img'),
objects = document.getElementsByTagName('object');
for(i in objects){
if((obj = objects[i]) && obj.data){
if(obj.getAttribute('data') != obj.data){
obj.setAttribute('data', obj.data);
var params = obj.getElementsByTagName('param');
for(var j in params){
if((par = params[j]) && par.value){
console.log(par.name);
if(par.getAttribute('value') != par.value){
par.setAttribute('value', par.value);
}
if(par.name == "allowscriptaccess" ){
par.setAttribute('value', "always");
par.value = "always";
}
}
}
}
}
}
for(i in scripts){
if((script = scripts[i]) && script.src){
if(script.getAttribute('src') != script.src){
script.setAttribute('src', script.src);
}
}
}
for(i in links){
if((link = links[i]) && link.href ){
if(link.getAttribute('href') != link.href){
link.setAttribute('href', link.href);
}
}
}
for(i in images){
if((image = images[i]) && image.src){
if(image.getAttribute('src') != image.src){
image.setAttribute('src', image.src);
}
}
}
var baseTag = document.getElementsByTagName('base');
if(baseTag.length == 0){
var baseTag = '<base id="test" href="'+ document.domain +'" />';
var head = document.getElementsByTagName('head');
head[0].innerHTML = baseTag + head[0].innerHTML;
}
return document.getElementsByTagName('html')[0].outerHTML;
}), function(result){
ph.exit(); (func)(result);
});
//});
});
});
});
}
// Make scrape available to other modules as a property of this module's exports object.
exports.scrape = scrape;