我想从网站上保存一些图像。目前我可以获得图像的路径,但我不知道如何使用 phantomJs 获取和保存图像。
findRotationTeaserImages = ->
paths = page.evaluate ->
jQuery('.rotate img').map(-> return this.src).get()
for path, i in paths
console.log(path);
//save the image
我想从网站上保存一些图像。目前我可以获得图像的路径,但我不知道如何使用 phantomJs 获取和保存图像。
findRotationTeaserImages = ->
paths = page.evaluate ->
jQuery('.rotate img').map(-> return this.src).get()
for path, i in paths
console.log(path);
//save the image
我知道这是一个老问题,但是您只需将每个图像的尺寸和位置存储在一个对象中,然后更改 phantomjs page.clipRect 以便 page.render() 方法仅呈现其中的区域图像是。这是一个示例,从http://dribbble.com/抓取多个图像:
var page = require('webpage').create();
page.open('http://dribbble.com/', function() {
page.includeJs('//ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js',function() {
var images = page.evaluate(function() {
var images = [];
function getImgDimensions($i) {
return {
top : $i.offset().top,
left : $i.offset().left,
width : $i.width(),
height : $i.height()
}
}
$('.dribbble-img img').each(function() {
var img = getImgDimensions($(this));
images.push(img);
});
return images;
});
images.forEach(function(imageObj, index, array){
page.clipRect = imageObj;
page.render('images/'+index+'.png')
});
phantom.exit();
});
});
现在有另一种方法可以做到这一点。
var fs = require("fs");
var imageBase64 = page.evaluate(function(){
var canvas = document.createElement("canvas");
canvas.width =img.width;
canvas.height =img.height;
var ctx = canvas.getContext("2d");
ctx.drawImage(img, 0, 0);
return canvas.toDataURL ("image/png").split(",")[1];
})
fs.write("file.png",atob(imageBase64),'wb');
通过启动一个运行下载图像的节点脚本的子进程来解决这个问题:
phantomJs 脚本:
findRotationTeaserImages = ->
paths = page.evaluate ->
jQuery('.rotate img').map(-> return this.src).get()
args = ('loadRotationTeaser.js ' + paths.join(' ')).split(' ')
child_process.execFile("node", args, null, (err, stdout, stderr) ->
phantom.exit()
)
node.js 脚本
http = require('http-get');
args = process.argv.splice(2)
for path, i in args
http.get path, 'public/images/rotationTeaser/img' + i + '.jpeg', (error, result) ->
如果图像尺寸已知:
var webPage = require('webpage');
/**
* Download image with known dimension.
* @param src Image source
* @param dest Destination full path
* @param width Image width
* @param height Image height
* @param timeout Operation timeout
* @param cbk Callback (optional)
* @param cbkParam Parameter to pass back to the callback (optional)
*/
function downloadImg(src, dest, width, height, timeout, cbk, cbkParam) {
var page = webPage.create();
page.settings.resourceTimeout = timeout; //resources loading timeout(ms)
page.settings.webSecurityEnabled = false; //Disable web security
page.settings.XSSAuditingEnabled = false; //Disable web security
page.open(src, function(status) {
// missing images sometime receive text from server
var success = status == 'success' && !page.plainText;
if (success) {
page.clipRect = {
top: 0,
left: 0,
width: width,
height: height
};
page.render(dest);
}
cbk && cbk(success, cbkParam);
page.close();
});
};
我在使用该render
方法时确实遇到了很多麻烦。幸运的是,我终于想出了两个更好的解决方案。这是我在项目中使用的代码。第一个解决方案更新cookie有些麻烦,因此在获取验证码图像时无法正常工作。这两种方法都会导致一个新的 http 请求。但是通过一些修改,第二个可以省略这种请求。
第一个从获取 cookiephantomJs
并使用request
. 第二个用于base64
传递图像。
async download(download_url, stream) {
logger.profile(`download(download_url='${download_url}')`);
let orig_url = await this.page.property('url');
download_url = url.resolve(orig_url, download_url);
let cookies = await this.page.property('cookies');
let jar = request.jar();
for (let cookie of cookies) {
if (cookie.name !== undefined) {
cookie.key = cookie.name;
delete cookie.name;
}
if (cookie.httponly !== undefined) {
cookie.httpOnly = cookie.httponly;
delete cookie.httponly;
}
if (cookie.expires !== undefined)
cookie.expires = new Date(cookie.expires);
jar.setCookie(new Cookie(cookie), download_url, {ignoreError: true});
}
let req = request({
url: download_url,
jar: jar,
headers: {
'User-Agent': this.user_agent,
'Referer': orig_url
}
});
await new Promise((resolve, reject) => {
req.pipe(stream)
.on('close', resolve)
.on('error', reject);
});
// Due to this issue https://github.com/ariya/phantomjs/issues/13409, we cannot set cookies back
// to browser. It is said to be redesigned, but till now (Mar 31 2017), no change has been made.
/*await Promise.all([
new Promise((resolve, reject) => {
req.on('response', () => {
jar._jar.store.getAllCookies((err, cookies) => {
if (err) {
reject(err);
return;
}
cookies = cookies.map(x => x.toJSON());
for (let cookie of cookies) {
if (cookie.key !== undefined) {
cookie.name = cookie.key;
delete cookie.key;
}
if (cookie.httpOnly !== undefined) {
cookie.httponly = cookie.httpOnly;
delete cookie.httpOnly;
}
if (cookie.expires instanceof Date) {
cookie.expires = cookie.expires.toGMTString();
cookie.expiry = cookie.expires.toTime();
}
else if (cookie.expires == Infinity)
delete cookie.expires;
delete cookie.lastAccessed;
delete cookie.creation;
delete cookie.hostOnly;
}
this.page.property('cookies', cookies).then(resolve).catch(reject);
});
}).on('error', reject);
}),
new Promise((resolve, reject) => {
req.pipe(fs.createWriteStream(save_path))
.on('close', resolve)
.on('error', reject);
})
]);*/
logger.profile(`download(download_url='${download_url}')`);
}
async download_image(download_url, stream) {
logger.profile(`download_image(download_url='${download_url}')`);
await Promise.all([
new Promise((resolve, reject) => {
this.client.once('donwload image', data => {
if (data.err)
reject(err);
else
stream.write(Buffer.from(data.data, 'base64'), resolve);
});
}),
this.page.evaluate(function (url) {
var img = new Image(), callback = function (err, data) {
callPhantom({
event: 'donwload image',
data: {
err: err && err.message,
data: data
}
});
};
img.onload = function () {
var canvas = document.createElement("canvas");
canvas.width = img.width;
canvas.height = img.height;
canvas.getContext("2d").drawImage(img, 0, 0);
callback(null, canvas.toDataURL("image/png").replace(/^data:image\/(png|jpg);base64,/, ""));
};
img.onerror = function () {
callback(new Error('Failed to fetch image.'));
};
img.src = url;
}, download_url)
]);
logger.profile(`download_image(download_url='${download_url}')`);
}