7

我想从网站上保存一些图像。目前我可以获得图像的路径,但我不知道如何使用 phantomJs 获取和保存图像。

# Collect the `src` of every image matched by '.rotate img' on the page and
# log each one.
# NOTE(review): `page` is not defined in this snippet; presumably it is the
# PhantomJS webpage object created elsewhere -- confirm.
findRotationTeaserImages = ->
  # The callback given to page.evaluate is where jQuery is looked up.
  paths = page.evaluate ->
    jQuery('.rotate img').map(-> return this.src).get()

  for path, i in paths
    console.log(path);
    # save the image
4

5 回答 5

19

我知道这是一个老问题,但您只需将每个图像的尺寸和位置存储在一个对象中,然后修改 PhantomJS 的 page.clipRect,使 page.render() 方法只渲染图像所在的区域即可。下面是一个示例,从 http://dribbble.com/ 抓取多个图像:

// Save every '.dribbble-img img' on the Dribbble front page as its own PNG by
// clipping the page render to each image's bounding box.
var page = require('webpage').create();

page.open('http://dribbble.com/', function(status) {

    // Nothing can be measured or rendered if the page did not load, so stop
    // here with a non-zero exit code.
    if (status !== 'success') {
        console.log('Unable to load http://dribbble.com/ (status: ' + status + ')');
        phantom.exit(1);
        return;
    }

    page.includeJs('//ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js',function() {

        // Runs inside the page: collect position and size of each image.
        var images = page.evaluate(function() {
            var images = [];
            function getImgDimensions($i) {
                // Read the offset once and reuse it for both coordinates.
                var offset = $i.offset();
                return {
                    top : offset.top,
                    left : offset.left,
                    width : $i.width(),
                    height : $i.height()
                };
            }
            $('.dribbble-img img').each(function() {
                var img = getImgDimensions($(this));
                images.push(img);
            });

            return images;
        });

        // Each rectangle becomes the clip region for one render; files are
        // named by their index in the list.
        images.forEach(function(imageObj, index){
            page.clipRect = imageObj;
            page.render('images/'+index+'.png');
        });

        phantom.exit();
    });
});
于 2013-10-10T17:50:45.930 回答
9

现在有另一种方法可以做到这一点。

// Re-encode an image as PNG inside the page, then write the decoded data to
// "file.png".
// NOTE(review): `page` and, inside the evaluated function, `img` are not
// defined in this snippet -- presumably they are set up elsewhere; confirm.
var fs = require("fs");
var imageBase64 = page.evaluate(function(){
  // Draw the image onto a canvas of the same size.
  var surface = document.createElement("canvas");
  surface.width = img.width;
  surface.height = img.height;
  surface.getContext("2d").drawImage(img, 0, 0);
  // Keep only the part of the data URL after the first comma.
  var dataUrl = surface.toDataURL ("image/png");
  return dataUrl.split(",")[1];
});
// atob decodes the base64 text before it is written with mode 'wb'.
fs.write("file.png", atob(imageBase64), 'wb');
于 2014-09-19T19:29:35.447 回答
5

通过启动一个运行下载图像的节点脚本的子进程来解决这个问题:

phantomJs 脚本:

# Collect the teaser image URLs from the page and pass them as arguments to
# `node loadRotationTeaser.js`; PhantomJS exits from the child process callback.
findRotationTeaserImages = ->
  paths = page.evaluate ->
    jQuery('.rotate img').map(-> return this.src).get()

  # Build the argument list directly. Joining and re-splitting on spaces
  # produced a trailing empty argument when no images were found and would
  # split any URL that contains a space.
  args = ['loadRotationTeaser.js'].concat(paths)

  child_process.execFile("node", args, null, (err, stdout, stderr) ->
    # Report a failed child process instead of ignoring it.
    console.log('loadRotationTeaser.js failed: ' + err) if err
    phantom.exit()
  )

node.js 脚本

# Download every URL given on the command line to
# public/images/rotationTeaser/img<index>.jpeg.
http = require('http-get');

# Skip the first two argv entries; slice leaves process.argv itself untouched.
args = process.argv.slice(2)

for path, i in args
  http.get path, 'public/images/rotationTeaser/img' + i + '.jpeg', (error, result) ->
    # Report failed downloads instead of dropping them silently.
    console.error(error) if error
于 2013-05-24T09:49:36.857 回答
0

如果图像尺寸已知:



    var webPage = require('webpage');

    /**
     * Download image with known dimension.
     *
     * Opens `src` in a fresh page, clips the render area to width x height
     * starting at the top-left corner, and renders it to `dest`. The page is
     * closed afterwards, even if rendering or the callback throws.
     *
     * @param src   Image source
     * @param dest  Destination full path
     * @param width Image width
     * @param height    Image height
     * @param timeout   Operation timeout
     * @param cbk   Callback (optional), invoked as cbk(success, cbkParam)
     * @param cbkParam  Parameter to pass back to the callback (optional)
     */
    function downloadImg(src, dest, width, height, timeout, cbk, cbkParam) {
        var page = webPage.create();

        page.settings.resourceTimeout = timeout; //resources loading timeout(ms)
        page.settings.webSecurityEnabled = false; //Disable web security
        page.settings.XSSAuditingEnabled = false; //Disable XSS auditing

        page.open(src, function(status) {
            try {
                // missing images sometime receive text from server
                var success = status === 'success' && !page.plainText;

                if (success) {
                    page.clipRect = {
                        top: 0,
                        left: 0,
                        width: width,
                        height: height
                    };
                    page.render(dest);
                }

                if (cbk) {
                    cbk(success, cbkParam);
                }
            } finally {
                // Release the page even when rendering or the callback throws.
                page.close();
            }
        });
    }


于 2015-11-26T12:42:58.120 回答
0

我在使用该render方法时确实遇到了很多麻烦。幸运的是,我终于想出了两个更好的解决方案。这是我在项目中使用的代码。第一个解决方案更新cookie有些麻烦,因此在获取验证码图像时无法正常工作。这两种方法都会导致一个新的 http 请求。但是通过一些修改,第二个可以省略这种请求。

第一个方案从 PhantomJS 获取 cookie,并使用 request 发起下载。第二个方案使用 base64 传递图像数据。

 async download(download_url, stream) {
    logger.profile(`download(download_url='${download_url}')`);
    let orig_url = await this.page.property('url');
    download_url = url.resolve(orig_url, download_url);
    let cookies = await this.page.property('cookies');
    let jar = request.jar();
    for (let cookie of cookies) {
        if (cookie.name !== undefined) {
            cookie.key = cookie.name;
            delete cookie.name;
        }
        if (cookie.httponly !== undefined) {
            cookie.httpOnly = cookie.httponly;
            delete cookie.httponly;
        }
        if (cookie.expires !== undefined)
            cookie.expires = new Date(cookie.expires);
        jar.setCookie(new Cookie(cookie), download_url, {ignoreError: true});
    }
    let req = request({
        url: download_url,
        jar: jar,
        headers: {
            'User-Agent': this.user_agent,
            'Referer': orig_url
        }
    });
    await new Promise((resolve, reject) => {
        req.pipe(stream)
            .on('close', resolve)
            .on('error', reject);
    });
    // Due to this issue https://github.com/ariya/phantomjs/issues/13409, we cannot set cookies back
    // to browser. It is said to be redesigned, but till now (Mar 31 2017), no change has been made.
    /*await Promise.all([
        new Promise((resolve, reject) => {
            req.on('response', () => {
                jar._jar.store.getAllCookies((err, cookies) => {
                    if (err) {
                        reject(err);
                        return;
                    }
                    cookies = cookies.map(x => x.toJSON());
                    for (let cookie of cookies) {
                        if (cookie.key !== undefined) {
                            cookie.name = cookie.key;
                            delete cookie.key;
                        }
                        if (cookie.httpOnly !== undefined) {
                            cookie.httponly = cookie.httpOnly;
                            delete cookie.httpOnly;
                        }
                        if (cookie.expires instanceof Date) {
                            cookie.expires = cookie.expires.toGMTString();
                            cookie.expiry = cookie.expires.toTime();
                        }
                        else if (cookie.expires == Infinity)
                            delete cookie.expires;
                        delete cookie.lastAccessed;
                        delete cookie.creation;
                        delete cookie.hostOnly;
                    }
                    this.page.property('cookies', cookies).then(resolve).catch(reject);
                });
            }).on('error', reject);
        }),
        new Promise((resolve, reject) => {
            req.pipe(fs.createWriteStream(save_path))
                .on('close', resolve)
                .on('error', reject);
        })
    ]);*/
    logger.profile(`download(download_url='${download_url}')`);
}
async download_image(download_url, stream) {
    logger.profile(`download_image(download_url='${download_url}')`);
    await Promise.all([
        new Promise((resolve, reject) => {
            this.client.once('donwload image', data => {
                if (data.err)
                    reject(err);
                else
                    stream.write(Buffer.from(data.data, 'base64'), resolve);

            });
        }),
        this.page.evaluate(function (url) {
            var img = new Image(), callback = function (err, data) {
                callPhantom({
                    event: 'donwload image',
                    data: {
                        err: err && err.message,
                        data: data
                    }
                });
            };
            img.onload = function () {
                var canvas = document.createElement("canvas");
                canvas.width = img.width;
                canvas.height = img.height;
                canvas.getContext("2d").drawImage(img, 0, 0);
                callback(null, canvas.toDataURL("image/png").replace(/^data:image\/(png|jpg);base64,/, ""));
            };
            img.onerror = function () {
                callback(new Error('Failed to fetch image.'));
            };
            img.src = url;
        }, download_url)
    ]);
    logger.profile(`download_image(download_url='${download_url}')`);
}
于 2017-04-09T09:58:26.030 回答