1

I'm using the code from netsniff.js to generate a har file and I want to improve it to generate a har file from multiple links given in an array (named links in my below code).

There is another question, "Using Multiple page.open in Single Script", that might help me, but I have no idea how to implement the given solution in my code.

Below is my code (it logs "FAIL to load the address" in the output file if the links array contains more than one item):

"use strict";
/**
 * Formats a Date as an ISO 8601 timestamp in UTC (YYYY-MM-DDTHH:mm:ss.sssZ).
 * Must be invoked with the Date as `this`; it is installed below as the
 * fallback for Date.prototype.toISOString on engines that lack it.
 *
 * The UTC getters are used on purpose: the trailing 'Z' declares the value
 * to be UTC, so formatting with the local-time getters would describe the
 * wrong instant in every non-UTC time zone.
 *
 * @returns {string} The formatted timestamp, e.g. "2017-01-30T08:36:27.023Z".
 */
function toISOStringFallback() {
    // Zero-pad to two digits (month, day, hours, minutes, seconds).
    function pad(n) { return n < 10 ? '0' + n : String(n); }
    // Zero-pad to three digits (milliseconds).
    function ms(n) { return n < 10 ? '00' + n : n < 100 ? '0' + n : String(n); }
    return this.getUTCFullYear() + '-' +
        pad(this.getUTCMonth() + 1) + '-' +
        pad(this.getUTCDate()) + 'T' +
        pad(this.getUTCHours()) + ':' +
        pad(this.getUTCMinutes()) + ':' +
        pad(this.getUTCSeconds()) + '.' +
        ms(this.getUTCMilliseconds()) + 'Z';
}

// Only install the fallback when the engine has no native implementation.
if (!Date.prototype.toISOString) {
    Date.prototype.toISOString = toISOStringFallback;
}
// Module-level entries list. It stays declared because other code in this
// script assigns to it; createHAR itself builds a fresh list on every call.
var entries = [];

/**
 * Builds a HAR (version '1.2') log object describing a single loaded page.
 *
 * Each call collects its entries into its own array, so the HAR produced for
 * one page never contains requests recorded for a previously processed page.
 *
 * Reads the globals `phantom` (for the creator version) and `page`
 * (page.startTime / page.endTime, for the onLoad timing).
 *
 * @param {string} address   Page URL; used as the page id and each entry's pageref.
 * @param {string} title     Page title.
 * @param {Date}   startTime Time the page load started.
 * @param {Array}  resources Records of the form { request, startReply, endReply };
 *                           records missing any of the three are skipped.
 * @returns {Object} An object of the form { log: { version, creator, pages, entries } }.
 */
function createHAR(address, title, startTime, resources)
{
    var pageEntries = [];

    resources.forEach(function (resource) {
        var request = resource.request,
            startReply = resource.startReply,
            endReply = resource.endReply;

        // Skip requests that never produced a complete response.
        if (!request || !startReply || !endReply) {
            return;
        }

        // Exclude Data URI from HAR file because
        // they aren't included in specification
        if (request.url.match(/(^data:image\/.*)/i)) {
            return;
        }

        pageEntries.push({
            startedDateTime: request.time.toISOString(),
            time: endReply.time - request.time,
            request: {
                method: request.method,
                url: request.url,
                httpVersion: "HTTP/1.1",
                cookies: [],
                headers: request.headers,
                queryString: [],
                headersSize: -1,
                bodySize: -1
            },
            response: {
                status: endReply.status,
                statusText: endReply.statusText,
                httpVersion: "HTTP/1.1",
                cookies: [],
                headers: endReply.headers,
                redirectURL: "",
                headersSize: -1,
                bodySize: startReply.bodySize,
                content: {
                    size: startReply.bodySize,
                    mimeType: endReply.contentType
                }
            },
            cache: {},
            timings: {
                blocked: 0,
                dns: -1,
                connect: -1,
                send: 0,
                wait: startReply.time - request.time,
                receive: endReply.time - startReply.time,
                ssl: -1
            },
            pageref: address
        });
    });

    return {
        log: {
            version: '1.2',
            creator: {
                name: "PhantomJS",
                version: phantom.version.major + '.' + phantom.version.minor +
                    '.' + phantom.version.patch
            },
            pages: [{
                startedDateTime: startTime.toISOString(),
                id: address,
                title: title,
                pageTimings: {
                    onLoad: page.endTime - page.startTime
                }
            }],
            entries: pageEntries
        }
    };
}
// Single PhantomJS web page object, reused for every address in turn.
var page = require('webpage').create();
// File-system module used to write each generated HAR file.
var fs = require('fs');
// Number of sites started so far; used to build the output file name.
var count = 0;
/**
 * Loads the addresses in `links` one at a time and writes one HAR file per
 * page ('file<count>.har'). The next address is only opened from inside the
 * previous page's open callback, because page.open is asynchronous.
 *
 * `links` is consumed (mutated) with pop(), so addresses are processed from
 * the end of the array towards the start. When the array is empty the script
 * exits. If any page fails to load, the script exits with code 1.
 *
 * @param {string[]} links Addresses still to be processed.
 */
function processSites(links)
{
    // Nothing (left) to load: finish instead of calling page.open(undefined).
    if (links.length === 0) {
        phantom.exit();
        return;
    }

    page.address = links.pop();
    var path = 'file' + count + '.har';
    page.resources = [];
    console.log("page resources:", page.resources);
    count = count + 1;

    page.onLoadStarted = function () {
        page.startTime = new Date();
    };

    page.onResourceRequested = function (req) {
        page.resources[req.id] = {
            request: req,
            startReply: null,
            endReply: null
        };
    };

    page.onResourceReceived = function (res) {
        var resource = page.resources[res.id];
        // A response can arrive for a request that is not in the current
        // list (for example one issued before page.resources was reset);
        // ignore it rather than throwing on an undefined record.
        if (!resource) {
            return;
        }
        if (res.stage === 'start') {
            resource.startReply = res;
        }
        if (res.stage === 'end') {
            resource.endReply = res;
        }
    };

    page.open(page.address, function (status) {
        // Wait 10 seconds after the load callback before building the HAR.
        setTimeout(function () {
            var har;

            if (status !== 'success') {
                console.log('FAIL to load the address');
                phantom.exit(1);
                return;
            }

            page.endTime = new Date();
            page.title = page.evaluate(function () {
                return document.title;
            });
            // Reset the module-level entries list before building this page's HAR.
            entries = [];
            har = createHAR(page.address, page.title, page.startTime, page.resources);
            fs.write(path, JSON.stringify(har), 'w');

            // Continue with the next address; exits when none are left.
            processSites(links);
        }, 10000);
    });

}

// Addresses to capture. processSites() takes them from the end of the
// array, so the last address listed is loaded first.
var links = [
    "http://stackoverflow.com",
    "http://marvel.com"
];

// Start the sequential capture.
processSites(links);

Update:
The above code generates two HAR files, file1.har and file2.har, but the second HAR file contains the HAR output generated from both links, when it should only contain the output for the first link.

Fixed this by setting var har = " "

4

1 回答 1

2

你不能在一个简单的循环中用 PhantomJS 依次打开多个页面，因为 page.open 方法是异步的。它不会等待第一个站点处理完毕，而是会立即打开第二个站点。

我已经重写了您的脚本以使用递归:只有在处理当前站点后才会打开下一个站点。(注意:如果队列中的任何站点加载失败,整个过程将停止,但您可以轻松地重写脚本以避免这种情况)。

// Fallback for engines that have no native Date.prototype.toISOString.
if (!Date.prototype.toISOString) {
    Date.prototype.toISOString = function () {
        // ... (implementation elided in this snippet)
    }
}

// Module-level list of HAR entries.
var entries = [];

// Called by processSites() with the page address, title, start time and
// recorded resources; its return value is serialized as the page's HAR.
// NOTE(review): the body is elided in this snippet — presumably unchanged
// from the question's script; confirm.
function createHAR(address, title, startTime, resources)
{
    // ... (implementation elided in this snippet)
}

// Single PhantomJS web page object, reused for every address in turn.
var page = require('webpage').create();

/**
 * Loads the addresses in `links` one at a time and prints the HAR of each
 * page to the console. The next address is only opened from inside the
 * previous page's open callback, because page.open is asynchronous.
 *
 * `links` is consumed (mutated) with pop(), so addresses are processed from
 * the end of the array towards the start. When the array is empty the script
 * exits. If any page fails to load, the script exits with code 1.
 *
 * @param {string[]} links Addresses still to be processed.
 */
function processSites(links)
{
    // Nothing (left) to load: finish instead of calling page.open(undefined).
    if (links.length === 0) {
        phantom.exit();
        return;
    }

    page.address = links.pop();

    console.log("PAGE ADDRESS: ", page.address);
    page.resources = [];

    page.onLoadStarted = function () {
        page.startTime = new Date();
    };

    page.onResourceRequested = function (req) {
        page.resources[req.id] = {
            request: req,
            startReply: null,
            endReply: null
        };
    };

    page.onResourceReceived = function (res) {
        var resource = page.resources[res.id];
        // A response can arrive for a request that is not in the current
        // list (for example one issued before page.resources was reset);
        // ignore it rather than throwing on an undefined record.
        if (!resource) {
            return;
        }
        if (res.stage === 'start') {
            resource.startReply = res;
        }
        if (res.stage === 'end') {
            resource.endReply = res;
        }
    };

    page.open(page.address, function (status) {
        // Wait 10 seconds after the load callback before building the HAR.
        setTimeout(function () {
            var har;

            if (status !== 'success') {
                console.log('FAIL to load the address');
                phantom.exit(1);
                return;
            }

            page.endTime = new Date();
            page.title = page.evaluate(function () {
                return document.title;
            });
            // Reset the module-level entries list before building this page's
            // HAR. NOTE(review): createHAR is elided in this snippet; this
            // presumably keeps earlier pages' entries out of the output — confirm.
            entries = [];
            har = createHAR(page.address, page.title, page.startTime, page.resources);
            console.log(JSON.stringify(har, undefined, 4));

            // Continue with the next address; exits when none are left.
            processSites(links);
        }, 10000);
    });

}

// Addresses to capture. processSites() takes them from the end of the
// array, so the last address listed is loaded first.
var links = [
    "http://edition.cnn.com",
    "http://stackoverflow.com"
];

// Start the sequential capture.
processSites(links);
于 2017-01-30T08:36:27.023 回答