8

我在 PhantomJS 中运行 jQuery 时遇到问题。我找到了这个答案,它谈到在 evaluate 函数内部无法访问外部变量,但那个问题是关于 Node 模块的;而在我的示例中,我只是在 evaluate 函数内部调用了 console.log。我也把这个问题发到了 GitHub 上。

以前,对于某些页面,下面的 evaluate 代码不会被执行。在 @b1f56gd4 的帮助下,它现在会打印消息了;代码仍然无法正常执行,但至少我现在可以看到如下信息:

https://login.yahoo.com/上的页面运行来自http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js的不安全内容。

我无法从其他域加载 jQuery,而且 --local-to-remote-url-access=true 或 --web-security=false 选项都不起作用。

我将尝试在本地加载 jQuery。这是代码:

// Question exhibit (first attempt), kept exactly as posted: opens the Yahoo login
// page, tries to load jQuery into it, fills in the login form and saves screenshots.
console.log('Loading a web page');
var url = 'https://login.yahoo.com/'; 
var page = require('webpage').create();
console.log('Setting error handling');
// Echo the messages received through the page's console callback.
page.onConsoleMessage = function (msg) {
    console.log(msg);
};
// Print the error message and the file/line of every trace entry, then quit.
page.onError = function (msg, trace) {
    console.log(msg);
    trace.forEach(function(item) {
        console.log('  ', item.file, ':', item.line);
    })
    phantom.exit();
}
console.log('Error handling is set');
console.log('Opening page');
page.open(url, function (status) {
    if (status != 'success') {
        // Load failed: only the status is logged, prefixed with 'F-'.
        console.log('F-' + status);
    } else {
        console.log('S-' + status); 
        //-------------------------------------------------     
        // Location of the jQuery script to load; the remote alternative is left commented out.
        var jsLoc = '';
        jsLoc = 'jquery.min.js'; // to load local
        //jsLoc = 'http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js'; // to load remote
        // Fills in the login form inside the given page, then renders it to ystsA.png.
        var func = function(pg){
            console.log('Function called');
            console.log('Page evaluating');
            console.log(pg);
            // NOTE(review): `.value` is assigned on the object returned by $(...) rather
            // than through jQuery's .val(); presumably the inputs stay empty — confirm.
            pg.evaluate(function() {
                console.log('Page evaluate started');               
                //---
                var loginVar = 'ih5d4hf65465fd45h6@yahoo.com.br';
                var pwdVar = 'itsmypass_445f4hd564hd56f46s'; 
                //---
                $("#login_form #username").value = loginVar;
                $("#login_form #passwd").value = pwdVar;
                //---
            });
            console.log('Rendering');
            pg.render('ystsA.png');
            console.log('Rendered');
        }
        // NOTE(review): this typeof test runs in the PhantomJS script itself, not inside
        // page.evaluate(); presumably it cannot see a jQuery loaded by the page — confirm.
        if (typeof jQuery == 'undefined') {  
            console.log('JQuery Loading');  // <<<<==== Execute only until here
            console.log('Source:['+jsLoc+']');
            var rs = page.includeJs(jsLoc, function()  // <<<<===== Fail here, jsLoc was changed to load locally and after tried remotely, i tried use page.injectJs but fail too
            { 
                console.log('JQuery Loaded');  // <<<< ===== Never reach here, no matter if loading local or remote script in include above
                func(page); 
            });
            // Rendered right after includeJs() returns, without waiting for its callback.
            page.render('ystsB.png');
        } else {
            console.log('JQuery Already Loaded');
            func(page);
            page.render('ystsC.png');
        }
        //-------------------------------------------------
    }
    // NOTE(review): reached on every path as soon as the callback body finishes, i.e.
    // right after includeJs() was called above. If includeJs completes asynchronously
    // (check the PhantomJS docs), the exit is requested before 'JQuery Loaded' can be
    // logged, which would match the symptom described in the markers above.
    phantom.exit();
});

阅读 @g4d564w56 的答案后,我不再使用 jQuery,现在可以填写文本框了,但仍然无法通过点击按钮来提交登录表单。
查看新代码:

// Question exhibit (second attempt), kept exactly as posted: same page and handlers
// as the first attempt, but the form is filled with plain DOM calls instead of jQuery.
console.log('Loading a web page');
var url = 'https://login.yahoo.com/'; 
var page = require('webpage').create();
console.log('Setting error handling');
// Echo the messages received through the page's console callback.
page.onConsoleMessage = function (msg) {
    console.log(msg);
};
// Print the error message and the file/line of every trace entry, then quit.
page.onError = function (msg, trace) {
    console.log(msg);
    trace.forEach(function(item) {
        console.log('  ', item.file, ':', item.line);
    })
    phantom.exit();
}
console.log('Error handling is set');
console.log('Opening page');
page.open(url, function (status) {
    if (status != 'success') {
        // Load failed: only the status is logged, prefixed with 'F-'.
        console.log('F-' + status);
    } else {
        console.log('S-' + status); 
        //-------------------------------------------------     
        // NOTE(review): jsLoc is still assigned here but is not read anywhere in this version.
        var jsLoc = '';
        jsLoc = 'jquery.min.js'; // to load local
        //jsLoc = 'http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js'; // to load remote      
        // Fills in and submits the login form inside the given page, then renders it to ystsA.png.
        var act01 = function(pg){
            console.log('Function called');
            console.log('Page evaluating');
            console.log(pg);
            pg.evaluate(function() {
                // Shorthand for document.getElementById.
                var getElmById = function(id){
                    return document.getElementById(id);
                }           
                console.log('Page evaluate started');               
               //---
                var loginVar = 'ih5d4hf65465fd45h6@yahoo.com.br';
                var pwdVar = 'itsmypass_445f4hd564hd56f46s'; 
                //---
                getElmById("username").value = loginVar;
                getElmById("passwd").value = pwdVar;
                getElmById("login_form").submit(); /// <<<<==== now its dont work !!!
                //---
            });
            // NOTE(review): the screenshot is taken immediately after evaluate() returns;
            // presumably the navigation started by submit() has not finished yet — confirm.
            console.log('Rendering');
            pg.render('ystsA.png');
            console.log('Rendered');
        }
        act01(page);
        //-------------------------------------------------
    }
    // NOTE(review): the exit is requested as soon as act01() returns, so the script
    // never waits for the page that the form submission would load — confirm.
    phantom.exit();
});
4

5 回答 5

5

我知道这个问题已经在一年前得到了回答,但答案并没有真正解决这个问题。错误原因如下:

“https://login.yahoo.com/ 上的页面运行了来自 http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js 的不安全内容。”

原因是登录页面是 https 页面,而您正在尝试加载 http 资源。如果您将 url 更改为 https://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js,此错误将消失。我花了一段时间才弄清楚这一点。

于 2014-05-15T23:58:39.227 回答
4

使用谷歌搜索的工作版本。

// Page object shared by the handlers below; the two step functions are declared
// here and assigned further down.
var page = require('webpage').create();
var doSearch;
var displayResults;

// First step: types the query into the search box, triggers the form submit inside
// the page, and saves a screenshot as phantomjs-searching.png.
doSearch = function() {
    // Executed by page.evaluate(); relies on the `$` made available to the page.
    var submitQuery = function() {
        var queryInput = $("input[name=q]");
        queryInput.val('what is phantomjs');
        var searchForm = $("form");
        searchForm.trigger('submit');
        return true;
    };

    console.log('Searching...');
    page.evaluate(submitQuery);
    page.render('phantomjs-searching.png');
};

// Second step: logs one line per 'h3 a' link ("<position>: <text>:  // <href>")
// from inside the page, then saves a screenshot as phantomjs-results.png.
displayResults = function() {
    // Executed by page.evaluate(); relies on the `$` made available to the page.
    var logResultLinks = function() {
        $('h3 a').each(function(position) {
            var fields = [position + 1, $(this).text(), ' // ' + $(this).attr('href')];
            console.log(fields.join(': '));
        });
        return true;
    };

    console.log('Results...');
    page.evaluate(logResultLinks);
    page.render('phantomjs-results.png');
};

// Runs after every page load: includes jQuery into the freshly loaded page, then
// performs the search on the first load and prints the results on the second.
// phantom.state is unset until the search has been submitted.
page.onLoadFinished = function(status) {
    if (status !== 'success') {
        console.log('Connection failed.');
        // Non-zero exit code so callers can tell the run failed.
        phantom.exit(1);
        return;
    }

    // Fetched over https so the include is not rejected as insecure content when the
    // loaded page itself is served over https.
    page.includeJs('https://ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js', function() {
        if (!phantom.state) {
            doSearch();
            phantom.state = 'results';
        } else {
            displayResults();
            phantom.exit();
        }
    });
};

// Echo the messages received through the page's console callback so the output of
// the evaluated step functions becomes visible.
page.onConsoleMessage = function(pageMessage) {
    console.log(pageMessage);
};

// Start the first load.
page.open('http://google.com');
于 2014-01-06T05:37:18.310 回答
2

试试来自 http://snippets.aktagon.com/snippets/534-How-to-scrape-web-pages-with-PhantomJS-and-jQuery 的以下代码。它加载 jQuery 的本地副本,但也可以使用被请求页面自身加载的 jQuery 实例。

// Shared state for the script: the page (created through the webpage module rather
// than the legacy global WebPage constructor), the start URL, and a step counter
// that is printed for each load and used to name the screenshots.
var page = require('webpage').create(),
     url = 'http://localhost/a-search-form',
     stepIndex = 0;

 /**
  * From PhantomJS documentation:
  * This callback is invoked when there is a JavaScript console. The callback may accept up to three arguments: 
  * the string for the message, the line number, and the source identifier.
  *
  * Here only the message is used; it is echoed with a 'console> ' prefix.
  */
 page.onConsoleMessage = function (msg, line, source) {
     console.log('console> ' + msg);
 };

 /**
  * From PhantomJS documentation:
  * This callback is invoked when there is a JavaScript alert. The only argument passed to the callback is the string for the message.
  *
  * The message is echoed with an 'alert!!> ' prefix.
  */
 page.onAlert = function (msg) {
     console.log('alert!!> ' + msg);
 };

 // Handler executed each time a page load finishes (the initial load and the one
 // triggered by the form submit). It is registered as onLoadFinished because a
 // callback handed to page.open() only covers the load started by that call, so
 // the second step stored in phantom.state would otherwise never run.
 page.onLoadFinished = function (status) {
   if (status !== 'success') {
     // Without an explicit exit the script would keep running after a failed load.
     console.log('Unable to load page, status: ' + status);
     phantom.exit(1);
     return;
   }

   // State is initially empty. State is persisted between page loads and can be used for identifying which page we're on.
   console.log('============================================');
   console.log('Step "' + stepIndex + '"');
   console.log('============================================');

   // Inject jQuery for scraping (you need to save jquery-1.6.1.min.js in the same folder as this file)
   if (!page.injectJs('jquery-1.6.1.min.js')) {
     console.log('Unable to inject jquery-1.6.1.min.js');
     phantom.exit(1);
     return;
   }

   // Our "event loop"
   if (!phantom.state) {
     initialize();
   } else {
     phantom.state();
   }

   // Save screenshot for debugging purposes
   page.render("step" + stepIndex++ + ".png");
 };

 // Start the first load; every later load is handled by onLoadFinished above.
 page.open(url);

 // Step 1: fill in and submit the search form inside the page, then register
 // parseResults as the handler for the next step.
 function initialize() {
   // Executed by page.evaluate(); relies on the `$` injected into the page.
   var submitSearch = function() {
     var queryBox = $('form#search input.query');
     queryBox.val('Jebus saves');
     var searchForm = $('form#search');
     searchForm.submit();
     console.log('Searching...');
   };

   page.evaluate(submitSearch);

   // The value stored on phantom.state survives page reloads, so it is used to
   // remember which function handles the search results.
   phantom.state = parseResults;
 }

 // Step 2: log the href of every link under #search-result from inside the page,
 // then end the script.
 function parseResults() {
   // Executed by page.evaluate(); relies on the `$` injected into the page.
   var logResultLinks = function() {
     $('#search-result a').each(function(position, anchor) {
       var href = $(anchor).attr('href');
       console.log(href);
     });
     console.log('Parsed results');
   };

   page.evaluate(logResultLinks);

   // A third step would be registered here instead of exiting, but the page
   // would have to be reloaded for the load callback to run again.
   phantom.exit();
 }
于 2013-06-11T20:28:52.720 回答
1

有一个众所周知的 bug:PhantomJS 无法加载 jQuery,因此很难向服务器提交表单数据;不过您仍然可以只使用 querySelectorAll 来选择元素,例如参见:如何使用 phantomjs 抓取链接

于 2013-04-11T03:41:40.680 回答
1

@lmeurs 的答案非常好,但无法正常运行。
我基于该答案为你写了一个可以运行的版本 :)。

// Page shared by all steps; created through the webpage module rather than the
// legacy global WebPage constructor.
var page = require('webpage').create();
var url = 'http://br.search.yahoo.com';
// Index of the step to run next; also used to name the screenshot files.
var stepIndex = 0;

// Echo messages from the page's console callback with a 'console> ' prefix.
page.onConsoleMessage = function (msg, line, source) { console.log('console> ' + msg); };

// Echo messages from the page's alert callback with an 'alert!!> ' prefix.
page.onAlert = function (msg) { console.log('alert!!> ' + msg); };

// Renders the page to "step<stepIndex>.png", logging before and after the render.
function takeShot(){
    var shotFile = "step" + stepIndex + ".png";
    console.log("TakingShot");
    page.render(shotFile);
    console.log("ShotTake");
}

// Step 0: types the search text into the form's text input and calls submit on the
// form's submit control, all inside the page.
function step0() {
    // Executed by page.evaluate(); relies on the `$` injected into the page.
    var typeQueryAndSubmit = function() {
        var textInput = $("form [type='text']");
        textInput.val('its now sunday searching it');
        var submitControl = $("form [type='submit']");
        submitControl.submit();
    };

    console.log("step 00 enter");
    page.evaluate(typeQueryAndSubmit);
    console.log("step 00 exit");
}

// Step 1: logs the href of every link under #search-result from inside the page,
// then asks PhantomJS to exit.
function step1() {
    // Executed by page.evaluate(); relies on the `$` injected into the page.
    var printResultLinks = function() {
        $('#search-result a').each(function(position, anchor) {
            var href = $(anchor).attr('href');
            console.log(href);
        });
    };

    console.log("step 01 enter");
    page.evaluate(printResultLinks);
    console.log("step 01 exit");
    phantom.exit();
}

// Opens the start URL, injects the local jQuery copy and runs the steps in order,
// taking a screenshot after each one.
page.open(url, function (status) {
    console.log("[- STARTING -]");
    if (status !== 'success') {
        // Without an explicit exit the script would keep running after a failed load.
        console.log("Unable to load [" + url + "], status [" + status + "]");
        phantom.exit(1);
        return;
    }
    if (!page.injectJs('jquery-1.6.1.min.js')) {
        console.log("Unable to inject jquery-1.6.1.min.js");
        phantom.exit(1);
        return;
    }

    // Ordered list of steps. The loop is bounded by this list; the previous
    // `while(true)` kept spinning once stepIndex went past the last case.
    var steps = [step0, step1];
    // NOTE(review): every step runs back to back within this single callback, so
    // step1 presumably sees the page before the search submitted by step0 has
    // loaded — confirm.
    while (stepIndex < steps.length) {
        console.log("Step["+stepIndex+"] starting on ["+new Date()+"]");
        steps[stepIndex]();
        takeShot();
        stepIndex++;
    }
    // No exit call is needed here: the last step (step1) calls phantom.exit() itself.
});
于 2013-07-06T19:14:24.687 回答