1

html file

<!-- Excerpt of the table being scraped; the dotted lines presumably stand for markup left out of the excerpt. -->
<table id="tbl_proxy_list">
...........
 <tr>
   ......
    <!-- This cell's value is the text content of the <time> element ("1 min"). -->
    <td align="left">
        <time class="icon icon-check">1 min</time>
    </td>
    <!-- This cell has no text content: the value (75) is carried by the div's data-value attribute. -->
    <td align="left">
        <div class="progress-bar" data-value="75" title="4625"></div>
    </td>
</tr>
</table>

ip.js file

// Registers the proxy list URL together with a callback that collects the
// table rows and dumps one record per row.
casper.start('http://www.proxynova.com/proxy-server-list/', function() {
    // The function handed to evaluate() works on the page's `document`:
    // it selects every <tr> inside the table with id "tbl_proxy_list".
    var info_text = this.evaluate(function() {
        var nodes = document.querySelectorAll('table[id="tbl_proxy_list"] tr');
        // map is borrowed from an array literal via call() and applied to `nodes`.
        return [].map.call(nodes, function(node) { 
            // Earlier attempt, kept for reference (yields only the row's text):
            //return node.innerText;
            // NOTE(review): this hands back the DOM node itself. evaluate()
            // results reportedly have to be JSON-serializable, so the nodes
            // may not arrive intact on the calling side. Confirm.
            return node;
        });
    });

    // Builds one record per entry received from evaluate().
    var tr_data = info_text.map(function(str) {
        var elements = str;
        var data = {
            ip        : elements,    // the whole entry, not an indexed item
            port      : elements[1],
            lastcheck : elements[2],
            speed     : elements[3], // <== should be the data-value attribute (75 in the sample row)
        };
        return data;
    });

    // Outputs the collected records through utils.dump.
    utils.dump(tr_data);
});

casper.run();

Using `return node.innerText` gives me only the text of each row.

  • ip is a text value
  • port is a text value
  • lastcheck is a text value
  • speed is not a text value (data-value="75")

I want to extract the data-value attribute (data-value="75", i.e. the speed value is 75).

I do not know what to do.

========================================

It works, good. Thank you, Artjom.

But echoing tr_data raises an error.

First, I modified your code like this:

// Record built for one table row; `tr` is presumably the <tr> element
// currently being mapped (the enclosing function is not shown here).
return {
    "ip":        tr.children[0].innerText.trim(),  // trimmed text of the 1st cell
    "port":      tr.children[1].innerText.trim(),  // trimmed text of the 2nd cell
    "lastcheck": tr.children[2].innerText.trim(),  // trimmed text of the 3rd cell
    // data-value attribute of the first child element of the 4th cell
    "speed":     tr.children[3].children[0].getAttribute("data-value")
};

And then I echo the result:

// Echo the "ip" field of every record in tr_data.
// (Disabled debugging aid: this.echo(tr_data.length);)
for (var row = 0; row < tr_data.length; row += 1) {
    this.echo(tr_data[row].ip);
}

When I run it, I get the error below:

TypeError: 'null' is not an object (evaluating 'tr_data.length'). What is the problem?

I need your help.. thanks.

4

1 回答 1

7

您不能从页面上下文(即 evaluate 回调内部)向外传递 DOM 元素。

文档

注意:evaluate 函数的参数和返回值必须是简单的原始对象。经验法则:如果它可以通过 JSON 序列化,那就没问题了。

返回一个 DOM 元素数组只会得到一个包含同样数量 undefined 值的数组。这意味着您需要在页面上下文中完成所有映射,然后返回结果数组。而且您只需要一次 map 调用:

// Extracts one plain record per proxy row. All mapping happens inside the
// function passed to evaluate(), because only JSON-serializable values can be
// returned from it; DOM elements cannot.
//
// Result: an array of { ip, port, lastcheck, speed } objects, where speed is
// the data-value attribute string of the 4th cell's first child element, or
// null when that cell has no child element.
var tr_data = this.evaluate(function() {
    var rows = document.querySelectorAll('table[id="tbl_proxy_list"] tbody tr');
    return Array.prototype.map.call(rows, function(tr) {
        var cells = tr.children;
        if (cells.length !== 6) {
            return null; // skip ads
        }
        // Guard the lookup: dereferencing a missing child would throw here and
        // make the whole evaluate() call fail instead of just this row.
        var bar = cells[3].children[0];
        return {
            ip:        cells[0].innerText.trim(),
            port:      cells[1].innerText.trim(),
            lastcheck: cells[2].innerText.trim(),
            speed:     bar ? bar.getAttribute("data-value") : null
        };
    }).filter(function(record) {
        return record !== null; // drop the skipped rows
    });
}) || []; // evaluate() yields null when the page-side function fails; keep tr_data iterable

您可能还需要用 trim 去掉多余的空白。

于 2014-08-04T19:10:31.973 回答