0

我需要提取整个页面中所有 loc 标签的文本,并统计提取到的文本数量。然后在循环中对每个提取到的 URL 依次执行 casper.open('http://www.raybiztech.com/.....') 这样的命令。有人能帮忙解决这个问题吗?

<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
        <url>
            <loc>http://raybiztech.com/blog</loc>
            <lastmod>2014-02-24</lastmod>
        </url><url>
            <loc>http://raybiztech.com/blog/application-development-maintenance</loc>
            <lastmod>2014-02-14</lastmod>
        </url><url>
            <loc>http://raybiztech.com/blog/application-development-maintenance/analysis-services-testing-with-nick</loc>
            <lastmod>2014-02-14</lastmod>
        </url><url>
            <loc>http://raybiztech.com/blog/application-development-maintenance/bi-solution-join-on-premises-to-windows-azure</loc>
            <lastmod>2014-02-14</lastmod>
        </url><url>
            <loc>http://raybiztech.com/blog/application-development-maintenance/data-profiling-im-data-source-view</loc>
            <lastmod>2014-02-14</lastmod>
        </url><url>
            <loc>http://raybiztech.com/blog/application-development-maintenance/data-programmability-tools-with-craig-lee</loc>
            <lastmod>2014-02-14</lastmod>
 .....

我把代码更新如下。我想提取文本(特别是 loc 标签中的文本),并通过 casper.thenOpen("http://raybiztech.com/blog") 这样的调用依次打开提取到的每个 URL。

/*
 * CasperJS test script: opens the raybiztech.com sitemap, saves a copy of the
 * page content to disk, and reports the text of every <loc> element together
 * with the number of entries found.
 */

// `new require('casper').Casper(...)` parses as
// `(new require('casper')).Casper(...)`, i.e. Casper itself was being called
// WITHOUT `new`. Parenthesising the constructor makes the intended
// instantiation explicit instead of relying on Casper re-invoking itself.
var casper = new (require('casper').Casper)({
    verbose: true,          // log messages will be printed out to the console
    logLevel: "debug",      // "debug" level messages will be logged
    safeLogs: false
});

var fs = require('fs');
var utils = require('utils');

// Timestamped file name so each run writes its own copy of the sitemap.
var fname = new Date().getTime() + '.xml';
var save = fs.pathJoin(fs.workingDirectory, 'raybiztech', fname);

casper.test.begin('Payment Gateway', function(test) {

    /*
     * Start the browser on the sitemap at
     * http://www.raybiztech.com/sitemap.xml
     */
    casper.start('http://www.raybiztech.com/sitemap.xml', function() {
        this.capture('/home/gopi/output/learningcounts/cmsHome.jpg');
        this.echo(this.getCurrentUrl());
        fs.write(save, this.getPageContent() + '\n', 'w');

        // getElementsInfo() returns one info object per matched element;
        // keep only the `text` property, which holds the <loc> content.
        var locTexts = this.getElementsInfo('loc').map(function(info) {
            return info.text;
        });

        utils.dump(locTexts);
        this.echo('Number of loc entries: ' + locTexts.length);
    });

    // Kept after start(), as in the original ordering of the script.
    casper.viewport(1366, 768);

    casper.run(function() {
        test.done();
        casper.test.renderResults(true, 0, '/home/gopi/workspace/learningcounts/raybiztech.xml');
    });
});
4

1 回答 1

0

casper.getElementsInfo 函数返回一个数组,其中每一项都包含 text 属性。然后,您可以对该数组调用 map,把每个对象转换为其 text 属性的字符串;数组的 length 即为提取到的文本数量。

于 2014-12-17T19:21:19.113 回答