1

问题是:我需要使用 WWW::Mechanize 从网页下载一些 PDF。

编辑:一段时间后,我重新编写了代码。我现在使用 LWP::UserAgent 而不是 WWW::Mechanize。下面是一个最小可运行示例(MWE):

#!/usr/bin/env perl

use strict;
use warnings;

use Carp;

use LWP::UserAgent;
use HTTP::Cookies;
use HTML::Form;
use HTTP::Request::Common;
use HTTP::Request::Form;
use Data::Printer;

# Log in to the publisher's account page, propagate the session to the
# sibling domains, then download a subscriber-only PDF to disk.
#
# The login form lives on www.wissenschaft-online.de, but the PDF is served
# from www.spektrum.de.  Cookies set by the login response are scoped to the
# login host only, so they are never sent to www.spektrum.de.  The login
# response page embeds one ".../template/d_sys_logincheck?..." URL per
# sibling domain (as <script>/<iframe> src); a browser loads them
# automatically.  This script requests them explicitly so that each domain
# gets a chance to set its own cookies.
# NOTE(review): that these requests establish the session on the sibling
# domains is inferred from the login response markup -- confirm against the
# live site.
my $login      = 'https://www.wissenschaft-online.de/template/d_meinkonto_controller';
my $pdf_url    = 'http://www.spektrum.de/alias/pdf/sdw-2013-5-ges-pdf/1190244';
my $pdf_file   = 'SDW.pdf';
my $cookie_jar = HTTP::Cookies->new();
my $ua         = LWP::UserAgent->new;

$ua->cookie_jar($cookie_jar);

# Follow redirects issued in response to the login POST as well.
push @{ $ua->requests_redirectable }, 'POST';

my $response = $ua->get($login);
croak 'Cannot fetch login page: ' . $response->status_line
    unless $response->is_success;

# The script uses the second form on the page as the login form.
my @forms = HTML::Form->parse($response);
croak 'Login form (second form on the page) not found'
    unless defined $forms[1];

$forms[1]->method( q{POST} );

$forms[1]->param(q{user},q{XXX-USER});
$forms[1]->param(q{pass},q{PASSWD});

my $request = $forms[1]->click;
$response   = $ua->request($request);
croak 'Login request failed: ' . $response->status_line
    unless $response->is_success;

p $response; # print 1

# Collect the per-domain login-check URLs from the login response.  Each URL
# appears twice (once as <script src>, once as <iframe src>), so de-duplicate
# while preserving order.
my $login_html = $response->decoded_content;
$login_html = q{} unless defined $login_html;

my %seen;
my @logincheck_urls = grep { !$seen{$_}++ }
    $login_html =~ m{<(?:script|iframe)\b[^>]*?\bsrc="([^"]*/d_sys_logincheck\?[^"]*)"}gi;

carp 'No d_sys_logincheck URLs found in login response; '
    . 'the download will probably return the login page'
    unless @logincheck_urls;

for my $url (@logincheck_urls) {
    # Undo HTML entity escaping of the query-string separator, if present.
    $url =~ s/&amp;/&/g;

    # A failure on one sibling domain should not abort the whole run.
    my $check = $ua->get($url);
    carp "Login check failed for $url: " . $check->status_line
        unless $check->is_success;
}

# No explicit $cookie_jar->extract_cookies() call is needed: the user agent
# stores cookies from every response in the attached jar.
print $cookie_jar->as_string; # print 2

$response = $ua->get( $pdf_url, ':content_file' => $pdf_file );

p $response; # print 3

croak 'PDF download failed: ' . $response->status_line
    unless $response->is_success;

# LWP reports a failure to write the content file via the X-Died header.
my $died = $response->header('X-Died');
croak "Writing $pdf_file failed: $died" if defined $died;

# A 200 response carrying HTML means the login page was saved, not the PDF.
my $content_type = $response->content_type;
carp "Expected application/pdf but got '$content_type'; "
    . "$pdf_file probably contains the login page"
    unless $content_type eq 'application/pdf';

因此,打印 1 - 凭据提交的响应:

HTTP::Response  {
    Parents       HTTP::Message
    public methods (22) : as_string, base, clone, code, current_age, dump, error_as_HTML, filename, freshness_lifetime, fresh_until, is_error, is_fresh, is_info, is_redirect, is_success, message, new, parse, previous, redirects, request, status_line
    private methods (0)
    internals: {
        _content    "<!DOCTYPE html>
<!--[if lt IE 7 ]> <html lang="de" class="no-js ie6"> <![endif]-->
<!--[if IE 7 ]>    <html lang="de" class="no-js ie7"> <![endif]-->
<!--[if IE 8 ]>    <html lang="de" class="no-js ie8"> <![endif]-->
<!--[if IE 9 ]>    <html lang="de" class="no-js ie9"> <![endif]-->
<!--[if (gt IE 9)|!(IE)]><!--> <html lang="de" class="no-js"> <!--<![endif]-->
<head>
    <meta charset="utf-8" />
    <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"><meta http-equiv="refresh" content="3; URL=http://www.spektrum.de/template/d_meinkonto_controller?wo=1">  <script src="/js_css/s2/prototype.js"></script><script src="http://www.epoc.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c"></script>
<script src="http://www.spektrum.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c"></script>
<script src="http://www.spektrum.com/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c"></script>
<script src="http://www.spektrumverlag.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c"></script>
<script src="http://www.spektrum-neo.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c"></script>
<script src="http://www.gehirn-und-geist.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c"></script>
<script src="http://www.astronomie-heute.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c"></script>
<script src="http://www.spektrumdirekt.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c"></script>
<script src="http://www.sterne-und-weltraum.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c"></script>
<script src="http://www.wissenschaft-online.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c"></script>
    <style type="text/css">
        html, body {
            font-family: Verdana,Helvetica,Arial,sans-serif;
        }
</style>
</head>
<body id="index" class="home">
    Einen Moment bitte, Sie werden am System von Spektrum der Wissenschaft angemeldet.      Sollten Sie nicht in einigen Augenblicken weitergeleitet werden, klicken Sie bitte <a href="http://www.spektrum.de/template/d_meinkonto_controller?wo=1">hier</a>.
        <iframe src="http://www.epoc.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c" width="1" height="1" scrolling="no" marginheight="0" marginwidth="0" frameborder="0"></iframe>
<iframe src="http://www.spektrum.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c" width="1" height="1" scrolling="no" marginheight="0" marginwidth="0" frameborder="0"></iframe>
<iframe src="http://www.spektrum.com/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c" width="1" height="1" scrolling="no" marginheight="0" marginwidth="0" frameborder="0"></iframe>
<iframe src="http://www.spektrumverlag.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c" width="1" height="1" scrolling="no" marginheight="0" marginwidth="0" frameborder="0"></iframe>
<iframe src="http://www.spektrum-neo.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c" width="1" height="1" scrolling="no" marginheight="0" marginwidth="0" frameborder="0"></iframe>
<iframe src="http://www.gehirn-und-geist.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c" width="1" height="1" scrolling="no" marginheight="0" marginwidth="0" frameborder="0"></iframe>
<iframe src="http://www.astronomie-heute.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c" width="1" height="1" scrolling="no" marginheight="0" marginwidth="0" frameborder="0"></iframe>
<iframe src="http://www.spektrumdirekt.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c" width="1" height="1" scrolling="no" marginheight="0" marginwidth="0" frameborder="0"></iframe>
<iframe src="http://www.sterne-und-weltraum.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c" width="1" height="1" scrolling="no" marginheight="0" marginwidth="0" frameborder="0"></iframe>
<iframe src="http://www.wissenschaft-online.de/template/d_sys_logincheck?i=e4328159435cbb952edb540e0ccdd40a&p=a007d48c26707db202ec4e6115c04464&k=V086cGFscGF0aW5l&y=97a7db42c674d3288c1ffb27e362832c" width="1" height="1" scrolling="no" marginheight="0" marginwidth="0" frameborder="0"></iframe>
        <script type="text/javascript"><!--
        function reload(){
            window.location.href="http://www.spektrum.de/template/d_meinkonto_controller?wo=1";
        }
        document.observe('dom:loaded',function(){
            window.setTimeout("reload()", 3000);
        }); -->
        </script>
    </body>
</html>",
        _headers    HTTP::Headers,
        _msg        "OK",
        _previous   HTTP::Response,
        _protocol   "HTTP/1.1",
        _rc         200,
        _request    HTTP::Request
    }
}

打印 2 - cookie 检查:

Set-Cookie3: PHPSESSID=a007d48c26707db202ec4e6115c04464; path="/"; domain=www.wissenschaft-online.de; path_spec; discard; version=0
Set-Cookie3: SIX_prefix=0; path="/"; domain=www.wissenschaft-online.de; path_spec; discard; version=0
Set-Cookie3: SIX_pwd=e4328159435cbb952edb540e0ccdd40a; path="/"; domain=www.wissenschaft-online.de; path_spec; discard; version=0
Set-Cookie3: SIX_uid=WO%3Axxx-USER; path="/"; domain=www.wissenschaft-online.de; path_spec; discard; version=0
Set-Cookie3: mobile=aus; path="/"; domain=www.wissenschaft-online.de; path_spec; expires="2013-10-09 06:14:30Z"; version=0

打印 3 - 请求下载 PDF 文件:

HTTP::Response  {
    Parents       HTTP::Message
    public methods (22) : as_string, base, clone, code, current_age, dump, error_as_HTML, filename, freshness_lifetime, fresh_until, is_error, is_fresh, is_info, is_redirect, is_success, message, new, parse, previous, redirects, request, status_line
    private methods (0)
    internals: {
        _content    "",
        handlers    {
            response_done   [
                [0] {
                    callback   sub { ... }
                }
            ]
        },
        _headers    HTTP::Headers,
        _msg        "OK",
        _protocol   "HTTP/1.1",
        _rc         200,
        _request    HTTP::Request
    }
}

结果文件 'SDW.pdf' 并不是 PDF,而是包含以下 HTML 内容(标题显示这是登录页面):

<!DOCTYPE html>
<!-- d_sdwv_html_head -->
<!--[if lt IE 7 ]> <html lang="de" class="no-js ie6"> <![endif]-->
<!--[if IE 7 ]>    <html lang="de" class="no-js ie7"> <![endif]-->
<!--[if IE 8 ]>    <html lang="de" class="no-js ie8"> <![endif]-->
<!--[if IE 9 ]>    <html lang="de" class="no-js ie9"> <![endif]-->
<!--[if (gt IE 9)|!(IE)]><!--> <html lang="de" class="no-js"> <!--<![endif]-->
<head prefix="og: http://ogp.me/ns#">
                    <title>PDF-Download - Login - Spektrum der Wissenschaft</title>
    <link rel="shortcut icon" href="/pix/sdwv/favicon_sdw.ico" />
<link rel="apple-touch-icon-precomposed" href="/pix/sdwv/touch-icon-sdw.png" />                         <link rel="canonical" href="http://www.spektrum.de/alias/pdf/sdw-2013-5-ges-pdf/1190244" />                                         <meta http-equiv="content-type" content="text/html; charset=iso-8859-1" />
<meta name="language" content="de" />
<meta name="description" content="SdW_2013_5_ges (pdf)" />
<meta name="robots" content="noindex" />
<meta name="id" content="cms-864-1190244" />
<meta name="produkt" content="cms-864" />
<meta name="titel" content="PDF-Download - Login - Spektrum der Wissenschaft" />
<meta name="twitter:card" content="summary" />
<meta name="twitter:site" content="@spektrum" />
<meta property="og:title" content="PDF-Download - Login - Spektrum der Wissenschaft" />
<meta property="og:type" content="article" />
<meta property="og:description" content="SdW_2013_5_ges (pdf)" />
<meta property="og:url" content="http://www.spektrum.de/alias/pdf/sdw-2013-5-ges-pdf/1190244" />
<meta name="apple-itunes-app" content="app-id=545968608" />
<meta name="datum" content="04.04.2013" />

<link rel="alternate" type="application/rss+xml" title="Spektrum.de RSS-Feed" href="/alias/rss/spektrum-de-rss-feed/996406">

<link rel="alternate" type="application/rss+xml" title="Pressemitteilungen" href="/alias/rss/pressemitteilungen/995265">

<link rel="alternate" type="application/rss+xml" title="Gehirn und Geist RSS-Feed" href="/alias/rss/gehirn-und-geist-rss-feed/982626">

<link rel="alternate" type="application/rss+xml" title="Spektrum der Wissenschaft RSS-Feed" href="/alias/rss/spektrum-der-wissenschaft-rss-feed/982623">

<link rel="alternate" type="application/rss+xml" title="Sterne und Weltraum RSS-Feed" href="/alias/rss/sterne-und-weltraum-rss-feed/865248">

<link rel="alternate" type="application/rss+xml" title="Spektrum neo RSS-Feed" href="/alias/rss/spektrum-neo-rss-feed/1193335">
    <link rel="stylesheet" href="/template/d_sdwv_css_wrapper" />
    <link rel="stylesheet" href="/js_css/s2/smoothness/jquery-ui-1.9.1.custom.css" />
    <link rel="stylesheet" href="/js_css/mediaelementjs/mediaelementplayer.min.css" />
    <link rel="stylesheet" href="/js_css/mediaelementjs/mejs-skins.css" />
    <script src="/js_css/modernizr/modernizr.js"></script>
    <script src="/js_css/s2/prototype.js"></script>
    <script src="/js_css/s2/s2.js"></script>
    <script src="/js_css/jquery/jquery-1.6.4.min.js"></script>
    <script src="/js_css/s2/event.simulate.js"></script>
    <script type="text/javascript">jQuery.noConflict();</script>
    <script type="text/javascript" src="/js_css/socialshareprivacy/jquery.socialshareprivacy.min.js"></script>
    <script src="/js_css/jquery/jquery.imagesloaded.min.js"></script>
    <script src="/js_css/mediaelementjs/mediaelement-and-player.min.js"></script>
    <script src='/template/d_js_vgwort_datei.js'></script>
    <script src="/js_css/jquery/fancybox/jquery.fancybox-1.3.4.pack.js"></script>
    <link rel="stylesheet" hr

抱歉耽搁了,感谢您的帮助!

4

0 回答 0