1

我正在 Perl 中使用 Net::Curl::Easy,但无法获得正确的 HTTP 响应。使用 LWP::UserAgent 执行相同的请求则可以正常工作。

Net::Curl::Easy 的真正问题在于它无法生成正确的 HTTP 请求。看起来它先发送了带有 Expect: 100-continue 标头的那一部分 HTTP 请求,这就是服务器返回 HTTP/1.1 100 Continue 响应标头以及不正确数据的原因。

示例代码是:

#!/usr/bin/perl

use strict;
use warnings;
use Net::Curl::Easy;
use HTTP::Request::Common qw(GET POST);
use Data::Dumper;
use LWP::UserAgent;
use HTTP::Cookies;

# Scratch filehandle, reused for every response dump written by this script.
my $tmp;

# Stored on the curl handle's base hash below; nothing in this script assigns
# them, so all three stay undef.
my ($timeout, $handled_ref, $resend);

my $request;
my $url = "http://www.wegolo.com/";
$request = GET($url);
$request->header('User-Agent' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.5) Gecko/20041204 Firefox/1.0 (Debian package 1.0.x.2-1)');

#------ Using Net::Curl::Easy ------#
my $uri = $request->url()->as_string();

# The hash passed to new() becomes the base of the handle object, so the
# response headers and body can be collected in $easy->{headers} / $easy->{body}.
my $easy = Net::Curl::Easy->new({
        request     => $request,
        body        => '',
        headers     => '',
        timeout     => $timeout,
        handled_ref => $handled_ref,
        resend      => $resend,
    });

$easy->setopt( Net::Curl::Easy::CURLOPT_URL(),         $uri );
# libcurl never looks at the HTTP::Request object, so the User-Agent set on it
# has to be handed over explicitly; otherwise curl and LWP send different requests.
$easy->setopt( Net::Curl::Easy::CURLOPT_USERAGENT(),   scalar $request->header('User-Agent') );
$easy->setopt( Net::Curl::Easy::CURLOPT_WRITEHEADER(), \$easy->{headers} );
$easy->setopt( Net::Curl::Easy::CURLOPT_FILE(),        \$easy->{body} );

$easy->perform();

# Dump the raw headers and body so they can be compared with the LWP result.
open $tmp, ">", "easy_resp1" or die "Cannot open easy_resp1 for writing: $!";
print $tmp Dumper $easy->{headers};
print $tmp "\n\n\n\n";
print $tmp Dumper $easy->{body};
close $tmp or die "Cannot close easy_resp1: $!";

#------ Using LWP::UserAgent ------#
# Same GET request through LWP, with a cookie jar so that cookies set by this
# response are replayed on the later POST made with the same $ua.
my $ua = LWP::UserAgent->new();
my $cookie_jar = HTTP::Cookies->new();
$ua->cookie_jar($cookie_jar);
$ua->agent('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.5) Gecko/20041204 Firefox/1.0 (Debian package 1.0.x.2-1)');
my $response = $ua->request($request);

# Dump the whole HTTP::Response object for comparison with the curl dump.
open $tmp, ">", "ua_resp1" or die "Cannot open ua_resp1 for writing: $!";
print $tmp Dumper $response;
close $tmp or die "Cannot close ua_resp1: $!";


# Pull the value of every Set-Cookie header out of the first curl response.
# The capture stops at CR/LF so the "\r" that terminates each header line is
# never carried into the Cookie header sent with the POST.
my @set_cookie_values = $easy->{headers} =~ /^Set-Cookie:[ \t]*([^\r\n]+)/mgi;

# Keep only the leading name=value pair of each cookie (attributes such as
# path/expires follow the first ';') and join the pairs into one Cookie value.
my @cookie_pairs;
for my $set_cookie (@set_cookie_values) {
    my ($pair) = split /;/, $set_cookie;
    next unless defined $pair && length $pair;
    $pair =~ s/\s+\z//;
    push @cookie_pairs, $pair;
}
my $cookie_string = join '; ', @cookie_pairs;

my $dep_country_code = 'CA';
my $dep_country_name = 'Canada';
my $dep_city         = 'Toronto';

# Scrape the hidden form fields the page expects to be posted back.
# Each variable is undef when its pattern does not match the body.
my ($MyScriptManager_HiddenField) = $easy->{body} =~ /\_TSM\_CombinedScripts\_=([^"]*)\"/x;
my ($viewState)                   = $easy->{body} =~ /id="__VIEWSTATE"\s+value="([^"]*)"/gx;
my ($eventValid)                  = $easy->{body} =~ /id="__EVENTVALIDATION"\s+value="([^"]*)"/gx;
my ($tbDepartureDate)             = $easy->{body} =~ /value="([A-Za-z\d\,\s]+)"\s+id="tbDepartureDate"/gx;
my ($tbReturnDate)                = $easy->{body} =~ /value="([A-Za-z\d\,\s]+)"\s+id="tbReturnDate"/gx;


# Form fields for the postback triggered by selecting a departure country.
my $form_elements = {
            'MyScriptManager'             => 'MyScriptManager|ddlCountry',
            'MyScriptManager_HiddenField' => $MyScriptManager_HiddenField,
            '__EVENTTARGET'               => 'ddlCountry',
            '__EVENTARGUMENT'             => '',
            '__LASTFOCUS'                 => '',
            '__VIEWSTATE'                 => $viewState,
            '__EVENTVALIDATION'           => $eventValid,
            'ddlCountry'                  => $dep_country_code,
            'TripType'                    => 'false',
            'ddlDeparture'                => '',
            'tbDepartureDate'             => $tbDepartureDate,
            'ddlDestination'              => '',
            'tbReturnDate'                => $tbReturnDate,
            'ddlNumAdults'                => 1,
            'ddlNumChildren'              => 0,
            'ddlNumInfants'               => 0,
            'ddlCurrency'                 => 'CAD',
            'deepLinkUrl'                 => '',
            '__ASYNCPOST'                 => 'true',
            ''                            => '',
        };



# Replace the GET request with a form-encoded POST to the same URL.
$request = POST $url, $form_elements;
$request->header('User-Agent' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.5) Gecko/20041204 Firefox/1.0 (Debian package 1.0.x.2-1)');


#------ Using Net::Curl::Easy ------#
    $uri = $request->url()->as_string();
    my $content = $request->content();

    # Fresh handle for the POST; headers and body are collected on its base hash.
    $easy = Net::Curl::Easy->new({
            request     => $request,
            body        => '',
            headers     => '',
            timeout     => $timeout,
            handled_ref => $handled_ref,
            resend      => $resend,
        });

    $easy->setopt( Net::Curl::Easy::CURLOPT_URL(),           $uri );
    # libcurl does not read the HTTP::Request object, so pass the User-Agent on.
    $easy->setopt( Net::Curl::Easy::CURLOPT_USERAGENT(),     scalar $request->header('User-Agent') );
    # For larger POST bodies libcurl adds "Expect: 100-continue" by itself; the
    # server then answers with an interim "HTTP/1.1 100 Continue" block that ends
    # up at the front of the captured headers. A header with an empty value
    # tells libcurl not to send it.
    $easy->setopt( Net::Curl::Easy::CURLOPT_HTTPHEADER(),    [ 'Expect:' ] );
    $easy->setopt( Net::Curl::Easy::CURLOPT_POSTFIELDS(),    $content );
    $easy->setopt( Net::Curl::Easy::CURLOPT_POSTFIELDSIZE(), length($content) );
    $easy->setopt( Net::Curl::Easy::CURLOPT_COOKIE(),        $cookie_string );
    $easy->setopt( Net::Curl::Easy::CURLOPT_WRITEHEADER(),   \$easy->{headers} );
    $easy->setopt( Net::Curl::Easy::CURLOPT_FILE(),          \$easy->{body} );

    $easy->perform();

    # Dump the raw headers and body so they can be compared with ua_resp2.
    open $tmp, ">", "easy_resp2" or die "Cannot open easy_resp2 for writing: $!";
    print $tmp Dumper $easy->{headers};
    print $tmp "\n\n\n\n";
    print $tmp Dumper $easy->{body};
    close $tmp or die "Cannot close easy_resp2: $!";
#-----------------------------------#



#------ Using LWP::UserAgent ------#
    # Same POST through the LWP agent created earlier (cookie jar included).
    $response = $ua->request($request);

    # Dump the whole HTTP::Response object for comparison with easy_resp2.
    open $tmp, ">", "ua_resp2" or die "Cannot open ua_resp2 for writing: $!";
    print $tmp Dumper $response;
    close $tmp or die "Cannot close ua_resp2: $!";
#----------------------------------#

运行此文件后,检查 ua_resp2 和 easy_resp2 之间的区别。

4

1 回答 1

1
    #
    # Performs a GET or POST HTTP request through WWW::Curl::Easy.
    #
    # usage:
    # ( $ret , $response_code , $response_body , $response_content )
    #     = $objUrlRunner->doRunURL( 'GET' , $url , $headers );
    # ( $ret , $response_code , $response_body , $response_content )
    #     = $objUrlRunner->doRunURL( 'POST' , $url , $headers , $post_body );
    #
    # params:
    #   $http_method_type - 'POST' turns the request into a POST; anything else
    #                       leaves libcurl's default method in place
    #   $url              - the URL to request
    #   $headers          - optional hash ref of request headers ( name => value )
    #   $post_body        - optional, already encoded request body for POST
    #
    # returns:
    #   $ret              - return value of perform() ( 0 on success )
    #   $response_code    - HTTP status code, empty string when the transfer failed
    #   $response_body    - raw captured output: header block(s) followed by the body
    #   $response_content - body of the final response, empty string on failure
    #
    sub doRunURL {

        my $self               = shift ;
        my $http_method_type   = shift ;
        my $url                = shift ;
        my $headers            = shift ;
        my $post_body          = shift ;   # optional, only used for POST

        my $cookies_file       = $appConfig->{'COOKIES_FILE'} ;
        $objLogger->doLogInfoMsg ( "using the following cookies_file: " );
        $objLogger->doLogInfoMsg ( $cookies_file ) ;

        my $curl = WWW::Curl::Easy->new();

        ## Set up the standard GET/POST request options
        if ( $cookies_file ) {
            # file libcurl reads cookies from
            $curl->setopt(WWW::Curl::Easy::CURLOPT_COOKIEFILE(), $cookies_file );
        } else {
            # The original passed the string 'CURLAUTH_BASIC' here, which is not
            # the numeric bitmask libcurl expects; use the real constant.
            # NOTE(review): no credentials are set anywhere in this sub, so
            # confirm that this branch is actually needed.
            $curl->setopt(WWW::Curl::Easy::CURLOPT_HTTPAUTH(), WWW::Curl::Easy::CURLAUTH_BASIC() );
        }

        # The response headers must be part of the captured output: the status
        # line and headers are what HTTP::Response->parse reads further down.
        $curl->setopt(WWW::Curl::Easy::CURLOPT_HEADER(), 1);
        # NOTE(review): redirects are not followed ( FOLLOWLOCATION is 0 below ),
        # so this limit presumably never comes into play.
        $curl->setopt(WWW::Curl::Easy::CURLOPT_MAXREDIRS(), 3);
        $curl->setopt(WWW::Curl::Easy::CURLOPT_URL(), "$url" );
        $curl->setopt(WWW::Curl::Easy::CURLOPT_VERBOSE(), 0);                # Disable verbosity
        $curl->setopt(WWW::Curl::Easy::CURLOPT_NOPROGRESS(), 1);             # Disable internal progress meter
        $curl->setopt(WWW::Curl::Easy::CURLOPT_FOLLOWLOCATION(), 0);         # Disable automatic location redirects
        $curl->setopt(WWW::Curl::Easy::CURLOPT_FAILONERROR(), 1);            # Setting this to true fails on HTTP error
        # NOTE(security): certificate and host name verification are switched
        # off, so HTTPS peers are not authenticated. Left as is because callers
        # may rely on it - review before using against untrusted networks.
        $curl->setopt(WWW::Curl::Easy::CURLOPT_SSL_VERIFYPEER(), 0);         # Ignore bad SSL
        $curl->setopt(WWW::Curl::Easy::CURLOPT_SSL_VERIFYHOST(), 0);         # Ignore bad SSL
        $curl->setopt(WWW::Curl::Easy::CURLOPT_NOSIGNAL(), 1);               # To make thread safe, disable signals
        $curl->setopt(WWW::Curl::Easy::CURLOPT_ENCODING(), 'gzip');          # Allow gzip compressed pages

        # Collect ALL request headers first and hand them over in one call:
        # every CURLOPT_HTTPHEADER call replaces the previous list, so setting it
        # once per header ( as was done before ) sent only the last header.
        my @header_lines ;
        if ( $headers ) {
            for my $header_name ( sort keys %$headers ) {
                my $header_val = $headers->{ $header_name } ;
                # Keys that already carry their own colon are passed through
                # unchanged for backward compatibility; plain names get the
                # "Name: value" separator that was missing before.
                push @header_lines ,
                    $header_name =~ /:/
                        ? $header_name . $header_val
                        : "$header_name: $header_val" ;
            }
        }

        if ( $http_method_type eq 'POST' ) {
            $curl->setopt(WWW::Curl::Easy::CURLOPT_POST(), 1);

            if ( defined $post_body ) {
                $curl->setopt(WWW::Curl::Easy::CURLOPT_POSTFIELDS(), $post_body );

                # libcurl adds "Expect: 100-continue" to larger POST bodies; the
                # interim "100 Continue" reply then precedes the real response.
                # An empty header value suppresses it, unless the caller set
                # an Expect header on purpose.
                push @header_lines , 'Expect:'
                    unless grep { /\AExpect\s*:/i } @header_lines ;
            }
        }

        if ( @header_lines ) {
            $curl->setopt(WWW::Curl::Easy::CURLOPT_HTTPHEADER() , \@header_lines );
        }

        # A filehandle, reference to a scalar or reference to a typeglob can be used here.
        my $response_body       = q{} ;
        my $response_code       = q{} ;
        my $response_content    = q{} ;

        $curl->setopt(WWW::Curl::Easy::CURLOPT_WRITEDATA(),\$response_body);

        # Starts the actual request
        my $ret = $curl->perform;


        if ($ret == 0) {
            my $msg = "OK for the curl transfer for the url: $url " ;
            $objLogger->doLogInfoMsg ( $msg ) ;

            $response_code = $curl->getinfo(WWW::Curl::Easy::CURLINFO_HTTP_CODE());

            # The captured output can start with interim 1xx header blocks
            # ( e.g. "HTTP/1.1 100 Continue" ). Drop them from a copy so that the
            # final response is parsed; the raw $response_body is returned as is.
            my $final_response = $response_body ;
            1 while $final_response =~ s{\AHTTP/[\d.]+ 1\d\d[^\n]*\n(?:[^\r\n]+\r?\n)*\r?\n}{} ;

            $response_content = HTTP::Response->parse( $final_response ) ;
            $response_content = $response_content->content;
            p ( $response_content ) if $module_trace == 1 ;

        } else {
            #  Error code, type of error, error message
            my $msg = "An error happened: $ret ".$curl->strerror($ret)." ".$curl->errbuf."\n" ;
            $objLogger->doLogErrorMsg ( $msg ) ;
        }

        return ( $ret , $response_code , $response_body , $response_content ) ;
    }
于 2017-04-04T13:22:20.493 回答