1
use WWW::Mechanize;
use strict;
use warnings;
use LWP::Simple;
# Crawl www.condortk.com: load the search-results page, follow every product
# link on it, follow every detail link on each product page, and try to store
# the image referenced by the detail page.

# $ctime and $output_file are assigned here but not referenced again in the
# visible part of the script.
my $ctime       = time();
# Host name with a trailing slash and no "http://" scheme; it is prepended to
# relative paths further down.
my $Home_page   = 'www.condortk.com/';
my $output_file = "www.condortk.com-$ctime";
my $url         = 'http://www.condortk.com/results.php?keyword=&Buscar.x=4&Buscar.y=7';
my $m           = WWW::Mechanize->new();
$m->get( $url ) or die "unable to get $url";
my $Home_Con = $m->content;
# Create the Images directory through the shell when it does not exist yet.
system( 'mkdir Images' ) if ( !-d "Images" );
# $next and $page are not used in the visible part of the script.
my $next = '';
my $page = '';

# Outer loop: one iteration per href that directly follows
# <div class="producto"><a href=" in the results page.
while ( $Home_Con =~ m/<div class="producto"><a href="([^>]*?)"/igs ) {

#print "Loop 1\n";
  # The captured href is passed to get() exactly as it appears in the HTML.
  $m->get( $1 );

  my $list_content = $m->content;
  # Only $img_name and $img_folder are used below; $image and $big_image are not.
  my $img_name     = '';
  my $img_folder   = '';
  my $image        = '';
  my $big_image    = '';

  # Inner loop: one iteration per href that follows <div class="nombre2"><a href="
  # in the page fetched above.
  while ( $list_content =~ m/<div\s*class="nombre2"><a\s*href="([^>]*?)"/igs ) {

#print"Loop2\n";
    # $desc, $desc1 and $block are assigned but not read afterwards.
    my $desc  = '';
    my $desc1 = '';
    my $block = $Home_page . $1;
    $m->get( $1 );
    my $content1 = $m->content;
    # Capture the src attribute of the <img> right after id="imgproducto">.
    if ( $content1 =~ m/id="imgproducto"><img src="([^<]*?)"/is ) {
      $img_name = $1;

#   print "$img_name\n";

      # Capture everything that follows the first "/word/word/" prefix of the
      # image path, up to the end of the string.
      if ( $img_name =~ m/\/\w+\/\w+\/([^>]*?)$/is ) {
        $img_folder = $1;

#print "$img_folder\n";
#print "$Home_page.$img_name\n";
        # NOTE(review): the first argument is $Home_page . $img_name, i.e. a
        # string without an "http://" scheme, and the second argument is the
        # captured path tail, which is not placed inside the Images directory
        # created above.
        getstore( $Home_page . $img_name, $img_folder );

      }
    }
  }
}

我正在尝试从该站点下载并保存特定图像。但我不确定为什么我无法获得这些图像。我使用了正确的图像正则表达式。我怀疑的部分是 getstore 方法。需要一些指导。提前致谢。

4

2 回答 2

2

如果您要查找的是图像,请使用 WWW::Mechanize 提供的图像专用方法(例如 find_all_images),它可以帮助您找到所需的内容。示例:

# Example only: $mech is assumed to be a WWW::Mechanize object that has already
# fetched a page. Presumably this collects the images on that page whose URL
# matches the pattern -- confirm against the WWW::Mechanize documentation.
my @images = $mech->find_all_images( url_regex => qr/productos-detalle\.php/ );
于 2013-07-29T07:35:57.497 回答
1

这些正则表达式能够奏效纯属运气,并不可靠,请参阅下面代码中标有 "wrong regex" 的注释。此外,getstore(url, file) 的第一个参数必须是完整的 URL(例如 http://host.org/dir/file),第二个参数必须是本地保存路径(例如 path/to/file.jpg)。

这是一个工作版本:

use WWW::Mechanize;
use strict;
use warnings;
use LWP::Simple;
# Download every product image from www.condortk.com into ./Images.
#
# Flow: fetch the search-results page, follow each product link found in a
# <div class="producto"> block, follow each detail link found in a
# <div class="nombre2"> block, then store the image referenced right after the
# id="imgproducto" marker. Images that already exist locally are skipped.
my $Home_page = 'www.condortk.com/';
my $url       = 'http://www.condortk.com/results.php?keyword=&Buscar.x=4&Buscar.y=7';

# autocheck is switched off so that a single broken page is reported and
# skipped instead of aborting the whole crawl; every request is checked
# explicitly with success() below.
my $m = WWW::Mechanize->new( autocheck => 0 );
$m->get( $url );
die "unable to get $url: " . $m->response->status_line . "\n" unless $m->success;
my $Home_Con = $m->content;

# Use the built-in mkdir (no shell involved) and stop early when the target
# directory cannot be created, since every later getstore() would fail too.
if ( !-d 'Images' ) {
  mkdir 'Images' or die "unable to create directory Images: $!";
}

while ( $Home_Con =~ m/<div class="producto"><a href="([^"]*?)"/igs ) { # <-- wrong regex in the question: the capture must stop at the closing quote

  # <-- wrong url in the question: the href is site-relative, so it is turned
  # into an absolute URL. $1 is used immediately, before anything can reset it.
  my $list_url = "http://${Home_page}$1";

  #print "Loop 1 $list_url \n";
  $m->get( $list_url );
  if ( !$m->success ) {
    warn "skipping $list_url: " . $m->response->status_line . "\n";
    next;
  }
  my $list_content = $m->content;

  while ( $list_content =~ m/<div\s*class="nombre2"><a\s*href="([^"]*?)"/igs ) {

    # Copy the capture first; WWW::Mechanize resolves this relative href
    # against the page it fetched last.
    my $detail_href = $1;

    #print "Loop 2 $detail_href \n";
    $m->get( $detail_href );
    if ( !$m->success ) {
      warn "skipping $detail_href: " . $m->response->status_line . "\n";
      next;
    }
    my $content1 = $m->content;

    # Detail pages without the expected image markup are silently ignored.
    next unless $content1 =~ m/id="imgproducto"><img src="([^<]*?)"/is;
    my $img_name = $1;

    # wrong url and 'save as' parameters in the question: getstore() needs a
    # full URL as its first argument and a local file path as its second.
    next unless $img_name =~ m/\/\w+\/\w+\/(.*)$/is;

    # Keep only the last path component so the file always lands directly in
    # Images/ and never in a subdirectory that does not exist.
    ( my $basename = $1 ) =~ s{.*/}{}s;
    next if $basename eq '';

    my $get  = "http://${Home_page}${img_name}";
    my $save = "Images/$basename";

    if ( -f $save ) {
      print "Skipping $save\n";
      next;
    }

    # getstore() returns the HTTP status code of the download.
    my $code = getstore( $get, $save );
    print "$get --> $save : $code\n";
    warn "download of $get failed with status $code\n" unless is_success( $code );
  }
}

-

$ perl dl.img.pl
http://www.condortk.com/.../60504s_725x0.jpg --> Images/Images/60504s_725x0.jpg : 200
http://www.condortk.com/.../60508s_725x0.jpg --> Images/Images/60508s_725x0.jpg : 200
http://www.condortk.com/.../60501s_725x0.jpg --> Images/Images/60501s_725x0.jpg : 200
http://www.condortk.com/.../60020s_725x0.jpg --> Images/Images/60020s_725x0.jpg : 200 
于 2013-07-29T10:35:57.890 回答