我正在尝试运行一个 perl 脚本(在 Windows cmd 窗口中),但它总是会在某个点停止工作。我怎样才能找出为什么它不会继续?
这是脚本:我可以看到执行的最后一件事是第 37 行中的“get_html_source()”
#!/usr/bin/perl
# Perl script that scrapes the members of the Hellenic Parliament
# Created by Kostas Ntonas, 03 May 2013 - http://ntonas.gr
# http://deixto.blogspot.gr/2013/05/scraping-members-of-greek-parliament.html
use strict;
use warnings;
use utf8;
use IO::File;
use POSIX qw(tmpnam);
use DEiXToBot;
use WWW::Selenium;
my $agent = DEiXToBot->new(); # create the DEiXToBot agent object
# launch a Firefox instance
my $sel = WWW::Selenium->new( host => "localhost",
port => 4444,
browser => "*firefox",
browser_url => "http://www.hellenicparliament.gr/"
);
$sel->start;
for my $i (1..30) {
my $url = "http://www.hellenicparliament.gr/en/Vouleftes/Viografika-Stoicheia?pageNo=$i";
$sel->open($url);
$sel->wait_for_page_to_load(5000);
$sel->pause(1);
print "$i) $url\n";
my $content = $sel->get_html_source();
my ($fh,$name); # create a temporary file containing the page's source code
do { $name = tmpnam() } until $fh = IO::File->new($name, O_RDWR|O_CREAT|O_EXCL);
binmode( $fh, ':utf8' );
print $fh $content;
close $fh;
$agent->get("file://$name"); # load the temporary file/page with the DEiXToBot agent using the file:// scheme
unlink $name; # delete the temporary file, it is not needed any more
if (! $agent->success) { die "Could not fetch the temp file!\n"; }
$agent->build_dom();
$agent->load_pattern('C:\Users\XXX\Documents\Privat\MyCase3\Deixto Patterns\parliament_CVs.xml');
$agent->extract_content();
if (! $agent->hits) {
die "Could not find any MPs/ records!\n";
}
else {
for my $record ($agent->records) {
my @rec = @$record;
my $party;
my $logo = $rec[0];
# deduce the party name from the logo in the first column of the table
if ($logo=~m#ND_Logo#) { $party = "N.D. (New Democracy)"; }
elsif ($logo=~m#COALITION#) { $party = "SYRIZA Unitary Social Front"; }
elsif ($logo=~m#PASOK#) { $party = "PA.SO.K. (Panhellenic Socialist Movement)"; }
elsif ($logo=~m#ANEKS_ELL#) { $party = "ANEXARTITOI ELLINES (Independent Hellenes)"; }
elsif ($logo=~m#xrisi#) { $party = "LAIKOS SYNDESMOS - CHRYSI AVGI (People's Association - Golden Dawn)"; }
elsif ($logo=~m#small#) { $party = "DHM.AR (Democratic Left)"; }
elsif ($logo=~m#KKE#) { $party = "K.K.E. (Communist Party of Greece)"; }
elsif ($logo=~m#INDEPENDENT#) { $party = "INDEPENDENT"; }
else { die "$logo => Unknown logo!\n"; }
$rec[0] = $party;
$rec[3]=~s#\s+# #g; # replace whitespace characters with a single space
# append the data in a tab delimited text file
open my $fh,">>:utf8","MPs.txt";
print $fh join("\t",@rec)."\n";
close $fh;
}
}
}
$sel->stop;