2

我试图从大约 3,000 个 HTML 文件中获取一大堆值并将它们保存到电子表格中。

我使用 HTML::TreeBuilder 来处理 HTML,并使用 Spreadsheet::WriteExcel 写入电子表格。

但是我的脚本没有成功获取值。我得到了以下警告:

Use of uninitialized value $val in concatenation (.) or string at line 63.(在第 63 行的连接 (.) 或字符串中使用了未初始化的值 $val。)

我可能做错了什么?

我的 HTML 文件放在 pastebin.com 上,因为它太大,无法直接贴在问题中。

我的 Perl 代码

use warnings 'all';
use strict;

use LWP::Simple 'get';
use Spreadsheet::WriteExcel;
use HTML::TreeBuilder;
use Path::Tiny;
use constant URL => 'http://pastebin.com/raw/qLwu80ZW';

# Per-file field values; the main loop refills them for every HTML file.
my $teamNumber  = "";
my $teamName    = "";
my $schoolName  = "";
my $area        = "";
my $district    = "";
my $agDeptPhone = "";
my $schoolPhone = "";
my $fax         = "";
my $addressOne  = "";
my $addressTwo  = "";
my $city        = "";
my $state       = "";
my $zipCode     = "";
my $name        = "";
my $email       = "";
my $row         = "";
my $Ypos        = 0;    # spreadsheet row of the record being written (row 0 = headers)

# Directory holding the HTML files to process
my $path = "Z:\\_WEB_CLIENTS\\Morgan Livestock\\Judging Card";

# Spreadsheet::WriteExcel produces the binary Excel 97-2003 format, so the
# file gets the matching .xls extension (a .xlsx name makes Excel reject it).
my $workbook  = Spreadsheet::WriteExcel->new('perlOutput.xls');
my $worksheet = $workbook->add_worksheet();


# Look up the team number in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_TeamNumber", or undef when there is no such input (or no value).
sub getTeamNumber {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_TeamNumber/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got Team Number $val\n"
                           : "Couldn't get Team Number from $file\n";
    print $msg;

    return $val;
}


# Look up the team name in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_TeamName", or undef when there is no such input (or no value).
sub getTeamName {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_TeamName/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got Team Name $val\n"
                           : "Couldn't get Team Name from $file\n";
    print $msg;

    return $val;
}


# Look up the school name in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_SchoolName", or undef when there is no such input (or no value).
sub getSchoolName {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    # The tag criterion is spelled "_tag" ("tag_" would be treated as an
    # ordinary attribute name and never match).
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_SchoolName/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got School Name $val\n"
                           : "Couldn't get School Name from $file\n";
    print $msg;

    return $val;
}


# Look up the area in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_Area", or undef when there is no such input (or no value).
sub getArea {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    # The tag criterion is spelled "_tag" ("tag_" would be treated as an
    # ordinary attribute name and never match).
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_Area/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got Area $val\n"
                           : "Couldn't get Area from $file\n";
    print $msg;

    return $val;
}


# Look up the district in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_District", or undef when there is no such input (or no value).
sub getDistrict {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_District/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got District $val\n"
                           : "Couldn't get District from $file\n";
    print $msg;

    return $val;
}


# Look up the department phone in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_Phone" (not followed by a digit), or undef when there is no such
# input (or no value).
sub getDeptPhone {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    # (?!\d) keeps this from picking up the "$txt_Phone2" input instead.
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_Phone(?!\d)/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got Dept Phone $val\n"
                           : "Couldn't get Dept Phone from $file\n";
    print $msg;

    return $val;
}


# Look up the school phone in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_Phone2", or undef when there is no such input (or no value).
sub getSchoolPhone {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_Phone2/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got School Phone $val\n"
                           : "Couldn't get School Phone from $file\n";
    print $msg;

    return $val;
}


# Look up the fax number in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_Fax", or undef when there is no such input (or no value).
sub getFax {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_Fax/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got Fax $val\n"
                           : "Couldn't get Fax from $file\n";
    print $msg;

    return $val;
}


# Look up the first address line in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_Address1", or undef when there is no such input (or no value).
sub getAddress1 {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_Address1/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got Address One $val\n"
                           : "Couldn't get Address One from $file\n";
    print $msg;

    return $val;
}


# Look up the second address line in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_Address2", or undef when there is no such input (or no value).
sub getAddress2 {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_Address2/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got Address Two $val\n"
                           : "Couldn't get Address Two from $file\n";
    print $msg;

    return $val;
}


# Look up the city in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_City", or undef when there is no such input (or no value).
sub getCity {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_City/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    # The progress line names the city (it used to say "Address Two").
    my $msg = defined $val ? "Got City $val\n"
                           : "Couldn't get City from $file\n";
    print $msg;

    return $val;
}


# Look up the state in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_State", or undef when there is no such input (or no value).
sub getState {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_State/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got State $val\n"
                           : "Couldn't get State from $file\n";
    print $msg;

    return $val;
}


# Look up the zip code in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_Zip", or undef when there is no such input (or no value).
sub getZip {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_Zip/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got Zip $val\n"
                           : "Couldn't get Zip from $file\n";
    print $msg;

    return $val;
}


# Look up the website in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_Website", or undef when there is no such input (or no value).
sub getWebsite {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree  = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my $input = $tree->look_down(_tag => 'input', name => qr/\$txt_Website/);
    my $val   = $input ? $input->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    my $msg = defined $val ? "Got Website $val\n"
                           : "Couldn't get Website from $file\n";
    print $msg;

    return $val;
}


# Extract the contact name and e-mail from the "rgMasterTable" table of the
# HTML file $file.
# Returns the two-element list ($name, $email), taken from the first two
# cells of the last table row that has at least two <td> cells; both are
# empty strings when the table or such a row is missing.
sub getNameAndEmail {
    my ($file) = @_;

    # Parse the file that was passed in (not a fixed sample URL), so every
    # input file yields its own name/e-mail pair.
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    # Declared once, outside the loop, so the values survive to the return.
    my ($name, $email) = ('', '');

    my $table = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            my @td = $tr->look_down(_tag => 'td');
            next if @td < 2;    # header rows or single-cell notices
            ($name, $email) = map { $_->as_trimmed_text } @td[0,1];
        }
    }
    $tree->delete;    # release the parse tree before the next file

    print "Got Name and Email $name and $email\n";

    return ($name, $email);
}

# FILLER: This fills the spreadsheet with all the variables we've acquired

# Write one record into row $Ypos of the worksheet.
# Takes the fifteen field values in header order (Name, Email, Team Number,
# ... Zip Code).  Values go into columns 0..14 so they line up with the
# headers in A1..O1 (writing from column 1 shifted every value one column
# to the right of its header).
sub fill {
    my ($name, $email, $teamNumber, $teamName, $schoolName,
        $area, $district, $agDeptPhone, $schoolPhone,
        $fax, $addressOne, $addressTwo, $city, $state, $zipCode) = (@_);

    my @record = ($name, $email, $teamNumber, $teamName, $schoolName,
        $area, $district, $agDeptPhone, $schoolPhone,
        $fax, $addressOne, $addressTwo, $city, $state, $zipCode);

    # write_row() writes the array elements left to right starting at column 0
    $worksheet->write_row($Ypos, 0, \@record);
}

# Open judgingcard directory
#
# readdir() yields bare entry names, so each one is joined to $path before
# it is tested or parsed (otherwise the file tests and the parser would look
# in the current working directory).  Only plain files are kept, which also
# drops "." and "..".

opendir(my $dir, $path) or die "Unable to open directory 'Judging Card': $!";
my @files = grep { -f $_ }
            map  { path($path)->child($_)->stringify }
            readdir($dir);
closedir($dir);

# This fills out all top row info (row 0, columns A..O)

$worksheet->write_row(0, 0, [
    "Name", "Email", "Team Number", "Team Name", "School Name",
    "Area", "District", "Ag Dept Phone", "School Phone", "Fax",
    "Address One", "Address Two", "City", "State", "Zip Code",
]);

###################################

foreach my $file (@files) { # run through all files in directory

    $Ypos = $Ypos + 1;    # next free row; row 0 holds the headers

    ($name, $email) = getNameAndEmail($file);

    $teamNumber  = getTeamNumber($file);
    $teamName    = getTeamName($file);
    $schoolName  = getSchoolName($file);
    $area        = getArea($file);
    $district    = getDistrict($file);
    $agDeptPhone = getDeptPhone($file);
    $schoolPhone = getSchoolPhone($file);
    $fax         = getFax($file);
    $addressOne  = getAddress1($file);
    $addressTwo  = getAddress2($file);
    $city        = getCity($file);
    $state       = getState($file);
    $zipCode     = getZip($file);

    fill($name, $email, $teamNumber, $teamName, $schoolName,
        $area, $district, $agDeptPhone, $schoolPhone, $fax,
        $addressOne, $addressTwo, $city, $state, $zipCode);

    print "Progressing                    $file                ($Ypos)\n";
}

# Write the workbook out explicitly instead of relying on object destruction.
$workbook->close();


# Look up the team number in the HTML file $file.
# Returns the "value" attribute of the first <input> whose name contains
# "$txt_TeamNumber", or undef when the input or its value is missing.
sub getTeamNumber {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    # One search over the whole tree: looping over every <input> and
    # overwriting the result kept only the outcome for the last input.
    my $name = $tree->look_down(_tag => 'input', name => qr/\$txt_TeamNumber/);
    my $val  = $name ? $name->attr('value') : undef;
    $tree->delete;    # release the parse tree before the next file

    if ( defined $val ) {
        print "Got Team number $val\n";
    }
    else {
        print "Couldn't get team number\n";
    }

    return $val;
}

新脚本:

use warnings;
use feature 'state';    # "state $idx" below is a syntax error without it

use LWP::Simple 'get';
use Spreadsheet::WriteExcel;
use HTML::TreeBuilder;
use Path::Tiny;

my $path = "Z:\\_WEB_CLIENTS\\Morgan Livestock\\Judging Card";

# Spreadsheet::WriteExcel produces the binary Excel 97-2003 format, so the
# file gets the matching .xls extension.
my $workbook  = Spreadsheet::WriteExcel->new('perlOutput.xls');
my $worksheet = $workbook->add_worksheet();

# readdir() yields bare entry names; join each to $path and keep only plain
# files so later file tests and parsing see the real location.
opendir(my $dir, $path) or die "Unable to open directory 'Judging Card': $!";
my @files = grep { -f $_ }
            map  { path($path)->child($_)->stringify }
            readdir($dir);
closedir($dir);

# Specify spreadsheet headers in desired order and write  to file
my @headers = ('Name', 'Email', 'Team Number', 'Team Name', 'School Name',
    'Area', 'District', 'Ag Dept Phone', 'School Phone', 'Fax',
    'Address One', 'Address Two', 'City', 'State', 'Zip Code', 'Web Site');
$worksheet->write_row(0, 0, \@headers);               # first row

# Build ancillary data structures to later sort results by this order
# each header with its (1-based) index from @headers (specifies columns' order)
my %ho = map { state $idx; $_ => ++$idx } @headers;
# Each name as getNames() returns it (the part after "$txt_", plus Name and
# Email) with the index of its header.  The keys must match those names
# exactly, otherwise the value cannot be ordered into its column.
# NOTE(review): Phone => 'Ag Dept Phone' and Phone2 => 'School Phone' is an
# assumption about the form's field meaning -- confirm against the HTML.
my %name_order = (
    Name       => $ho{'Name'},
    Email      => $ho{'Email'},
    TeamNumber => $ho{'Team Number'},
    TeamName   => $ho{'Team Name'},
    SchoolName => $ho{'School Name'},
    Area       => $ho{'Area'},
    District   => $ho{'District'},
    Phone      => $ho{'Ag Dept Phone'},
    Phone2     => $ho{'School Phone'},
    Fax        => $ho{'Fax'},
    Address1   => $ho{'Address One'},
    Address2   => $ho{'Address Two'},
    City       => $ho{'City'},
    State      => $ho{'State'},
    Zip        => $ho{'Zip Code'},
    Website    => $ho{'Web Site'},
);
        
# Parse one HTML file and collect every field of interest.
# Returns a hash reference mapping a field name (TeamName, Phone, ... plus
# Name and Email) to its text value.  A form field that is not on the page
# is absent from the hash; Name and Email are always present (possibly '').
sub getNames {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    # Field names, as they follow "$txt_" in the <input> name attributes.
    # Should probably be defined globally, once for the whole program
    my @fields = qw(TeamName TeamNumber SchoolName Area District
                    Phone Phone2 Fax Address1 Address2 City State Zip Website);

    my %name_val;
    foreach my $node ( $tree->look_down(_tag => 'input') ) {
        my $attr = $node->attr('name');
        next unless defined $attr;
        foreach my $field (@fields) {
            # (?!\d) stops "Phone" from also claiming the "Phone2" input
            next unless $attr =~ /\$txt_\Q$field\E(?!\d)/;
            # // keeps a legitimate "0"; only a missing value becomes ''
            $name_val{$field} = $node->attr('value') // '';
        }
    }

    # Name and Email are stored differently (table cells). Fetch those now.
    # Whatever cell is present is kept; a missing cell becomes ''.
    @name_val{qw(Name Email)} = ('', '');
    my $table = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            next unless my @td = $tr->look_down(_tag => 'td');
            @name_val{qw(Name Email)} =
                map { defined $_ ? $_->as_trimmed_text : '' } @td[0,1];
        }
    }

    $tree->delete;    # release the parse tree before the next file
    return \%name_val;
}

# Write one record as row $row of worksheet $ws.
#   $ws     - object with a write_row($row, $col, \@values) method
#   $row    - zero-based row number
#   $rdata  - hashref: field name => value
#   $rorder - hashref: field name => 1-based column number
# Each value is placed in the column its name maps to, so a field missing
# from $rdata leaves an empty cell instead of shifting later values left.
# Names without a column in $rorder are skipped.  Returns 1.
sub fill_row {
    my ($ws, $row, $rdata, $rorder) = @_;

    my @vals;
    for my $name (keys %$rdata) {
        my $col = $rorder->{$name} or next;    # unknown name: no column
        $vals[$col - 1] = $rdata->{$name};
    }
    $_ //= '' for @vals;    # blank out gaps and undefined values

    $ws->write_row($row, 0, \@vals);  # add check (returns 0 on success)

    return 1;
}

# Process every file: parse it once, write its record to the next free row
# and echo what was (not) found.
my $row = 1;    # row 0 holds the headers
foreach my $file (@files) {
    next if -d $file;

    # Parse the file a single time and reuse the result below
    my %name_val = %{ getNames($file) };

    # Fill the spreadsheet with all info in one go
    fill_row($worksheet, $row++, \%name_val, \%name_order);

    foreach my $name (sort keys %name_val) {  # demo
        if ($name_val{$name}) { print "$name => $name_val{$name}\n" }
        else                  { print "Not found $name in $file\n" }
    }

    # $row already points at the next row, so the one just written is $row - 1
    print "Progressing $file (" . ($row - 1) . ")\n";
}

                                                                                                  
4

2 回答 2

3

您可以大幅精简代码。下面的示例没有使用 HTML::TreeBuilder,而是使用 Mojo::DOM58,但方法是类似的。

use 5.014;
use warnings;

use Mojo::DOM58;
use Path::Tiny;
use Data::Dumper;

# Field names, as they follow "txt_" in the <input> name attributes
my @fields = qw( TeamName TeamNumber SchoolName Area District Phone Phone2 Fax Address1 Address2 City State Zip Website );

my $html = path('team.html')->slurp;
my $dom = Mojo::DOM58->new($html);

# Collect field => value; a field that is missing from the page, or has no
# value attribute, is stored as an empty string.
my $data;
for my $field( @fields ) {
    # [name*="..."] is a substring match, so "txt_Phone" also matches an
    # input named "...txt_Phone2"; at() returns the first match it finds.
    # NOTE(review): confirm the Phone input precedes Phone2 in the document.
    my $input = $dom->at(qq{input[name*="txt_$field"]});

    # at() returns undef when nothing matches; guard before calling attr()
    $data->{$field} = $input ? ( $input->attr('value') // "" ) : "";
}

say Dumper $data;

输出:

$VAR1 = {
          'TeamName' => 'Ruidoso',
          'Zip' => '',
          'State' => 'NM',
          'City' => '',
          'District' => '1',
          'Phone2' => '',
          'Area' => '1',
          'SchoolName' => '',
          'Address2' => '',
          'Website' => '',
          'Address1' => '',
          'Phone' => '',
          'Fax' => '',
          'TeamNumber' => '83'
        };
于 2017-04-03T21:00:12.417 回答
1

简而言之,其中一些 'name' 可能在(某些)HTML 文件中根本不存在。所以应先测试它是否存在,然后再写入 $val,或者打印一条说明未找到的消息。

最明显的需要改进的是:不需要单独的功能。您可以在一次调用中搜索并找到所有这些,并将它们存储在name => value返回的 hash 中。

# Parse one HTML file and collect every field of interest.
# Returns a hash reference mapping a field name (TeamName, Phone, ... plus
# Name and Email) to its text value.  A form field that is not on the page
# is absent from the hash; Name and Email are always present (possibly '').
sub getNames {
    my ($file) = @_;

    # slurp() hands the parser the file's contents; a bare path($file) would
    # stringify to the file name, which would then be parsed as the document.
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    # Field names, as they follow "$txt_" in the <input> name attributes.
    # Should probably be defined globally, once for the whole program
    my @fields = qw(TeamName TeamNumber SchoolName Area District
                    Phone Phone2 Fax Address1 Address2 City State Zip Website);

    my %name_val;
    foreach my $node ( $tree->look_down(_tag => 'input') ) {
        my $attr = $node->attr('name');
        next unless defined $attr;
        foreach my $field (@fields) {
            # (?!\d) stops "Phone" from also claiming the "Phone2" input
            next unless $attr =~ /\$txt_\Q$field\E(?!\d)/;
            # // keeps a legitimate "0"; only a missing value becomes ''
            $name_val{$field} = $node->attr('value') // '';
        }
    }

    # Name and Email are stored differently (table cells). Fetch those now
    @name_val{qw(Name Email)} = ('', '');
    my $table = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            my @td = $tr->look_down(_tag => 'td');
            # Discard incomplete Name-Email records -- either both or none.
            # look_down() returns element objects only, so the test is on
            # the number of cells, not on ref().
            next if @td < 2;
            @name_val{qw(Name Email)} = map { $_->as_trimmed_text } @td[0,1];
        }
    }

    $tree->delete;    # release the parse tree before the next file
    return \%name_val;
}

这是因为我们要求 Name 和 Email 要么都以文本形式存在,要么都被丢弃。(示例源文件中,Name 对应的单元格里是一个内容为 “There are no people ...” 的 div,而 Email 没有对应内容。)

如果想保留能取到的任何内容,可以用下面的代码代替上面的 if-else:

@name_val{qw(Name Email)} = 
    map { (defined) ? $_->as_trimmed_text : '' } @td[0,1];

对于这个示例,这样会得到上面引用的那条提示作为 Name,以及空字符串作为 Email。

然后

# Specify spreadsheet headers in desired order and write  to file
my @headers = ('Name', 'Email', 'Team Number', 'Team Name', ...);
$worksheet->write_row(0, 0, \@headers);               # first row

# Build ancillary data structures to later sort results by this order
# each header with its index from @headers (specifies columns' order)
my %ho = map { state $idx; $_ => ++$idx } @headers;
# each name (`TeamNumber` ...) with the index of its header
my %name_order = ( Name => $ho{Name}, Email => $ho{Email}, 
    TeamNumber => $ho{'Team Number'}, TeamName => $ho{'Team Name'}, ... 
);

# Walk the file list, writing one spreadsheet row per file.
my $row = 1;    # row 0 is taken by the headers
FILE:
for my $file (@files) {
    -d $file and next FILE;    # directories are not parsed

    my $parsed   = getNames($file);
    my %name_val = %$parsed;
    fill_row($worksheet, $row, \%name_val, \%name_order);
    $row++;

    # demo: report each field, or say that it has no (true) value
    for my $name (sort keys %name_val) {
        my $line = $name_val{$name}
            ? "$name => $name_val{$name}\n"
            : "Not found $name in $file\n";
        print $line;
    }
}

# Write one record as row $row of worksheet $ws.
#   $ws     - object with a write_row($row, $col, \@values) method
#   $row    - zero-based row number
#   $rdata  - hashref: field name => value
#   $rorder - hashref: field name => 1-based column number
# Each value is placed in the column its name maps to, so a field missing
# from $rdata leaves an empty cell instead of shifting later values left.
# Names without a column in $rorder are skipped.  Returns 1.
sub fill_row {
    my ($ws, $row, $rdata, $rorder) = @_;

    my @vals;
    for my $name (keys %$rdata) {
        my $col = $rorder->{$name} or next;    # unknown name: no column
        $vals[$col - 1] = $rdata->{$name};
    }
    $_ //= '' for @vals;    # blank out gaps and undefined values

    $ws->write_row($row, 0, \@vals);  # add check (returns 0 on success)

    return 1;
}

write_row 接受一个数组引用,并用其中的元素写出一行。请注意,当传入数组引用时,write 也可以这样使用。

链接的 HTML 文件的输出

Area => 1
District => 1
State => NM
TeamName => Ruidoso
TeamNumber => 83

其余字段则输出 Not found ...。生成的 .xls 文件是正确的(在使用完整的名称列表时)。


完整程序

use warnings;
use strict;
use feature qw(say state);

use Path::Tiny;
use HTML::TreeBuilder;
use Spreadsheet::WriteExcel;


# Field names as they follow "$txt_" in the HTML <input> name attributes
my @src = qw(
    TeamName TeamNumber SchoolName Area District Phone Phone2
    Fax Address1 Address2 City State Zip Website
);
# Spreadsheet column titles, in output order
my @headers = (
    'Name', 'Email', 'Team Number', 'Team Name', 'School Name',
    'Area', 'District', 'Ag Dept Phone', 'School Phone', 'Fax',
    'Address One', 'Address Two', 'City', 'State', 'Zip Code', 'Web Site',
);
my @lens = map { length $_ } @headers;  # header widths, used when printing

# Header title => its 1-based position in @headers (so, its column)
my %ho;
@ho{@headers} = (1 .. @headers);

# Translation table: name from HTML source => header title it belongs under
my %header_for = (
    Name       => 'Name',
    Email      => 'Email',
    TeamNumber => 'Team Number',
    TeamName   => 'Team Name',
    SchoolName => 'School Name',
    Area       => 'Area',
    District   => 'District',
    Phone2     => 'Ag Dept Phone',
    Phone      => 'School Phone',
    Fax        => 'Fax',
    Address1   => 'Address One',
    Address2   => 'Address Two',
    City       => 'City',
    State      => 'State',
    Zip        => 'Zip Code',
    Website    => 'Web Site',
);
# Name from HTML source => column number (retrieved from %ho)
my %name_order = map { $_ => $ho{ $header_for{$_} } } keys %header_for;

say "Order (column) of names from HTML source to follow headers:";
for my $name (sort { $name_order{$a} <=> $name_order{$b} } keys %name_order) {
    printf("%-10s  ==>  %s\n", $name, $name_order{$name});
}
say '';

my $workbook  = Spreadsheet::WriteExcel->new('data.xls');
my $worksheet = $workbook->add_worksheet();

# Headers go to the .xls file first, then to the screen
$worksheet->write_row(0, 0, \@headers);
say "Spreadsheet, header and rows:";
prn_row(\@headers);

my @files = ('fetch_names.html');
my $row = 1;    # row 0 holds the headers
for my $file (@files) {
    -d $file and next;

    # Parse the file, then send its record to the spreadsheet
    my $parsed   = getNames($file);
    my %name_val = %$parsed;
    fill_row($worksheet, $row, \%name_val, \%name_order);
    $row++;
}


# Functions

# Write one record as row $row of worksheet $ws and echo it to the screen.
#   $ws     - worksheet object (anything with write_row($row, $col, \@vals))
#   $row    - zero-based row number
#   $rdata  - hashref: field name => value
#   $rorder - hashref: field name => 1-based column number
# Each value is placed in the column its name maps to, so a field missing
# from $rdata leaves an empty cell instead of shifting later values left.
# Names without a column in $rorder are skipped.  Returns 1.
sub fill_row {
    my ($ws, $row, $rdata, $rorder) = @_;

    # Work from the references that were passed in (the order table used to
    # be copied into a scalar, which silently fell back to the file-level
    # %name_order).
    my @vals;
    for my $name (keys %$rdata) {
        next unless exists $rorder->{$name};
        my $col = $rorder->{$name} or next;    # no usable column number
        $vals[$col - 1] = $rdata->{$name};
    }
    $_ //= '' for @vals;    # blank out gaps and undefined values

    prn_row(\@vals);  # print to screen

    # Use the worksheet handed to this sub rather than the global one
    $ws->write_row($row, 0, \@vals);  # test this (returns 0 on success)

    return 1;
}

# Print one row to the screen: every cell is right-aligned in a field as
# wide as the header of the same position (widths come from @lens), followed
# by two spaces; the row ends with a newline.
# Takes a reference to the array of cell values.
sub prn_row {
    my ($row_ref) = @_;
    my @cells = @$row_ref;

    my $i = 0;
    for my $cell (@cells) {
        my $width = $lens[$i++];
        printf("%${width}s  ", $cell);
    }
    say '';
}

# Parse one HTML file and collect every field of interest.
# Returns a hash reference mapping a field name (each entry of @src, plus
# Name and Email) to its text value.  A form field that is not on the page
# is absent from the hash; Name and Email are always present (possibly '').
sub getNames {
    my ($file) = @_;
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    my %name_val;
    foreach my $node ( $tree->look_down(_tag => 'input') ) {
        my $attr = $node->attr('name');
        next unless defined $attr;
        # @src lists the phrases to find: the part of the name after "$txt_"
        foreach my $field (@src) {
            # (?!\d) stops "Phone" from also claiming the "Phone2" input
            next unless $attr =~ /\$txt_\Q$field\E(?!\d)/;
            $name_val{$field} = $node->attr('value') // '';
        }
    }

    # Name and Email are stored differently, fetch those now
    @name_val{qw(Name Email)} = ('', '');
    my $table = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            my @td = $tr->look_down(_tag => 'td');
            # Discard incomplete Name-Email records -- either both or none.
            # look_down() returns element objects only, so the test is on
            # the number of cells, not on ref().
            next if @td < 2;
            @name_val{qw(Name Email)} = map { $_->as_trimmed_text } @td[0,1];
        }
    }

    $tree->delete;    # release the parse tree before the next file
    return \%name_val;
}

它作为一个完整的程序与提供的示例 HTML 源一起工作。


补充:实际的页面可能包含多个“姓名-电子邮件”对

use LWP::Simple qw(get);

# Fetch the directory page and collect the form fields plus every
# Name-Email pair listed in its "rgMasterTable" table.
# Returns two hash references:
#   1. field name (each entry of @src) => value of the matching <input>
#   2. name => email, one entry per table row with at least two cells;
#      a single ('' => '') entry when no such row exists, so the caller
#      still gets one record per page.
# $file is accepted for interface compatibility but not used here: the page
# comes from the fixed $url below.
sub getNames {
    my ($file) = @_;
    my $url = 'https://www.judgingcard.com/Directory/Directory.aspx?ID=1643';
    # LWP::Simple's get() returns undef on failure and does not set $!,
    # so the message reports only the URL.
    my $page = get($url) or die "Can't get the page $url";
    my $tree = HTML::TreeBuilder->new_from_content( $page );

    my %name_val;
    foreach my $node ( $tree->look_down(_tag => 'input') ) {
        my $attr = $node->attr('name');
        next unless defined $attr;
        # @src lists the phrases to find: the part of the name after "$txt_"
        foreach my $field (@src) {
            # (?!\d) stops "Phone" from also claiming the "Phone2" input
            next unless $attr =~ /\$txt_\Q$field\E(?!\d)/;
            $name_val{$field} = $node->attr('value') // '';
        }
    }

    # Name and Email are stored differently, fetch those now
    my %name_email;
    my $table = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            my @td = $tr->look_down(_tag => 'td');
            # There may be more than one Name-Email pair
            # so enter key-value pair explicitly.  Rows with fewer than two
            # cells are skipped; they must not wipe pairs already collected.
            next if @td < 2;
            $name_email{ $td[0]->as_trimmed_text } = $td[1]->as_trimmed_text;
        }
    }
    %name_email = ('' => '') unless %name_email;

    $tree->delete;    # release the parse tree
    return \%name_val, \%name_email;
}

然后在主程序中需要:

# One spreadsheet row per Name-Email pair of every file; all other fields
# of the file are repeated on each of its rows.
for my $file (@files) {
    -d $file and next;

    # Parse the file; getNames hands back the field hash and the
    # name => email hash as two references
    my ($fields_ref, $people_ref) = getNames($file);
    my %name_val   = %$fields_ref;
    my %name_email = %$people_ref;

    # Slot each pair into %name_val, then write the row
    for my $person (keys %name_email) {
        @name_val{qw(Name Email)} = ($person, $name_email{$person});
        fill_row($worksheet, $row, \%name_val, \%name_order);
        $row++;
    }
}

具有多个名称-电子邮件对的所需格式是:相同的标题,并且对于每一对,将单独的行打印到文件中,其中除名称-电子邮件之外的所有信息都是相同的。

打印的电子表格(使用的 URL 在评论中提供)

打印的电子表格。 评论中提供了 URL

于 2017-04-03T20:36:41.143 回答