这就是我经常使用XML::Twig进行的处理。
wrap_children 方法正是为此而设计的:它允许您定义一个类似正则表达式的模式,与该模式匹配的子元素会被包装进一个新元素中。更多信息请参见下面的示例以及 XML::Twig 的文档:
#!/usr/bin/perl
# Demo/test: turn a flat sequence of <h1>/<h2>/<h3> headings, each followed
# by a <body>, into nested <topic> elements using XML::Twig's wrap_children,
# then compare the result with the expected document kept in the DATA section.

use strict;
use warnings;

use Test::More tests => 1;

use XML::Twig;

# The DATA section holds two documents: the input doc first, then the
# expected result. Split on the boundary between the closing </doc> of the
# first and the opening <doc> of the second, so the split works whether or
# not a blank line separates the two documents.
my $data= do { local $/; <DATA> };
$data= '' unless defined $data;
my( $in, $expected)= split m{(?<=</doc>)\s*(?=<doc>)}, $data, 2;
die "DATA section must hold the input doc followed by the expected doc\n"
  unless defined $in && defined $expected;

my $t= XML::Twig->new->parse( $in);
my $root= $t->root;

# that's where the wrapping occurs, from the inside out: the deepest level
# is wrapped first, so that each outer pattern can match the already-built
# inner topics through their temporary "level" attribute
$root->wrap_children( '<h3><body>', topic => { level => 3 });
$root->wrap_children( '<h2><body><topic level="3">*', topic => { level => 2 });
$root->wrap_children( '<h1><body><topic level="2">*', topic => { level => 1 });

# now we cleanup: the levels are not used any more
foreach my $topic ($t->descendants( 'topic'))
  { $topic->del_att( 'level'); }

# the wrapping will have generated tons of additional id's,
# you may not need this if your elements had id's before the wrapping
foreach my $elt ($t->descendants( 'topic|body|h1|h2|h3'))
  { $elt->del_att( 'id'); }

# now we can deal with titles
foreach my $heading ($t->descendants( 'h1|h2|h3'))
  { $heading->set_tag( 'title'); }

# how did we do?
# The expected doc goes through the same parser and pretty-printer as the
# result, so the comparison checks the structure and text of the documents
# and does not depend on how the DATA section happens to be indented.
my $expected_twig= XML::Twig->new->parse( $expected);
my $want= $expected_twig->sprint( pretty_print => 'indented');

is( $t->sprint( pretty_print => 'indented'), $want, 'just one test');
__DATA__
<doc>
<h1> Head 1 </h1>
<body></body>
<h2> Sub Head 1 </h2>
<body></body>
<h3> SubSub Head 1 </h3>
<body></body>
<h2> Sub Head 2 </h2>
<body></body>
<h1> Head 2 </h1>
<body></body>
</doc>
<doc>
<topic>
<title> Head 1 </title>
<body></body>
<topic>
<title> Sub Head 1 </title>
<body></body>
<topic>
<title> SubSub Head 1 </title>
<body></body>
</topic>
</topic>
<topic>
<title> Sub Head 2 </title>
<body></body>
</topic>
</topic>
<topic>
<title> Head 2 </title>
<body></body>
</topic>
</doc>