这是分解成可消化部分的相同正则表达式。除了更具可读性之外,一些子正则表达式本身也很有用。更改允许的分隔符也容易得多。
#!/usr/local/ActivePerl-5.10/bin/perl
use 5.010; #only 5.10 and above
use strict;
use warnings;
my $sep = qr{ [/.-] }x; #allowed separators
my $any_century = qr/ 1[6-9] | [2-9][0-9] /x; #match the century
my $any_decade = qr/ [0-9]{2} /x; #match any decade or 2 digit year
my $any_year = qr/ $any_century? $any_decade /x; #match a 2 or 4 digit year
#match the 1st through 28th for any month of any year
my $start_of_month = qr/
(?: #match
0?[1-9] | #Jan - Sep or
1[0-2] #Oct - Dec
)
($sep) #the separator
(?:
0?[1-9] | # 1st - 9th or
1[0-9] | #10th - 19th or
2[0-8] #20th - 28th
)
\g{-1} #and the separator again
/x;
#match 28th - 31st for any month but Feb for any year
my $end_of_month = qr/
(?:
(?: 0?[13578] | 1[02] ) #match Jan, Mar, May, Jul, Aug, Oct, Dec
($sep) #the separator
31 #the 31st
\g{-1} #and the separator again
| #or
(?: 0?[13-9] | 1[0-2] ) #match all months but Feb
($sep) #the separator
(?:29|30) #the 29th or the 30th
\g{-1} #and the separator again
)
/x;
#match any non-leap year date and the first part of Feb in leap years
my $non_leap_year = qr/ (?: $start_of_month | $end_of_month ) $any_year/x;
#match 29th of Feb in leap years
#BUG: 00 is treated as a non leap year
#even though 2000, 2400, etc are leap years
my $feb_in_leap = qr/
0?2 #match Feb
($sep) #the separtor
29 #the 29th
\g{-1} #the separator again
(?:
$any_century? #any century
(?: #and decades divisible by 4 but not 100
0[48] |
[2468][048] |
[13579][26]
)
|
(?: #or match centuries that are divisible by 4
16 |
[2468][048] |
[3579][26]
)
00
)
/x;
my $any_date = qr/$non_leap_year|$feb_in_leap/;
my $only_date = qr/^$any_date$/;
say "test against garbage";
for my $date (qw(022900 foo 1/1/1)) {
say "\t$date ", $date ~~ $only_date ? "matched" : "didn't match";
}
say '';
#comprehensive test
my @code = qw/good unmatch month day year leap/;
for my $sep (qw( / - . )) {
say "testing $sep";
my $i = 0;
for my $y ("00" .. "99", 1600 .. 9999) {
say "\t", int $i/8500*100, "% done" if $i++ and not $i % 850;
for my $m ("00" .. "09", 0 .. 13) {
for my $d ("00" .. "09", 1 .. 31) {
my $date = join $sep, $m, $d, $y;
my $re = $date ~~ $only_date || 0;
my $code = not_valid($date);
unless ($re == !$code) {
die "error $date re $re code $code[$code]\n"
}
}
}
}
}
sub not_valid {
state $end = [undef, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
my $date = shift;
my ($m,$d,$y) = $date =~ m{([0-9]+)[-./]([0-9]+)[-./]([0-9]+)};
return 1 unless defined $m; #if $m is set, the rest will be too
#components are in roughly the right ranges
return 2 unless $m >= 1 and $m <= 12;
return 3 unless $d >= 1 and $d <= $end->[$m];
return 4 unless ($y >= 0 and $y <= 99) or ($y >= 1600 and $y <= 9999);
#handle the non leap year case
return 5 if $m == 2 and $d == 29 and not leap_year($y);
return 0;
}
sub leap_year {
my $y = shift;
$y = "19$y" if $y < 1600;
return 1 if 0 == $y % 4 and 0 != $y % 100 or 0 == $y % 400;
return 0;
}