这段代码可以吗?我真的不知道应该使用哪种规范化形式(我唯一注意到的是,
使用 NFD 时会得到错误的输出)。
#!/usr/local/bin/perl
use warnings;
use 5.014;
use utf8;
binmode STDOUT, ':encoding(utf-8)';
use Unicode::Normalize;
use Unicode::Collate::Locale;
use Unicode::GCString;
# Count how often each word occurs in $text and print one "word : count"
# row per distinct word, ordered by German collation rules.
my $text = "my taxt täxt";

# Width of the word column, measured in terminal display columns.
my $word_column_width = 10;

# Normalize BEFORE tokenizing. In decomposed (NFD) text an accented letter
# is a base letter followed by a combining mark such as U+0308, and such
# marks are not \p{Alphabetic}; matching first and normalizing each word
# afterwards would therefore split "täxt" into "ta" and "xt".
my $normalized_text = NFC( $text );

# A letter may still carry combining marks after NFC (not every sequence
# has a precomposed form), so keep trailing marks attached to their letter.
# A word is a run of such letters with at most one internal apostrophe.
my $letter_re = qr/\p{Alphabetic}\p{M}*/;
my $word_re   = qr/(?:$letter_re)+(?:'(?:$letter_re)+)?/;

my %hash;
while ( $normalized_text =~ m/($word_re)/g ) {
    $hash{$1}++;
}

my $collator = Unicode::Collate::Locale->new( locale => 'DE' );
for my $word ( $collator->sort( keys %hash ) ) {

    # printf's "%-10.10s" pads and truncates by code points, which
    # misaligns words containing combining marks or wide characters.
    # Measure in display columns via Unicode::GCString instead.
    my $cell = Unicode::GCString->new( $word );

    # Truncate one grapheme cluster at a time until the cell fits.
    while ( $cell->length > 0 && $cell->columns > $word_column_width ) {
        $cell = $cell->substr( 0, $cell->length - 1 );
    }

    my $padding = ' ' x ( $word_column_width - $cell->columns );
    printf "%s%s : %5d\n", $cell->as_string, $padding, $hash{$word};
}