我使用 perl 脚本从另一台服务器下载 CSV 文件。下载后我想检查文件是否包含任何损坏的数据。我尝试使用 Encode::Detect::Detector 来检测编码,但它在两种情况下都返回“undef”:
- 如果字符串是 ASCII 或
- 如果字符串损坏
所以使用下面的程序我无法区分 ASCII 和损坏的数据。
use strict;
use Text::CSV;
use Encode::Detect::Detector;
use XML::Simple;
use Encode;
require Encode::Detect;
my @rows;
my $init_file = "new-data-jp-2013-8-8.csv";
my $csv = Text::CSV->new ( { binary => 1 } )
or die "Cannot use CSV: ".Text::CSV->error_diag ();
open my $fh, $init_file or die $init_file.": $!";
while ( my $row = $csv->getline( $fh ) ) {
my @fields = @$row; # get line into array
for (my $i=1; $i<=23; $i++){ # I already know that CSV file has 23 columns
if ((Encode::Detect::Detector::detect($fields[$i-1])) eq undef){
print "the encoding is undef in col".$i.
" where field is ".$fields[$i-1].
" and its length is ".length($fields[$i-1])." \n";
}
else {
my $string = decode("Detect", $fields[$i-1]);
print "this is string print ".$string.
" the encoding is ".Encode::Detect::Detector::detect($fields[$i-1]).
" and its length is ".length($fields[$i-1])."\n";
}
}
}