我试图阻止英文文本,我阅读了很多论坛,但我看不到一个明确的例子。我正在使用搬运工词干分析器,就像使用 Text::ENglish 一样。这是我走了多远:
use Lingua::StopWords qw(getStopWords);
my $stopwords = getStopWords('en');
use Text::English;
@stopwords = grep { $stopwords->{$_} } (keys %$stopwords);
chdir("c:/Test Facility/input");
@files = <*>;
foreach $file (@files)
{
open (input, $file);
while (<input>)
{
open (output,">>c:/Test Facility/normalized/".$file);
chomp;
for my $w (@stopwords)
{
s/\b\Q$w\E\b//ig;
}
$_ =~s/<[^>]*>//g;
$_ =~ s/[[:punct:]]//g;
##What should I write here to apply porter stemming using Text::English##
print output "$_\n";
}
}
close (input);
close (output);