I'm using the Perl code below to bulk-download 10-Ks from the SEC website. However, every few hundred files the script throws an "Out of memory!" message, apparently when it gets stuck on a particularly large 10-K filing. Any ideas on how to avoid this "Out of memory!" error on the large files?
#!/usr/bin/perl
use strict;
use warnings;
use LWP;
my $ua = LWP::UserAgent->new;
open LOG, ">download_log.txt" or die $!;
######## make sure the file with the ids/urls is in the
######## same folder as the perl script
open DLIST, "downloadlist.txt" or die $!;
my @file = <DLIST>;
foreach my $line (@file) {
    #next if 0.999 > rand;
    #print "Now processing file: $line\n";
    my ($nr, $get_file) = split /,/, $line;
    chomp $get_file;
    $get_file = "http://www.sec.gov/Archives/" . $get_file;
    if ($get_file =~ m/([0-9-]+)\.txt/) {
        my $filename = $nr . ".txt";
        open OUT, ">$filename" or die $!;
        print "file $nr \n";
        # The whole filing is buffered in $response before being printed to disk.
        my $response = $ua->get($get_file);
        if ($response->is_success) {
            print OUT $response->content;
            close OUT;
        } else {
            print LOG "Error in $filename - $nr \n";
        }
    }
}
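
The "Out of memory!" almost certainly comes from the fact that $ua->get() buffers the entire response body in memory, and $response->content then hands that whole string to print, so a very large 10-K has to fit in RAM in one piece. LWP::UserAgent's get() accepts a ':content_file' option that streams the body directly to a file on disk instead of keeping it in a scalar. Below is a minimal sketch of the loop rewritten that way; it assumes the same downloadlist.txt format and file naming as the original script, and it also reads the download list line by line rather than slurping it into an array:

#!/usr/bin/perl
use strict;
use warnings;
use LWP::UserAgent;

my $ua = LWP::UserAgent->new;
open LOG, ">download_log.txt" or die $!;
open DLIST, "downloadlist.txt" or die $!;

# Process one line at a time instead of loading the whole list into memory.
while (my $line = <DLIST>) {
    my ($nr, $get_file) = split /,/, $line;
    chomp $get_file;
    $get_file = "http://www.sec.gov/Archives/" . $get_file;
    next unless $get_file =~ m/([0-9-]+)\.txt/;

    my $filename = $nr . ".txt";
    print "file $nr \n";

    # ':content_file' tells LWP to write the response body straight to
    # $filename as it arrives, so the filing is never held in memory
    # as one big string.
    my $response = $ua->get($get_file, ':content_file' => $filename);
    print LOG "Error in $filename - $nr \n" unless $response->is_success;
}

close DLIST;
close LOG;

If you prefer to keep the original structure, $ua->max_size() can cap how much of a response LWP will buffer, but that truncates oversized downloads rather than saving them, so streaming to a file with ':content_file' is usually the better fit here.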