如果我是你,那么我会从这三个文件构建一个 sqlite 数据库,然后使用 sql 来检索结果会容易得多。
我不知道它会有多快,但我认为它比并行读取三个文件要强大得多。SQlite 可以处理这么多的数据。
http://perlmaven.com/simple-database-access-using-perl-dbi-and-sql
用于大型数据集的 SQLite?
#!/usr/bin/perl
use strict;
use warnings;
use DBI;
my $dbfile = "sample.db";
my $dsn = "dbi:SQLite:dbname=$dbfile";
my $user = "";
my $password = "";
my $dbh = DBI->connect($dsn, $user, $password, {
PrintError => 1,
RaiseError => 1,
FetchHashKeyName => 'NAME_lc',
AutoCommit => 0,
});
$dbh->do('PRAGMA synchronous = OFF');
my $sql = <<'END_SQL';
CREATE TABLE t1 (
id INTEGER PRIMARY KEY,
c1 VARCHAR(100),
c2 VARCHAR(100),
c3 VARCHAR(100),
c4 VARCHAR(100),
)
END_SQL
$dbh->do($sql);
my $sql = <<'END_SQL';
CREATE TABLE t2 (
id INTEGER PRIMARY KEY,
c1 VARCHAR(100),
c2 VARCHAR(100),
c3 VARCHAR(100),
c4 VARCHAR(100),
)
END_SQL
$dbh->do($sql);
my $sql = <<'END_SQL';
CREATE TABLE t3 (
id INTEGER PRIMARY KEY,
c1 VARCHAR(100),
c2 VARCHAR(100),
c3 VARCHAR(100),
c4 VARCHAR(100),
)
END_SQL
$dbh->do($sql);
### populate data
open my $fh, $ARGV[0] or die "Can't open $ARGV[0] for reading: $!";
while( my $line = <$fh> ){
my @cols = split('\|', $line);
$dbh->do('INSERT INTO t1 (id, c1, c2, c3, c4) VALUES (?, ?, ?)',undef,$col[0],$col[1],$col[2],$col[3]);
}
close($fh);
$dbh->commit();
open my $fh, $ARGV[1] or die "Can't open $ARGV[1] for reading: $!";
while( my $line = <$fh> ){
my @cols = split('\|', $line);
$dbh->do('INSERT INTO t2 (id, c1, c2, c3, c4) VALUES (?, ?, ?)',undef,$col[0],$col[1],$col[2],$col[3]);
}
close($fh);
$dbh->commit();
open my $fh, $ARGV[2] or die "Can't open $ARGV[2] for reading: $!";
while( my $line = <$fh> ){
my @cols = split('\|', $line);
$dbh->do('INSERT INTO t3 (id, c1, c2, c3, c4) VALUES (?, ?, ?)',undef,$col[0],$col[1],$col[2],$col[3]);
}
close($fh);
$dbh->commit();
### process data
my $sql = 'SELECT t1.c1, t1.c2, t1.c3, t2.c2, t2.c3, t3.c2 FROM t1,t2,t3 WHERE t1.c1=t2.c1 AND t1.c1=t3.c1 ORDER BY t1.c1';
my $sth = $dbh->prepare($sql);
$sth->execute(1, 10);
while (my @row = $sth->fetchrow_array) {
print join("\t",@row)."\n";
}
$dbh->disconnect;
#unlink($dbfile);