好的,我知道我不应该只做你的作业,但是这个有一些有趣的技巧,所以我玩了一下。从中学习,而不仅仅是复制。我没有很好地评论,所以如果你有什么不明白的地方,请提问。这有一些小魔法,如果你没有在课堂上讲过,你的教授会知道的,所以一定要明白。
#!/usr/bin/env perl
use strict;
use warnings;
use Getopt::Long;
my ($enzyme_file, $dna_file);
my $write_output = 0;
my $verbose = 0;
my $help = 0;
GetOptions(
'enzyme=s' => \$enzyme_file,
'dna=s' => \$dna_file,
'output' => \$write_output,
'verbose' => \$verbose,
'help' => \$help
);
$help = 1 unless ($dna_file && $enzyme_file);
help() if $help; # exits
# 'Main'
my $dna = getDNA($dna_file);
my %enzymes = %{ getEnzymes($enzyme_file) }; # A function cannot return a hash, so return a hashref and then store the referenced hash
foreach my $enzyme (keys %enzymes) {
print "Applying enzyme " . $enzyme . " gives:\n";
my $dna_holder = $dna;
my ($precut, $postcut) = ($enzymes{$enzyme}{'precut'}, $enzymes{$enzyme}{'postcut'});
my $R = qr/[GA]/;
my $B = qr/[CGT]/;
$precut =~ s/R/${R}/g;
$precut =~ s/B/${B}/g;
$postcut =~ s/R/${R}/g;
$postcut =~ s/B/${B}/g;
print "\tPre-Cut pattern: " . $precut . "\n" if $verbose;
print "\tPost-Cut pattern: " . $postcut . "\n" if $verbose;
#while(1){
# if ($dna_holder =~ s/(.*${precut})(${postcut}.*)/$1/ ) {
# print "\tFound section:" . $2 . "\n" if $verbose;
# print "\tRemaining DNA: " . $1 . "\n" if $verbose;
# unshift @{ $enzymes{$enzyme}{'cut_dna'} }, $2;
# } else {
# unshift @{ $enzymes{$enzyme}{'cut_dna'} }, $dna_holder;
# print "\tNo more cuts.\n" if $verbose;
# print "\t" . $_ . "\n" for @{ $enzymes{$enzyme}{'cut_dna'} };
# last;
# }
#}
unless ($dna_holder =~ s/(${precut})(${postcut})/$1'$2/g) {
print "\tHas no effect on given strand\n" if $verbose;
}
@{ $enzymes{$enzyme}{'cut_dna'} } = split(/'/, $dna_holder);
print "\t$_\n" for @{ $enzymes{$enzyme}{'cut_dna'} };
writeOutput($dna_file, $enzyme, $enzymes{$enzyme}{'cut_dna'}) if $write_output; #Note that $enzymes{$enzyme}{'cut_dna'} is an arrayref already
print "\n";
}
sub getDNA {
my ($dna_file) = @_;
open(my $dna_handle, '<', $dna_file) or die "Cannot open file $dna_file";
my @dna_array = <$dna_handle>;
chomp(@dna_array);
my $dna = join('', @dna_array);
print "Using DNA:\n" . $dna . "\n\n" if $verbose;
return $dna;
}
sub getEnzymes {
my ($enzyme_file) = @_;
my %enzymes;
open(my $enzyme_handle, '<', $enzyme_file) or die "Cannot open file $enzyme_file";
while(<$enzyme_handle>) {
chomp;
if(m{([^/]*)/([^']*)'([^/]*)//}) {
print "Found Enzyme " . $1 . ":\n\tPre-cut: " . $2 . "\n\tPost-cut: " . $3 . "\n" if $verbose;
$enzymes{$1} = {
precut => $2,
postcut => $3,
cut_dna => [] #Added to show the empty array that will hold the cut DNA sections
};
}
}
print "\n" if $verbose;
return \%enzymes;
}
sub writeOutput {
my ($dna_file, $enzyme, $cut_dna_ref) = @_;
my $outfile = $dna_file . '_' . $enzyme;
print "\tSaving data to $outfile\n" if $verbose;
open(my $outfile_handle, '>', $outfile) or die "Cannot open $outfile for writing";
print $outfile_handle $_ . "\n" for @{ $cut_dna_ref };
}
sub help {
my $filename = (split('/', $0))[-1];
my $enzyme_text = <<'END';
AatI/AGG'CCT//
AatII/GACGT'C//
AbsI/CC'TCGAGG//
TryII/RRR'TTT//
Test/AAA'TTT//
END
my $dna_text = <<'END';
CCCCCCGGGTTTCCCCCCC
CCCCCAAATTTCCCCCCCCCCCCAGATTTC
CCCCCCCCCGAGTTTCCCCC
END
print <<END;
Usage:
$filename --enzyme (-e) <enzyme-filename> --dna (-d) <dna-filename> [options] (files may come in either order)
$filename -h (shows this help)
Options:
--verbose (-v) print additional (debugging) information
--output (-o) output final data to files
Files:
The DNA file contains one DNA string which may be broken over many lines. E.G.:
$dna_text
The enzymes file constains enzyme definitions, one per line. E.G.:
$enzyme_text
END
exit;
}
编辑:明确添加了 cut_dna 初始化,因为这是每种酶的最终结果持有者,所以我认为更清楚地看到它会很好。
编辑 2:添加了输出例程、调用、标志和相应的帮助。
编辑 3:更改了主程序以结合最佳的 canavanin 方法,同时删除循环。现在它是一个全局替换以添加临时剪切标记 ('),然后在剪切标记上拆分为数组。留下旧方法作为注释,新方法是下面的 5 行。
编辑 4:用于写入多个文件的附加测试用例。(以下)
my @names = ('cat','dog','sheep');
foreach my $name (@names) { #$name is lexical, ie dies after each loop
open(my $handle, '>', $name); #open a lexical handle for the file, also dies each loop
print $handle $name; #write to the handle
#$handles closes automatically when it "goes out of scope"
}