我在 bash 中编写了一个运行 blastx 的脚本。
#!/bin/sh -l
# $Id: Barrine.sh federicogaiti $
echo "Right now it is:"
date
echo ""
function usage() {
echo "Barrine.sh script by Federico Gaiti, March 2013."
echo ""
echo "Run Blastx"
echo ""
echo "Usage: "
echo "Barrine.sh <All contigs (fasta extension)> <Number of fasta split sequences> <Head file with PBS command where you set up account, walltime, etc..> "
echo ""
echo "Example: "
echo "Barrine.sh InputFasta n Head "
echo ""
exit 1
}
# Testing if the number of arguments is correct
if [ $# != 3 ]
then
usage
exit
fi
### Declaring variables
InputFasta=$1 MY ORIGINAL FASTA FILE WITH ALL THE SEQUENCES
n=$2 NUMBER OF FILES I WANT MY FASTA FILE SPLIT
Head=$3 PBS OPTIONS
n4=$((n/4))
n41=$((n/4 + 1))
n2=$((n/2))
n21=$((n/2 + 1))
n43=$((n/4 * 3))
n431=$((n/4*3 + 1))
echo Loading Modules
module load ucsc_utilities/20130122
wait
echo Split input FASTA in n FASTA file
faSplit sequence ${InputFasta} ${n} ${InputFasta}_Split_S
wait
echo blastx command splitting it in 4 jobs to make it faster
for i in {000..0$n4} ; do echo "blastx -query ${InputFasta}_Split_S$i.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out ${InputFasta}_blastx$i.csv" ; done > ${InputFasta}_Job1.sh
for i in {0$n41..$n2} ; do echo "blastx -query ${InputFasta}_Split_S$i.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out ${InputFasta}_blastx$i.csv" ; done > ${InputFasta}_Job2.sh
for i in {$n21..$n43} ; do echo "blastx -query ${InputFasta}_Split_S$i.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out ${InputFasta}_blastx$i.csv" ; done > ${InputFasta}_Job3.sh
for i in {$n431..$n} ; do echo "blastx -query ${InputFasta}_Split_S$i.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out ${InputFasta}_blastx$i.csv" ; done > ${InputFasta}_Job4.sh
wait
echo Head the PBS commands to the Job files
for i in {1..4}
do
cat ${Head} ${InputFasta}_Job$i.sh | sed "s/BlastJob/BlastJob_$i/g" > ${InputFasta}_BlastJob$i.sh
done
然后我会将 4 个作业提交到 barrine 服务器。我应该获得的是包含以下内容的 4 个工作:
blastx -query TEST_Split_S000.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out TEST_blastx000.csv
blastx -query TEST_Split_S001.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out TEST_blastx001.csv
blastx -query TEST_Split_S002.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out TEST_blastx002.csv
blastx -query TEST_Split_S003.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out TEST_blastx003.csv
blastx -query TEST_Split_S004.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out TEST_blastx004.csv
blastx -query TEST_Split_S005.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out TEST_blastx005.csv
blastx -query TEST_Split_S006.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out TEST_blastx006.csv
blastx -query TEST_Split_S007.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out TEST_blastx007.csv
............
blastx -query TEST_Split_S050.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out TEST_blastx050.csv
但我在脚本中得到的只是:
blastx -query TEST_Split_S{000..050}.fa -evalue 0.0001 -max_target_seqs 100 -db nr -num_threads 8 -outfmt '7 qseqid qlen sseqid pident length mismatch gapopen qstart qend sstart send ppos evalue bitscore score' -out TEST_blastx{000..050}.csv
有人可以帮我解决问题吗?如果我在我的脚本之外使用 for 循环可以正常工作。
感谢帮助