我知道这是一个常见错误,我已经检查了其他帖子,但它并没有解决我的问题。我想使用与我使用相同的方式用于SortMeRNA
rule( ) 的数据库的名称。但很显然,我做不到。rRNAdb=config["rRNA_database"]
version=config["genome_version"]
SyntaxError:
Not all output, log and benchmark files of rule SortMeRNA contain the same wildcards. This is crucial though, in order to avoid that two or more jobs write to the same file
......
import glob
import os
configfile : "config.json"
#################### GLOBAL VARIABLES #######################
OUTDIR = os.path.abspath(config["outdir"])
# ID NGS
idNGS = config["idNGS"]
# Fichiers FASTQ
DIR_FASTQ = config["dir_fastq"]
## PARAMETERS TRIMMOMATIC
w = config["w"]
Q = config["Q"]
m = config["m"]
## Bowtie2 database
BWT2_DB = config["BWT2_DB"]
## Templates multiQC
DIR_TPL = config["DIR_TPL"]
#### VERSIONS genomes and database ####
version = config["genome_version"]
rRNAdb = config["rRNA_database"]
####################### FASTQ FILES #########################
def list_samples(DIR_FASTQ): # create list with all sample names from fastq directory
SAMPLES=[]
for file in glob.glob(DIR_FASTQ+"/*.fastq.gz"):
base=os.path.basename(file)
sample=(base.replace('.fastq.gz', ''))
SAMPLES.append(sample)
return(SAMPLES)
SAMPLES = list_samples(DIR_FASTQ)
########################## RULES ############################
rule all:
input:
OUTDIR+"/multiQC/multiQC-PL09_"+idNGS+".html",
OUTDIR+"/multiQC/multiQC-PL21_"+idNGS+".html"
rule fastQC:
input:
DIR_FASTQ+"/{sample}.fastq.gz"
output:
"{OUTDIR}/FastQC/{sample}_fastqc.zip",
"{OUTDIR}/FastQC/{sample}_fastqc.html"
threads:
16
shell:
"""
fastqc {input} -o {OUTDIR}/FastQC/
"""
rule trimmomatic:
input:
DIR_FASTQ+"/{sample}.fastq.gz"
output:
trim_out="{OUTDIR}/Trimmomatic/{sample}_SL-{w}-{Q}_Min-{m}.fastq.gz",
trim1_out="{OUTDIR}/Trimmomatic/{sample}_SL-{w}-{Q}_Min-{m}.stdout",
trim1_err="{OUTDIR}/Trimmomatic/{sample}_SL-{w}-{Q}_Min-{m}.stderr"
threads:
16
shell:
"trimmomatic SE -phred33 {input} {output.trim_out} SLIDINGWINDOW:{w}:{Q} MINLEN:{m} > {output.trim1_out} 2> {output.trim1_err}"
rule Bowtie2:
input:
fasta_trim="{OUTDIR}/Trimmomatic/{sample}_SL-"+str(w)+"-"+str(Q)+"_Min-"+str(m)+".fastq.gz"
output:
BWT2_ERR="{OUTDIR}/Bowtie2/{sample}_SL-{w}-{Q}_Min-{m}_bowtie2_{version}.stderr",
BWT2_SAM=temp("{OUTDIR}/Bowtie2/{sample}_SL-{w}-{Q}_Min-{m}_bowtie2_{version}.sam"),
BWT2_BAM=temp("{OUTDIR}/Bowtie2/{sample}_SL-{w}-{Q}_Min-{m}_bowtie2_{version}.bam"),
BWT2_BAM_SORTED="{OUTDIR}/Bowtie2/{sample}_SL-{w}-{Q}_Min-{m}_bowtie2_{version}.sorted.bam"
threads:
16
shell:
"""
bowtie2 -x {BWT2_DB} -U {input} 2> {output.BWT2_ERR} > {output.BWT2_SAM}
samtools view -bS -o {output.BWT2_BAM} {output.BWT2_SAM}
samtools sort {output.BWT2_BAM} -o {output.BWT2_BAM_SORTED}
samtools index {output.BWT2_BAM_SORTED}
"""
rule copy_to_pigz: # we just copy fastq files to gunzip them and use them in sortMeRNA, this way we don't touch the original ones
input:
DIR_FASTQ+"/{sample}.fastq.gz"
output:
"{OUTDIR}/temp/{sample}.fastq.gz" # this temp directory will be deleted at the end of the pipeline in multiQC rule
shell:
"cp {input} {output}"
rule SortMeRNA:
input:
"OUTDIR/temp/{sample}.fastq.gz"
output:
fasta_pigz=temp("{OUTDIR}/temp/{sample}.fastq"),
blast="{OUTDIR}/SortMeRNA/{sample}.fastq_SortMeRNA_{rRNAdb}.blast",
log="{OUTDIR}/SortMeRNA/{sample}.fastq_SortMeRNA_{rRNAdb}.log"
params:
prefixSortMeRNA="{OUTDIR}/SortMeRNA/{sample}.fastq_SortMeRNA_{rRNAdb}",
sortmerna_DB=config["SORTMERNA_DB"]
shell:
"""
pigz -d -k {input}
sortmerna --ref {params.sortmerna_DB} --reads {output.fasta_pigz} --aligned {params.prefixSortMeRNA} --blast '1' --log TRUE
"""
rule multiQC:
input:
expand(OUTDIR+"/FastQC/{sample}_fastqc.zip", sample=SAMPLES),
expand(OUTDIR+"/Trimmomatic/{sample}_SL-"+str(w)+"-"+str(Q)+"_Min-"+str(m)+".fastq.gz", sample=SAMPLES),
expand(OUTDIR+"/Trimmomatic/{sample}_SL-"+str(w)+"-"+str(Q)+"_Min-"+str(m)+".stdout", sample=SAMPLES),
expand(OUTDIR+"/Trimmomatic/{sample}_SL-"+str(w)+"-"+str(Q)+"_Min-"+str(m)+".stderr", sample=SAMPLES),
expand(OUTDIR+"/Bowtie2/{sample}_SL-"+str(w)+"-"+str(Q)+"_Min-"+str(m)+"_bowtie2_"+version+".stderr", sample=SAMPLES),
expand(OUTDIR+"/SortMeRNA/{sample}.fastq_SortMeRNA_"+rRNAdb+".blast", sample=SAMPLES),
expand(OUTDIR+"/SortMeRNA/{sample}.fastq_SortMeRNA_"+rRNAdb+".log", sample=SAMPLES),
tpl_plNGS=DIR_TPL+"/template-PL-NGS_multiqc_config.yaml",
tpl_plBioinfo=DIR_TPL+"/template-PL-Bioinfo_multiqc_config.yaml"
output:
rap_plNGS="{OUTDIR}/multiQC/multiQC-PL09_{idNGS}.html",
rap_plBioinfo="{OUTDIR}/multiQC/multiQC-PL21_{idNGS}.html"
shell:
"""
sed -e "s/ID_NGS_HERE/{idNGS}/g" < {input.tpl_plNGS} > {OUTDIR}/multiQC/PL-NGS_multiqc_config.yaml
sed -e "s/ID_NGS_HERE/{idNGS}/g" < {input.tpl_plBioinfo} > {OUTDIR}/multiQC/PL-Bioinfo_multiqc_config.yaml
multiqc -c {OUTDIR}/multiQC/PL-NGS_multiqc_config.yaml -n {output.rap_plNGS} {OUTDIR}/
multiqc -c {OUTDIR}/multiQC/PL-Bioinfo_multiqc_config.yaml -n {output.rap_plBioinfo} {OUTDIR}/
rm -r {OUTDIR}/temp
"""
onsuccess:
shell("date '+%d/%m/%Y %H:%M:%S' > time_end_RNAseq.txt")
是因为我在规则中使用它params
而不是在这条规则中吗?因为在规则中,在规则中使用它似乎不是问题,而不是在......output
SortMeRNA
inputs
version
Bowtie
outputs
input