1

我正在用 Python 做一些生物信息学工作,利用 Biopython 和 ClustalW2 来比对蛋白质序列。我在这方面还是新手(只有几个月的经验),在处理 stdout 以及遍历整个目录时遇到了问题。如能提供任何帮助,我将不胜感激。

所以我写了这个,它一次处理一个文件并产生所需的结果......

#!/usr/bin/python

import Bio
import os
from Bio.Align.Applications import ClustalwCommandline
from Bio import Seq
from Bio import SeqIO
from Bio import AlignIO
from Bio.SeqRecord import SeqRecord


clustal_loc=r"/Users/Wes/Desktop/eggNOG_files/clustalw-2.1-macosx/clustalw2"

# Align one FASTA file with ClustalW2.
#
# The script prompts for an input FASTA path and an output filename, lists the
# name and length of every record in the input, then runs ClustalW2 to write a
# PIR-format alignment to the output filename.
f_in = raw_input("Enter the filepath of the FASTA to align: ")
f_out = raw_input("Enter the output filename: ")

try:
    # The with-block closes the input handle even if parsing raises.
    with open(f_in) as fh:
        for record in SeqIO.parse(fh, "fasta"):
            print("Name: %s, size: %s" % (record.id, len(record.seq)))
except Exception as err:
    print("Could not parse. Check to make sure filepath is correct and that file is in   FASTA format")
    print("Details: %s" % err)
else:
    # Only attempt the alignment when the input could be read and parsed.
    try:
        # Validate the executable before building the command; an explicit
        # raise is used because assert statements are stripped under -O.
        if not os.path.isfile(clustal_loc):
            raise IOError("Clustal W not found")
        # ClustalW creates the output file itself, so the script passes the
        # path strings and does not open f_out here.
        cl = ClustalwCommandline(clustal_loc, infile=f_in, outfile=f_out,
                                 align=True, outorder="ALIGNED",
                                 convert=True, output="pir")
        stdout, stderr = cl()
        print(cl)
    except Exception as err:
        print("There was a problem aligning. Check ClustalW path and .fasta input.")
        print("Details: %s" % err)

...这似乎工作得很好。当我尝试在整个目录上迭代它时,问题就来了(比如需要对齐的 1000 多个蛋白质序列文件)。我知道问题出在标准输出上,但我目前还太业余,不知道如何解决它。下面是损坏的代码:

#!/usr/bin/python

import Bio
import os
from Bio.Align.Applications import ClustalwCommandline
from Bio import Seq
from Bio import SeqIO
from Bio import AlignIO
from Bio.SeqRecord import SeqRecord
import subprocess
from subprocess import Popen
# Path to the ClustalW2 executable used for every alignment.
clustal_loc=r"/Users/Wes/Desktop/eggNOG_files/clustalw-2.1-macosx/clustalw2"

# Batch version of the single-file script: prompts for a folder, then tries to
# align each entry in it and write "<entry>.pir" into the same folder.
# This is the asker's non-working attempt; the NOTE(review) comments below mark
# the spots that differ from the working single-file version.
try:

    folder= raw_input("Enter the folder of .fasta files to iterate over and align: ")
    # os.listdir returns every entry in the folder without filtering by
    # extension, so ".pir" files written to this folder by a previous run
    # would be picked up as inputs too.
    listing = os.listdir(folder)

    for infile in listing:
        # Python 2 print statement.
        print folder+'/'+infile
        f_in = open(folder+'/'+infile,'r')

        # Opens (and truncates) the output path before ClustalW is run.
        f_out=open(folder+'/'+infile+".pir",'w')


        # List the name and length of each record in the input file.
        for record in SeqIO.parse(f_in,"fasta"):
                id = record.id
                seq = record.seq
                print("Name: %s, size: %s"%(id,len(seq)))

        # NOTE(review): f_in and f_out are open file objects here, whereas the
        # single-file script passes path strings for infile/outfile.
        # Presumably ClustalwCommandline expects paths - confirm against the
        # Biopython documentation.
        clustalw_cline= ClustalwCommandline(clustal_loc,infile=f_in, outfile=f_out, align=True, outorder="ALIGNED", convert=True, output="pir")

        assert os.path.isfile(clustal_loc), "Clustal W not found"
        # NOTE(review): "sys" is not among this script's imports, so this line
        # raises NameError, which the bare except below swallows.
        saveout = sys.stdout
        # NOTE(review): the single-file script unpacks the call's result as
        # "stdout, stderr = cl()", so this assigns that result to sys.stdout
        # instead of a writable stream; the next line then restores the
        # original stream.
        sys.stdout = clustalw_cline()
        sys.stdout = saveout






        # Not reached if anything above raises, leaving both handles open.
        f_in.close()

        f_out.close()
# Bare except: catches every exception (including the NameError above) and
# prints the same message regardless of the actual cause.
except:
    print("There was a problem aligning. Check ClustalW path and .fasta folder format/location")

正如你所看到的,我已经把这个搞砸了。感谢您能提供的任何帮助。

4

1 回答 1

0

您看到的错误究竟是什么?您不应该将 sys.stderr 和 sys.stdout 设置为字符串值(clustalw_cline() 函数会将 ClustalW 的 stdout 和 stderr 作为字符串返回),否则您将无法再从 Python 向 stdout 写入任何内容。

我试图在下面清理和更正您的代码。

#!/usr/bin/env python

import Bio
import os
from glob import glob
from Bio.Align.Applications import ClustalwCommandline
from Bio import Seq
from Bio import SeqIO
from Bio import AlignIO
from Bio.SeqRecord import SeqRecord
import subprocess
from subprocess import Popen
clustal_loc=r"/Users/Wes/Desktop/eggNOG_files/clustalw-2.1-macosx/clustalw2"

# Align every "*.fasta" file in a user-supplied folder with ClustalW2.
#
# For each input file the script lists the name and length of every record,
# then runs ClustalW2 to write a PIR-format alignment next to the input, using
# the same base name with a ".pir" extension. Processing stops at the first
# failure; the most recently printed path identifies the file that failed.
try:
    # Validate the executable once, before any file is processed; an explicit
    # raise is used because assert statements are stripped under -O.
    if not os.path.isfile(clustal_loc):
        raise IOError("Clustal W not found")

    folder = raw_input("Enter the folder of .fasta files to iterate over and align: ")
    listing = glob(os.path.join(folder, '*.fasta'))
    if not listing:
        print("No .fasta files found in %s" % folder)

    for infile in listing:
        print(infile)
        # splitext returns a (root, ext) tuple; only the root is wanted.
        outfile = os.path.splitext(infile)[0] + '.pir'

        # SeqIO.parse accepts a filename and handles opening/closing itself.
        for record in SeqIO.parse(infile, "fasta"):
            print("Name: %s, size: %s" % (record.id, len(record.seq)))

        # ClustalW reads and writes the files itself, so the command line is
        # given path strings rather than open Python file objects.
        clustalw_cline = ClustalwCommandline(clustal_loc, infile=infile,
                                             outfile=outfile, align=True,
                                             outorder="ALIGNED", convert=True,
                                             output="pir")
        # The captured ClustalW output is returned as strings; sys.stdout is
        # left untouched so Python can keep printing.
        stdout, stderr = clustalw_cline()
except Exception as err:
    print("There was a problem aligning. Check ClustalW path and .fasta folder format/location")
    # Surface the underlying cause instead of discarding it.
    print("Details: %s" % err)
于 2012-08-27T22:29:29.283 回答