0

编码:

def readFasta(filename):
    """ Reads a sequence in Fasta format """
    fp = open(filename, 'rb')
    header = ""
    seq = ""
    while True:
        line = fp.readline()
        if (line == ""):
            break
        if (line.startswith(b'>')):
            header = line[1:].strip()
        else:
            seq = fp.read().replace(b'\n',b'')
            seq = seq.replace(b'\r',b'')          # for windows
            break
    fp.close()
    return (header, seq)

FASTAsequence = readFasta("MusChr01.fa")

'' 之前的 b 是必要的,因为我处于字节模式。问题是运行时, fp.read.replace 和 seq.replace 删除字符串中的所有内容。我确实知道 read 工作正常,因为

def readFasta(filename):
    """ Reads a sequence in Fasta format """
    fp = open(filename, 'rb')
    header = ""
    seq = ""
    while True:
        line = fp.readline()
        if (line == ""):
            break
        if (line.startswith(b'>')):
            header = line[1:].strip()
        else:
            seq = fp.read()
            break
    fp.close()
    return (header, seq)

FASTAsequence = readFasta("MusChr01.fa")

工作得很好。这里发生了什么?

4

2 回答 2

0

这是编写函数的更简洁的方法。不知道为什么它还不适合你

def readFasta(filename):
    """ Reads a sequence in Fasta format """
    header = seq = b""
    with open(filename, 'rb') as fp:
        for line in fp:
            if not line:
                break
            if line.startswith(b'>')):
                header = line[1:].strip()
            else:
                seq = fp.read().translate(None, b'\r\n')
                break
    return (header, seq)
于 2013-11-07T06:20:31.020 回答
0

else块中,代码不考虑line. 尝试跟随。

def readFasta(filename):
    header = b""
    seq = b""
    with open(filename, 'rb') as fp:
        while True:
            line = fp.readline()
            if not line:
                break
            if line.startswith(b'>'):
                header = line[1:].strip()
            else:
                seq = line + fp.read() # <--- without `line +`, you lose a line.
                seq = seq.translate(None, b'\r\n')
                break
    return header, seq

来自维基百科的示例序列示例:

>>> with open('mchu.fasta', 'rb') as f: print(f.read().decode('ascii'))
... 
>MCHU - Calmodulin - Human, rabbit, bovine, rat, and chicken
ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTID
FPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREA
DIDGDGQVNYEEFVQMMTAK*

>>> readFasta('mchu.fasta')
(b'MCHU - Calmodulin - Human, rabbit, bovine, rat, and chicken', b'ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTIDFPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREADIDGDGQVNYEEFVQMMTAK*')
于 2013-11-07T06:35:16.280 回答