0

我需要使用 OQ 标记将读取质量作为附加字段添加到使用 pysam 的 bam 文件中

其他使用 samtools 等的传统方式会消耗更多时间并创建多个文件。

我尝试使用下面给出的脚本,但最终是无符号字符而不是质量得分字符串!任何帮助表示赞赏。

提前致谢。

例如。输入 bam:

E00577:205:HVF37CCXY:3:2224:32461:53258 99      chr1    10419997        60      151M    =       10420034        188     GGCAGTGGCTTCCGCGTGCCCCGTGTGCTGGTGCGGTTCCCATCACGCAGACAGGAAGGGTGTTTGCGCACTCTGATCAACTGGAACCTCTGTATCANGCGGCTGAATTCCCTTTTTCCTTNACTCNATAAAAGCTACATCAGACTGATGN      AAFFFJJJJJJJJJJJJJJJJJJJJJJJJJJJ<F<J-FJJJJJJJJJJJJJJFJJF<JJJFJ<JJJJJJ<JJFJJJJJJJJJJJJJJJJJJJFJJJJ#JJJJJAJJFJJJJJJJJJJJAFJ#FJJJ#JJJJJJJJFJJAJFAF<JJJAJF# MD:Z:97T23T4A23C0       PG:Z:MarkDuplicates  RG:Z:HVF37CCXY.3        NM:i:4  AS:i:144        XS:i:107

预期输出 bam:

E00577:205:HVF37CCXY:3:2224:32461:53258 99      chr1    10419997        60      151M    =       10420034        188     GGCAGTGGCTTCCGCGTGCCCCGTGTGCTGGTGCGGTTCCCATCACGCAGACAGGAAGGGTGTTTGCGCACTCTGATCAACTGGAACCTCTGTATCANGCGGCTGAATTCCCTTTTTCCTTNACTCNATAAAAGCTACATCAGACTGATGN      AAFFFJJJJJJJJJJJJJJJJJJJJJJJJJJJ<F<J-FJJJJJJJJJJJJJJFJJF<JJJFJ<JJJJJJ<JJFJJJJJJJJJJJJJJJJJJJFJJJJ#JJJJJAJJFJJJJJJJJJJJAFJ#FJJJ#JJJJJJJJFJJAJFAF<JJJAJF# MD:Z:97T23T4A23C0       PG:Z:MarkDuplicates  RG:Z:HVF37CCXY.3        NM:i:4  AS:i:144        XS:i:107        OQ:Z:AAFFFJJJJJJJJJJJJJJJJJJJJJJJJJJJ<F<J-FJJJJJJJJJJJJJJFJJF<JJJFJ<JJJJJJ<JJFJJJJJJJJJJJJJJJJJJJFJJJJ#JJJJJAJJFJJJJJJJJJJJAFJ#FJJJ#JJJJJJJJFJJAJFAF<JJJAJF#

皮萨姆

import os
import argparse
import pysam
parser = argparse.ArgumentParser(description = 'Generate BAM with OQ tag')
parser.add_argument('-i', '--input', required=True, help='Input mark dup BAM File')
parser.add_argument('-o', '--output', required=True, help='Output BAM file with OQ tag')

args = parser.parse_args()
infile_path = os.path.abspath(args.input)
outfile_path = os.path.abspath(args.output)

infile = pysam.AlignmentFile(infile_path, "rb")
outfile = pysam.AlignmentFile(outfile_path, "wb", template=infile)
iter = infile.fetch(until_eof=True)
for read in iter:
    read.set_tag("OQ", read.query_qualities, replace=False)
    outfile.write(read)
infile.close()
outfile.close()

python GenerateBamWithOQTag.py -i subset.bam -o subset_OQ.bam

E00577:205:HVF37CCXY:3:2224:32461:53258 99      chr1    10419997        60      151M    =       10420034        188     GGCAGTGGCTTCCGCGTGCCCCGTGTGCTGGTGCGGTTCCCATCACGCAGACAGGAAGGGTGTTTGCGCACTCTGATCAACTGGAACCTCTGTATCANGCGGCTGAATTCCCTTTTTCCTTNACTCNATAAAAGCTACATCAGACTGATGN      AAFFFJJJJJJJJJJJJJJJJJJJJJJJJJJJ<F<J-FJJJJJJJJJJJJJJFJJF<JJJFJ<JJJJJJ<JJFJJJJJJJJJJJJJJJJJJJFJJJJ#JJJJJAJJFJJJJJJJJJJJAFJ#FJJJ#JJJJJJJJFJJAJFAF<JJJAJF# MD:Z:97T23T4A23C0       PG:Z:MarkDuplicates  RG:Z:HVF37CCXY.3        NM:i:4  AS:i:144        XS:i:107        OQ:B:C,32,32,37,37,37,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,27,37,27,41,12,37,41,41,41,41,41,41,41,41,41,41,41,41,41,41,37,41,41,37,27,41,41,41,37,41,27,41,41,41,41,41,41,27,41,41,37,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,37,41,41,41,41,2,41,41,41,41,41,32,41,41,37,41,41,41,41,41,41,41,41,41,41,41,32,37,41,2,37,41,41,41,2,41,41,41,41,41,41,41,41,37,41,41,32,41,37,32,37,27,41,41,41,32,41,37,2
4

1 回答 1

0
"""
USAGE:-

python Generate_bam_with_OQtag.py -i <input_file>  -o <output_file>

"""

import os
import argparse
import pysam

parser = argparse.ArgumentParser(description = 'Generate BAM with OQ tag')
parser.add_argument('-i', '--input', required=True, help='Input mark dup BAM File')
parser.add_argument('-o', '--output', required=True, help='Output BAM file with OQ tag')

args = parser.parse_args()
infile_path = os.path.abspath(args.input)
outfile_path = os.path.abspath(args.output)

infile = pysam.AlignmentFile(infile_path, "rb")
outfile = pysam.AlignmentFile(outfile_path, "wb", template=infile)
iter = infile.fetch(until_eof=True)
for read in iter:
    read.set_tag('OQ', pysam.qualities_to_qualitystring(read.query_qualities), replace=False)
    outfile.write(read)
infile.close()
outfile.close()
于 2020-07-19T12:18:01.257 回答