我正在尝试修改一段代码来提取 bam 文件中特定元素的信息。
这是我到目前为止所拥有的:
import pysam
import pandas as pd
import argparse
args = []
def parseargs(required_args=True):
class formatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawTextHelpFormatter):
pass
epilog = ("")
parser = argparse.ArgumentParser(description='Run.',
epilog=epilog,
formatter_class=formatter)
readable = argparse.FileType('r')
parser.add_argument('--bamfile')
parser.add_argument('--bed_file')
parser.add_argument('--genome_sizes')
parser.add_argument('--output')
global args
args = parser.parse_args()
for name, val in vars(args).items():
if hasattr(val, 'name'):
setattr(args, name, val.name)
print(args)
return args
print(args)
bamfile = "/mnt/d/Axiotl/ABC/Input/bam/ENCFF070PWH.bam"
bed_file = "/mnt/d/R/abc_3a_scaffold_hg19.bed"
output = "/mnt/d/Axiotl/ABC/Output/bam"
genome_sizes = "/mnt/d/Axiotl/ABC/Working_Copy/ABC-Enhancer-Gene-Prediction-master/reference/chr_sizes"
def count_bam(bamfile, bed_file, output, genome_sizes, use_fast_count=True, verbose=True):
reads = pysam.AlignmentFile(bamfile)
read_chrs = set(reads.references)
bed_regions = pd.read_table(bed_file, header=None)
bed_regions = bed_regions[bed_regions.columns[:3]]
bed_regions.columns = "chr start end".split()
counts = [(reads.count(row.chr, row.start, row.end) if (row.chr in read_chrs) else 0) for _, row in bed_regions.iterrows()]
bed_regions['count'] = counts
bed_regions.to_csv(output, header=None, index=None, sep="\t")
count_bam(bamfile, bed_file, output, genome_sizes)
我得到的错误是:
ValueError: Length mismatch: Expected axis has 1 elements, new values have 3 elements
这是在我在函数全局变量中创建所需文件之后。之前,我得到的错误是:
TypeError: count_bam() missing 3 required positional arguments: 'bed_file', 'output', and 'genome_sizes'
我觉得这与argparse有关。它读取第一个必需的参数(bamfile),但不注册其他 3 个。
任何帮助将不胜感激。