我的程序需要读取一个输入文件，文件内容如下所示：
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 chr11 nonsense_mediate_decay exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000126746"; exon_number "11"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-015"; chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 chr11 nonsense_mediate_decay exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000132893"; exon_number "17"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-003"; chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 chr11 protein_coding exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000066391"; exon_number "22"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-001"; chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 chr11 protein_coding exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000081318"; exon_number "23"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-017"; chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 chr11 protein_coding exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000093407"; exon_number "16"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-202"; chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 chr11 protein_coding exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000101649"; exon_number "22"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-203"; chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 chr11 protein_coding exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000101655"; exon_number "22"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-020"; chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 chr11 
protein_coding exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000140846"; exon_number "20"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-016";
这是我的程序代码：
#include <stdio.h>
#include <string.h>
/*
 * Reformat paired records from "input_file.txt" into "thirdstep2a.txt".
 *
 * The input is consumed two lines at a time:
 *   - line 1: 37 whitespace-separated tokens are parsed; token 1 and
 *     tokens 4-37 are written (tokens 2 and 3 are parsed but not written);
 *   - line 2: 16 tokens are walked; tokens 10, 12, 14, 4 and 5 are appended
 *     in that order (tokens 3 and 16 are parsed but not written).
 * Every captured token is width-limited to fit its destination buffer.
 * A trailing line without a partner is ignored.
 *
 * A record whose two lines do not yield the expected number of fields is
 * reported on stderr and skipped, so stale or uninitialised buffers are
 * never written to the output.
 *
 * Returns 0 on success; 1 if either file cannot be opened, an I/O error
 * occurs, or at least one record had to be skipped.
 */
int main(void)
{
    /* Number of fields each sscanf call must assign for a valid record. */
    enum { LINE1_FIELDS = 37, LINE2_FIELDS = 7 };

    static const char filename[] = "input_file.txt";
    static const char outname[] = "thirdstep2a.txt";

    char line[BUFSIZ], line2[BUFSIZ];
    char one[20], three[22], four[20];
    char a1[20], a2[20], a3[20], a4[20], a5[20], a6[20], a7[20], a8[20], a9[20], a10[20];
    char a11[20], a12[20], a13[20], a14[20], a15[20], a16[20], a17[20], a18[20], a19[20], a20[20];
    char a21[20], a22[20], a23[20], a24[20], a25[20], a26[20], a27[20], a28[20], a29[20], a30[20];
    char a31[20], a32[20], a33[20], a34[20];
    char one2[20], three2[20], four2[26], six2[26], seven2[20], eight2[11], nine2[20];

    unsigned long record = 0;
    int status = 0;

    FILE *file3 = fopen(outname, "w");
    if (file3 == NULL) {
        perror(outname);
        return 1;
    }

    FILE *file = fopen(filename, "r");
    if (file == NULL) {
        perror(filename);
        fclose(file3);
        return 1;
    }

    /* Stop as soon as a complete pair of lines can no longer be read. */
    while (fgets(line, sizeof line, file) != NULL &&
           fgets(line2, sizeof line2, file) != NULL) {
        record++;

        int got1 = sscanf(line,
                          "%19s %21s %19s"                                     /* one, three, four */
                          " %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s" /* a1  - a10 */
                          " %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s" /* a11 - a20 */
                          " %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s" /* a21 - a30 */
                          " %19s %19s %19s %19s",                              /* a31 - a34 */
                          one, three, four,
                          a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
                          a11, a12, a13, a14, a15, a16, a17, a18, a19, a20,
                          a21, a22, a23, a24, a25, a26, a27, a28, a29, a30,
                          a31, a32, a33, a34);

        int got2 = sscanf(line2,
                          "%*s %*s %10s %19s %19s %*s %*s %*s %*s %25s %*s %25s %*s %19s %*s %19s",
                          eight2, one2, three2, four2, six2, seven2, nine2);

        if (got1 != LINE1_FIELDS || got2 != LINE2_FIELDS) {
            /* Printing now would emit stale or uninitialised buffers. */
            fprintf(stderr,
                    "%s: record %lu malformed (line 1: %d of %d fields, line 2: %d of %d fields); skipped\n",
                    filename, record, got1, (int)LINE1_FIELDS, got2, (int)LINE2_FIELDS);
            status = 1;
            continue;
        }

        /*
         * Separators are reproduced from the original format string,
         * including the one separator that carries an extra space before
         * the tab, so existing consumers see byte-compatible output.
         */
        fprintf(file3,
                "%s\t "                                              /* one */
                "%s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t " /* a1  - a10 */
                "%s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t " /* a11 - a20 */
                "%s\t %s\t %s\t %s\t %s\t %s\t %s\t "                /* a21 - a27 */
                "%s \t "                                             /* a28 */
                "%s\t %s\t %s\t %s\t %s\t %s\t",                     /* a29 - a34 */
                one,
                a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
                a11, a12, a13, a14, a15, a16, a17, a18, a19, a20,
                a21, a22, a23, a24, a25, a26, a27,
                a28,
                a29, a30, a31, a32, a33, a34);
        fprintf(file3, "%s\t %s\t %s\t %s\t %s\t\n", four2, six2, seven2, one2, three2);
    }

    if (ferror(file)) {
        fprintf(stderr, "%s: read error\n", filename);
        status = 1;
    }
    fclose(file);

    /* fclose flushes buffered output, so its result matters for a write stream. */
    int write_failed = ferror(file3);
    if (fclose(file3) != 0) {
        write_failed = 1;
    }
    if (write_failed) {
        fprintf(stderr, "%s: write error\n", outname);
        status = 1;
    }

    return status;
}
我的输出如下所示:
chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000126746”;“11”;3036949 3037109 chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000132893”;“17”;3036949 3037109 chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000066391”;“22”;3036949 3037109 chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000081318”;“23”;3036949 3037109 chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000093407”;“16”;3036949 3037109 chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000101649”;“22”;3036949 3037109 chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000101655”;“22”;3036949 3037109 chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000140846”;“20”;3036949 3037109 chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000153425”;“21”;3036949 3037109 chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000137633”;“3”;3036949 3037109 chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000138126”;
如您所见，输出结果不正确：a5 字符串中出现了不属于它的数据（输入中对应的值是 4，输出中却是 “Sfi1”;）。我找不出错误出在哪里。