0

我的脚本需要一个输入文件,文件看起来像这样。

chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18
chr11 nonsense_mediate_decay 外显子 3036949 3037109 。- 。基因ID“ENSMUSG00000023764”;成绩单ID“ENSMUST00000126746”;外显子编号“11”;基因名称“Sfi1”;基因生物型“蛋白质编码”;成绩单名称“Sfi1-015”;
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18
chr11 nonsense_mediate_decay 外显子 3036949 3037109 。- 。基因ID“ENSMUSG00000023764”;成绩单ID“ENSMUST00000132893”;外显子编号“17”;基因名称“Sfi1”;基因生物型“蛋白质编码”;成绩单名称“Sfi1-003”;
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18
chr11 蛋白质编码外显子 3036949 3037109。- 。基因ID“ENSMUSG00000023764”;成绩单id“ENSMUST00000066391”;外显子编号“22”;基因名称“Sfi1”;基因生物型“蛋白质编码”;成绩单名称“Sfi1-001”;
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18
chr11 蛋白质编码外显子 3036949 3037109。- 。基因ID“ENSMUSG00000023764”;transcript_id "ENSMUST00000081318"; 外显子编号“23”;基因名称“Sfi1”;基因生物型“蛋白质编码”;成绩单名称“Sfi1-017”;
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18
chr11 蛋白质编码外显子 3036949 3037109。- 。基因ID“ENSMUSG00000023764”;transcript_id "ENSMUST00000093407"; 外显子编号“16”;基因名称“Sfi1”;基因生物型“蛋白质编码”;成绩单名称“Sfi1-202”;
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18
chr11 蛋白质编码外显子 3036949 3037109。- 。基因ID“ENSMUSG00000023764”;transcript_id "ENSMUST00000101649"; 外显子编号“22”;基因名称“Sfi1”;基因生物型“蛋白质编码”;成绩单名称“Sfi1-203”;

chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18
chr11 蛋白质编码外显子 3036949 3037109。- 。基因ID“ENSMUSG00000023764”;成绩单ID“ENSMUST00000101655”;外显子编号“22”;基因名称“Sfi1”;基因生物型“蛋白质编码”;成绩单名称“Sfi1-020”;
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18
chr11 蛋白质编码外显子 3036949 3037109。- 。基因ID“ENSMUSG00000023764”;成绩单ID“ENSMUST00000140846”;外显子编号“20”;基因名称“Sfi1”;基因生物型“蛋白质编码”;成绩单名称“Sfi1-016”;

这是我的脚本:

#include <stdio.h>
#include <string.h>
int main(void)
{
   static const char filename[] = "input_file.txt";
   FILE *file3 = fopen("thirdstep2a.txt","w");
   FILE *file = fopen(filename, "r");

   if ( file != NULL )
   {

      char line[BUFSIZ],line2[BUFSIZ] ;
      char one[20], three[22], four[20], a1[20],a2[20],a3[20],a4[20],a5[20],a6[20],a7[20],a8[20], a9[20], a10[20],a11[20],a12[20],a13[20],a14[20],a15[20],a16[20],a17[20],a18[20],a19[20],a20[20],a21[20],a22[20],a23[20],a24[20],a25[20],a26[20],a27[20],a28[20],a29[20],a30[20],a31[20],a32[20],a33[20],a34[20];

      char one2[20],three2[20],four2[26], six2[26], seven2[20], eight2[11], nine2[20];

     fgets(line, sizeof line, file);
     fgets(line2, sizeof line2, file);
     while(1)
     {
         sscanf(line, "%19s %21s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s", one, three, four, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32, a33, a34);
         sscanf(line2, "%*s %*s %10s %19s %19s %*s %*s %*s %*s %25s %*s %25s %*s %19s %*s %19s", eight2, one2, three2, four2, six2, seven2, nine2); 

         fprintf(file3,"%s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s \t %s\t %s\t %s\t %s\t %s\t %s\t", one, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32, a33, a34);

         fprintf(file3,"%s\t %s\t %s\t %s\t %s\t\n", four2, six2, seven2, one2, three2);
         if(fgets(line, sizeof line, file) == NULL)
             break;

         if(fgets(line2, sizeof line, file) == NULL)
             break;
      }
   }
   else
   {
      perror(filename);
   }
   return 0;
}

我的输出如下所示:

chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000126746”;“11”;3036949 3037109    
chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000132893”;“17”;3036949 3037109    
chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000066391”;“22”;3036949 3037109    
chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000081318”;“23”;3036949 3037109    
chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000093407”;“16”;3036949 3037109    
chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000101649”;“22”;3036949 3037109    
chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000101655”;“22”;3036949 3037109    
chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000140846”;“20”;3036949 3037109    
chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000153425”;“21”;3036949 3037109    
chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000137633”;“3”;3036949 3037109    
chr11 8 39 6 44 “Sfi1”;24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 “ENSMUSG00000023764”;“ENSMUST00000138126”;

如您所见,我的输出看起来不正确。我从 a5 字符串中得到了一些虚假数据。我无法弄清楚错误。

4

2 回答 2

3

编译代码时(在文件中bio.c),编译器会说:

$ gcc -O3 -g -Wall -Wextra -std=c99  bio.c -o bio  
bio.c: In function ‘main’:
bio.c:24: warning: too few arguments for format
bio.c:24: warning: too few arguments for format
bio.c:17: warning: unused variable ‘two2’
bio.c:15: warning: unused variable ‘five’
bio.c:9: warning: unused variable ‘i’
$

我懒得更准确地找出问题所在(第 24 行是sscanf()for line2),但警告表明代码有问题。

于 2012-07-23T04:16:17.137 回答
1

阅读您的编译器输出。海合会给出了这个

../../../vmc/_tests/chom.c:23:10: warning: too few arguments for format
../../../vmc/_tests/chom.c:23:10: warning: too few arguments for format
../../../vmc/_tests/chom.c:16:22: warning: unused variable ‘two2’
../../../vmc/_tests/chom.c:14:32: warning: unused variable ‘five’
../../../vmc/_tests/chom.c:8:8: warning: unused variable ‘i’

你要特别担心前两个。

于 2012-07-23T04:19:21.297 回答