1

我正在尝试编写一个读取 .pdb 文件的程序,这是生物学应用程序中使用的一种文件类型。这类文件采用标准格式,各列数据之间以数量不等的空白分隔。文件的格式如下:

ATOM      4  N   ALA     1       2.670   1.801   1.072  0.00  0.00
ATOM      5  CA  ALA     1       3.225   3.144   1.197  0.00  0.00
ATOM      6  C   ALA     1       4.408   3.341   0.256  0.00  0.00
ATOM      7  O   ALA     1       4.553   4.394  -0.363  0.00  0.00
....      .  ..  ...     .       .....   .....   .....  ..... ....

所以我的程序(可能写得不好)定义了一个结构体,读取数据(读取部分借用自另一篇帖子:http://www.daniweb.com/software-development/c/threads/65455/reading-a-file-using-fscanf ),并将其存入结构体数组中。现在,如果我在内层 if 语句块中打印这些值,输出的数据是正确的。但是,当我在外层 while 循环结束之后打印同样的值时,得到的 atom[].name 是错误的(恰好都是 HA,即输入文件第三列的最后一个值)。所有其他值都是正确的。

这是我的程序

#include <stdio.h>

/*
 * One parsed record (one line) of the .pdb input.
 * Fields appear in the same order as the whitespace-separated columns
 * that main() reads with sscanf.
 * The char * members only store addresses; the struct does not own any
 * string storage itself.
 */
typedef struct
{
   char *atm;          /* 1st column: record tag ("ATOM" in the sample data) */
   int serial;         /* 2nd column: integer serial number */
   char *name;         /* 3rd column: atom name, e.g. "N", "CA" */
   char *resName;      /* 4th column: residue name, e.g. "ALA" */
   int resSeq;         /* 5th column: integer residue sequence number */
   double x, y, z;     /* 6th-8th columns: coordinates */
   double occupancy;   /* 9th column */
   double tempFactor;  /* 10th column */
} pdb;

/*
 * Reads "test.pdb" line by line, parses every line that yields all ten
 * fields into atom[], then prints the stored atom name of each record.
 *
 * NOTE(review): this is the version that exhibits the reported problem.
 * atm, name and resName are single local arrays that sscanf overwrites on
 * every line, while atom[i].atm / .name / .resName only store the ADDRESS
 * of those arrays. After the loop every record therefore points at the
 * same three buffers and prints whatever the last parsed line left there.
 *
 * Other hazards visible in this block:
 *  - the result of fopen is used without a NULL check;
 *  - the %s conversions have no field width, so a token longer than
 *    4 / 2 / 3 characters overflows atm / name / resName;
 *  - nothing stops i from running past the 28 elements of atom[].
 */
int main(int argc, char** argv)
{
   int i, j;
   pdb atom[28];      /* fixed capacity: at most 28 records */
   char atm[5];       /* shared scratch buffer: 4 characters + NUL */
   char name[3];      /* shared scratch buffer: 2 characters + NUL */
   char resName[4];   /* shared scratch buffer: 3 characters + NUL */
   int serial; 
   int resSeq;
   double x;
   double y;
   double z;
   double occupancy;
   double tempFactor;
   char buff[BUFSIZ];
   FILE *file = fopen("test.pdb", "r");

   i = 0;   /* number of records stored so far */
   while (fgets(buff, sizeof buff, file) != NULL) 
   {
      /* Only lines on which all ten conversions succeed are stored. */
      if (sscanf(buff, "%s %d %s %s %d %lf %lf %lf %lf %lf", 
         atm, &serial, name, resName, &resSeq, &x, &y, &z, 
         &occupancy, &tempFactor) == 10)
      {
         /* Pointer assignments: these copy the array addresses, not the
            characters, so every record aliases the same buffers. */
         atom[i].atm = atm;
         atom[i].serial = serial;
         atom[i].name = name;
         atom[i].resName = resName;
         atom[i].resSeq = resSeq;
         atom[i].x = x;
         atom[i].y = y;
         atom[i].z = z;
         atom[i].occupancy = occupancy;
         atom[i].tempFactor = tempFactor;
         i++;
         /* NOTE(review): if the debug block below is re-enabled as written,
            it runs after i++ and so reads atom[i], the next slot, which has
            not been filled yet. */
         /*printf("%s ", atom[i].atm);
         printf("%d ", atom[i].serial);
         printf("%s ", atom[i].name);
         printf("%s ", atom[i].resName);
         printf("%d ", atom[i].resSeq);
         printf("%lf ", atom[i].x);
         printf("%lf ", atom[i].y);
         printf("%lf ", atom[i].z);
         printf("%lf ", atom[i].occupancy);
         printf("%lf\n", atom[i].tempFactor);*/
      }
   }
   fclose(file);
   /* Prints "<index> of <last index>: <name>" for every stored record. */
   for (j = 0; j < i; j++)
      printf("%d of %d: %s\n", j, i-1, atom[j].name);

   return(0);
}

知道为什么会发生这种情况吗?此外,对程序格式/结构的任何帮助也将不胜感激。我更像是一个 Fortran 人,所以 C 结构超出了我的专业领域。

提前致谢。

编辑: jsn 帮了我,Randy Howard 改进了它。这是更新的工作程序:

#include <stdio.h>
#include <stdlib.h>

/*
 * One parsed record (one line) of the .pdb input.
 * Fields appear in the same order as the whitespace-separated columns
 * that main() reads with sscanf.
 * In this program main() points the three char * members at buffers it
 * allocates with malloc for each record.
 */
typedef struct
{
   char *atm;          /* 1st column: record tag ("ATOM" in the sample data) */
   int serial;         /* 2nd column: integer serial number */
   char *name;         /* 3rd column: atom name, e.g. "N", "CA" */
   char *resName;      /* 4th column: residue name, e.g. "ALA" */
   int resSeq;         /* 5th column: integer residue sequence number */
   double x, y, z;     /* 6th-8th columns: coordinates */
   double occupancy;   /* 9th column */
   double tempFactor;  /* 10th column */
} pdb;

/*
 * Reads "test.pdb" line by line, stores every line that parses as a
 * ten-field record in atom[], then prints all stored records.
 *
 * Each record gets its own heap buffers for the three string fields, so
 * records never alias each other. Lines that do not yield all ten fields
 * are skipped.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if the file cannot be
 * opened or memory runs out. If the file holds more records than atom[]
 * can take, the surplus is ignored and a warning goes to stderr.
 */
int main(int argc, char** argv)
{
   /*
    * String capacities, excluding the NUL. The widths in the sscanf format
    * below ("%6s", "%4s", "%4s") must be kept in sync with these values.
    * They are sized to cover the PDB record-name and atom-name columns
    * (see the wwPDB format description) rather than just the sample data.
    */
   enum { MAX_ATOMS = 28, ATM_LEN = 6, NAME_LEN = 4, RESNAME_LEN = 4 };

   pdb atom[MAX_ATOMS];
   char buff[BUFSIZ];
   int i, j;
   int status = EXIT_SUCCESS;
   FILE *file;

   (void)argc;
   (void)argv;

   file = fopen("test.pdb", "r");
   if (file == NULL)
   {
      perror("test.pdb");
      return EXIT_FAILURE;
   }

   i = 0;   /* number of records stored so far */
   while (fgets(buff, sizeof buff, file) != NULL)
   {
      int serial;
      int resSeq;
      double x;
      double y;
      double z;
      double occupancy;
      double tempFactor;
      char *atm = malloc(ATM_LEN + 1);
      char *name = malloc(NAME_LEN + 1);
      char *resName = malloc(RESNAME_LEN + 1);

      if (atm == NULL || name == NULL || resName == NULL)
      {
         fprintf(stderr, "out of memory\n");
         free(atm);
         free(name);
         free(resName);
         status = EXIT_FAILURE;
         break;
      }

      /* Field widths keep sscanf from writing past the buffers above. */
      if (sscanf(buff, "%6s %d %4s %4s %d %lf %lf %lf %lf %lf",
                 atm, &serial, name, resName, &resSeq, &x, &y, &z,
                 &occupancy, &tempFactor) != 10)
      {
         /* Not a complete record: drop this line's buffers and move on. */
         free(atm);
         free(name);
         free(resName);
         continue;
      }

      if (i == MAX_ATOMS)
      {
         fprintf(stderr, "warning: more than %d records, ignoring the rest\n",
                 MAX_ATOMS);
         free(atm);
         free(name);
         free(resName);
         break;
      }

      /* atom[i] takes ownership of the three buffers. */
      atom[i].atm = atm;
      atom[i].serial = serial;
      atom[i].name = name;
      atom[i].resName = resName;
      atom[i].resSeq = resSeq;
      atom[i].x = x;
      atom[i].y = y;
      atom[i].z = z;
      atom[i].occupancy = occupancy;
      atom[i].tempFactor = tempFactor;
      i++;
   }
   fclose(file);

   for (j = 0; j < i; j++)
   {
      printf("%s %d %s %s %d %lf %lf %lf %lf %lf\n",
             atom[j].atm, atom[j].serial, atom[j].name, atom[j].resName,
             atom[j].resSeq, atom[j].x, atom[j].y, atom[j].z,
             atom[j].occupancy, atom[j].tempFactor);
   }

   /* Release the per-record string buffers. */
   for (j = 0; j < i; j++)
   {
      free(atom[j].atm);
      free(atom[j].name);
      free(atom[j].resName);
   }

   return status;
}
4

1 回答 1

2

在 while 循环中,您需要为每条记录的每个 char* 字段分配新的内存。您现在的代码每次循环都在覆盖同一块缓冲区。

/* Fragment: only the start of the loop is shown. */
while (fgets(buff, sizeof buff, file) != NULL) 
{

    /* Fresh buffers on every iteration, so each stored record can keep
       its own strings. Sizes include the NUL: at most 4, 2 and 3
       characters respectively. */
    char *atm = malloc(sizeof(char) * 5); 
    char *name = malloc(sizeof(char) * 3); 
    char *resName = malloc(sizeof(char) * 4); 

    /* NOTE(review): the %s conversions carry no field width, so longer
       tokens would write past these buffers. */
    if (sscanf(buff, "%s %d %s %s %d %lf %lf %lf %lf %lf", 
             atm, &serial, name, resName, &resSeq, &x, &y, &z, 
             &occupancy, &tempFactor) == 10)

您复制的只是指向 char 数组的指针,而不是数组中的内容,因此所有记录都指向同一块缓冲区,打印出来的名称都相同(即最后读入的那一项)。

于 2013-04-02T14:30:51.723 回答