1

我正在尝试计算 18456 个基因的相关性度量,但编译器(Dev C)在增加宏GENEINDEX达到 4000 到 5000 或更大之间的值后退出。例如,它适用于:

# define GENE 4000
# define INDEX 3000 

但不是:

#define GENE 5000 
#define INDEX 100

输入文件是一个以空格分隔的文本文件,有 18456 行和 57 列。这是代码:

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <limits.h>
#define GENE 5000
#define N 57
#define INDEX 1000


int main (void) {

clock_t start, stop;
double t = 0.0;

int i, j, p, q, wp, wq;
double x;
double *S_matrix = (double *)malloc(INDEX * GENE * sizeof(double));
double sum_S, S_max;
double S[11] = {0};
double r = 0.0, xbar = 0.0, ybar = 0.0, sx = 0.0, sy = 0.0;

// read E matrix

FILE *fq;
double E[GENE][N] = {{0}};

if ((fq = fopen("E_disease.txt", "r")) == NULL ) 
{
     printf("Error\n");
     exit(EXIT_FAILURE);
}

fq = fopen("E_disease.txt","r");
printf("\n");

for (i=0;i<GENE;i++)
{
    for(j=0;j<N;j++)
    {
        fscanf(fq,"%lf",&x);
        E[i][j] = x;
    }
}


printf("\n");
fclose(fq);


// calculate correlation

assert((start = clock())!=-1);

for(p=0; p < INDEX; p++)
{
    for(q=0; q < GENE; q++)
    {
        for(i=0; i<11; i++)
        {

            /*compute xbar */
            for(j = i; j < N; j++) 
            {
                xbar += E[p][j];
            }

            xbar /= N;

            /*compute ybar*/
            for(j = i; j < N; j++) 
            {
                ybar += E[q][j];
            }

            ybar /= N;

            /* compute standard deviation of x*/
            for(j = i; j < N; j++) 
            {
                sx += (E[p][j] - xbar) * (E[p][j] - xbar);
            }

            sx = sqrt(sx);

            /* compute standard deviation of y */
            for(j = i; j < N; j++) 
            {
                sy += (E[q][j] - ybar) * (E[q][j] - ybar);
            }

            sy = sqrt(sy);

            /*compute r, the correlation coefficient between the two arrays */
            for( j = i; j < N; j++ ) 
            {
                r += (((E[p][j] - xbar)/sx) * ((E[q][j] - ybar)/sy));
            }

            r /= (N); 

            if(r>0)
            {
                S[i] = r;
            }
            else if(r<=0)
            {
                S[i] = 0;
            }

         }


         for(j=0, sum_S=0; j<11; j++)
         {  
             sum_S += S[j];
         }


         for(j=0, S_max = 0; j<11; j++)
         {
            if(S[j] > S_max)
            {
                S_max = S[j];
            }
         }

      S_matrix[p*GENE + q] = sum_S/(11*S_max);

  }

}

FILE * fs;

fs = fopen ("s_matrix.txt", "w+");

for(wp=0; wp<INDEX; ++wp)
{
    for(wq=0; wq<GENE; ++wq)
    {

        fprintf(fs, "%lf", S_matrix[wp*GENE + wq]);
        fprintf(fs, "\t");
    }

    fprintf(fs, "\n");
    printf("\n");
}

fclose(fs);

stop = clock();
t = (double) (stop-start)/CLOCKS_PER_SEC;


printf("Run time: %f\n", t);    

//print results 



//return (0);

getchar();

}

让我简化代码。当我运行下面的代码几次时,它通常会立即退出。有一次,它说找不到类似0xff12345. S_matrix[55]另一次,当我在 main 中定义常量(其余代码相同)时它打印出来,int GENE=100;但只是一次。这是否意味着内存泄漏?当我编译它时它没有给出错误消息,而是定义矩阵并为它们赋值是真的吗?

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <assert.h>
#include <time.h>
#include <limits.h>
#define GENE 100
#define N 57
#define INDEX 10




int main (void) {

//int GENE = 100;
//int N = 57;
//int INDEX = 10;
int i, j; 
double x;
double *S_matrix = (double *)malloc(INDEX * GENE * sizeof(double));
double *E = (double*)malloc(GENE*N*sizeof(double));


// read E matrix

FILE *fq;


if ((fq = fopen("E_control.txt", "r")) == NULL ) 
{
     printf("Error\n");
     exit(EXIT_FAILURE);
}

fq = fopen("E_control.txt","r");
printf("\n");

for (i=0;i<GENE;i++)
{
    for(j=0;j<N;j++)
    {
        fscanf(fq,"%lf",&x);
        E[i*GENE+j] = x;
    }
}


printf("\n");
fclose(fq);

for(i=0; i<INDEX; i++)
{
         for(j=0; j<GENE; j++)
         {        
              S_matrix[i*INDEX+j]=i*j;
         }
}

printf("%f " , S_matrix[55]);


free(S_matrix);
S_matrix=NULL;
free(E);
E=NULL;
return(0);
getchar();
getchar();

}
4

1 回答 1

4

由于固定数组声明过大,您正试图保留2280000字节的堆栈空间(实际上更多) 。main()具体来说,这一行:

double E[GENE][N] = {{0}};

相当于

double E[5000][57] = {{0}};

在每双 8 字节的情况下,这很可能会耗尽你的堆栈。改为对该数组使用动态分配。例如:

double (*E)[N] = malloc(5000*sizeof(*E));

完成后不要忘记释放它。

全局固定分配也将起作用(即在功能块之外将其声明为全局。main()

static double E[GENE][N];

int main()
{
    ... your code ...
}

您选择的任何方法都有潜在的优势和缺陷,因此请做出相应的计划。

于 2013-08-27T18:30:38.473 回答