12

我有从 R 调用的 C 代码:

.C("giveProb",as.double(2),as.double(2),as.double(c(0,1,0,1,1,0,1,0)))

我想将它称为一堆(数十万或数百万)次(具有不同的第三个参数),当我将它放在 for 循环中大约 100 次时它工作正常,但任何高于此的东西都会使 R 崩溃。

我感觉这是使用 R_alloc 的内存问题。我在 C 中分配了六个数组,例如:

newCoefArray = (double *)R_alloc(1,curSize * sizeof(double));

但根据 R 手册“编写 R 扩展”:

此内存取自堆,并在 .C、.Call 或 .External 调用结束时释放。

我认为这意味着在循环的每次迭代期间都会释放内存。但在下一句中:

用户还可以通过调用 vmaxget 记录当前位置并随后通过调用 vmaxset 清除分配的内存来管理它。这仅推荐给专家。

作为一个远非专家的人,我希望这里有人可以提供帮助。C 代码的全部内容如下。

#include <R.h>
#include <Rmath.h>
#include <stdio.h>
void giveProb(double *k, double *q,double *order){
double  curSize;
double  tmpSize;
double  *newCoefArray;
double  *oldCoefArray;
double  *newAArray;
double  *oldAArray;
double  *newBArray;
double  *oldBArray;
int     position=0;

long int factorial(int n){
    if(n==0||n==1){
        return(1);
    }
    int tmp=1,i=1;
    while(i<=n){
        tmp=tmp*i;
        i++;
    }
    return(tmp);
} 

void expander(double a, double b,double c,double d,double coeff){
    double leadingTerm=beta(a,b);
    int bb=b; 
    double index[bb], sumLeaders[bb];
    for(int i=0;i<bb;i++){
        index[i]=a+i;
        sumLeaders[i]=factorial(a+b-1)/(factorial(index[i])*factorial(a+b-1-index[i]));
        newCoefArray[i+position]=coeff*leadingTerm*sumLeaders[i];
        newAArray[i+position]=index[i]+c+1;
        newBArray[i+position]=a+b+d-index[i];
    }
    position=position+bb;
    curSize=position;
}

void separator(double e, double f){
    double a, b, coeff;
    for(int i=0; i<tmpSize; i++){
        coeff=oldCoefArray[i];
        a=oldAArray[i];
        b=oldBArray[i];
        expander(a,b,e,f,coeff);
    }
}

void condenser(){
    tmpSize=0;
    for(int i=1; i<curSize; i++){
        for(int j=0; j<i; j++){
            if(newAArray[j]==newAArray[i]){
                newCoefArray[j]=newCoefArray[j]+newCoefArray[i];
                newCoefArray[i]=0;
            }
        }
    }
    for(int i=0; i<curSize; i++){
        tmpSize=tmpSize+(newCoefArray[i]!=0);
    }
    oldCoefArray =(double *) R_alloc(1,tmpSize * sizeof(double));
    oldAArray = (double *)R_alloc(1,tmpSize * sizeof(double));
    oldBArray = (double *)R_alloc(1,tmpSize * sizeof(double));
    for(int i=0; i<tmpSize; i++){
        oldCoefArray[i]=newCoefArray[i];
        oldAArray[i]=newAArray[i];
        oldBArray[i]=newBArray[i];
    }
    curSize=tmpSize;    
}

  long double coefficient=1;
  for(int i=0;i<*k;i++){
    coefficient=coefficient*factorial(*k)/(factorial(i)*factorial(*k-i-1));
  }
  for(int i=0;i<*q;i++){
    coefficient=coefficient*factorial(*q)/(factorial(i)*factorial(*q-i-1));
  }

  double numObs=*k+*q;
  double out=0;
  curSize=order[1]+1;

  newCoefArray = (double *)R_alloc(1,curSize * sizeof(double));
  newAArray = (double *)R_alloc(1,curSize * sizeof(double));
  newBArray = (double *)R_alloc(1,curSize * sizeof(double));

  expander(order[0]+1,order[1]+1,order[2],order[3],coefficient);

  oldCoefArray = (double *)R_alloc(1,curSize * sizeof(double));
  oldAArray = (double *)R_alloc(1,curSize * sizeof(double));
  oldBArray = (double *)R_alloc(1,curSize * sizeof(double));

  for(int i=0;i<curSize; i++){
    oldCoefArray[i]=newCoefArray[i];
    oldAArray[i]=newAArray[i];
    oldBArray[i]=newBArray[i];
  }

  for(int i=4;i<2*numObs;i+=2){ 
    position=0;
    tmpSize=curSize;
    separator(order[i],order[i+1]);
    condenser();
  }
  position=0;
  for(int i=0;i<curSize;i++){
    out=out+newCoefArray[i]*beta(newAArray[i],newBArray[i]);
  }
  *k=out;

}

更新: 使用下面评论中的建议,我得到以下信息(这证实了我之前的想法,对吧?):

R -d valgrind -f test_script.R ==11131== Memcheck,内存错误检测器 ==11131== 版权所有 (C) 2002-2010 和 GNU GPL,由 Julian Seward 等人撰写。==11131== 使用 Valgrind-3.6.0 和 LibVEX;使用 -h 重新运行以获取版权信息 ==11131== 命令:/usr/lib64/R/bin/exec/R -f test_script.R ==11131==

R version 2.15.1 (2012-06-22) -- "Roasted Marshmallows"
Copyright (C) 2012 The R Foundation for Statistical Computing
ISBN 3-900051-07-0
Platform: x86_64-redhat-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

==11131== Conditional jump or move depends on uninitialised value(s)
==11131==    at 0x3A6A685F80: __GI___strcasecmp_l (in /lib64/libc-2.12.so)
==11131==    by 0x3A6A61FF24: __gconv_open (in /lib64/libc-2.12.so)
==11131==    by 0x3A6A62D3B7: _nl_find_msg (in /lib64/libc-2.12.so)
==11131==    by 0x3A6A62DB83: __dcigettext (in /lib64/libc-2.12.so)
==11131==    by 0x3A6C3BD2DF: ??? (in /usr/lib64/R/lib/libR.so)
==11131==    by 0x3A6C313FC8: setup_Rmainloop (in /usr/lib64/R/lib/libR.so)
==11131==    by 0x3A6C315278: Rf_mainloop (in /usr/lib64/R/lib/libR.so)
==11131==    by 0x40084A: main (in /usr/lib64/R/bin/exec/R)
==11131==
==11131== Use of uninitialised value of size 8
==11131==    at 0x3A6A6863A4: __GI___strcasecmp_l (in /lib64/libc-2.12.so)
==11131==    by 0x3A6A61FF24: __gconv_open (in /lib64/libc-2.12.so)
==11131==    by 0x3A6A62D3B7: _nl_find_msg (in /lib64/libc-2.12.so)
==11131==    by 0x3A6A62DB83: __dcigettext (in /lib64/libc-2.12.so)
==11131==    by 0x3A6C3BD2DF: ??? (in /usr/lib64/R/lib/libR.so)
==11131==    by 0x3A6C313FC8: setup_Rmainloop (in /usr/lib64/R/lib/libR.so)
==11131==    by 0x3A6C315278: Rf_mainloop (in /usr/lib64/R/lib/libR.so)
==11131==    by 0x40084A: main (in /usr/lib64/R/bin/exec/R)
==11131==
==11131== Use of uninitialised value of size 8
==11131==    at 0x3A6A6863A8: __GI___strcasecmp_l (in /lib64/libc-2.12.so)
==11131==    by 0x3A6A61FF24: __gconv_open (in /lib64/libc-2.12.so)
==11131==    by 0x3A6A62D3B7: _nl_find_msg (in /lib64/libc-2.12.so)
==11131==    by 0x3A6A62DB83: __dcigettext (in /lib64/libc-2.12.so)
==11131==    by 0x3A6C3BD2DF: ??? (in /usr/lib64/R/lib/libR.so)
==11131==    by 0x3A6C313FC8: setup_Rmainloop (in /usr/lib64/R/lib/libR.so)
==11131==    by 0x3A6C315278: Rf_mainloop (in /usr/lib64/R/lib/libR.so)
==11131==    by 0x40084A: main (in /usr/lib64/R/bin/exec/R)
==11131==
  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

[Previously saved workspace restored]

> dyn.load("SchWolfenew.so")
> for(i in 1:1000){
+ .C("giveProb",as.double(2),as.double(2),as.double(c(0,1,0,1,1,0,1,0)))
+ }

==29371== Invalid read of size 1
==29371==    at 0x3A6C31A2E9: ??? (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C31CBC9: Rf_cons (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2D2B74: ??? (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2DD121: Rf_eval (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2DF830: Rf_applyClosure (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C32B828: Rf_usemethod (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C32BAE7: ??? (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2D290B: ??? (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2DD121: Rf_eval (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2DF830: Rf_applyClosure (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2DD3F7: Rf_eval (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2DEF4F: ??? (in /usr/lib64/R/lib/libR.so)
==29371==  Address 0x3ff0000000000003 is not stack'd, malloc'd or (recently) free'd
==29371==

 *** caught segfault ***
address (nil), cause 'unknown'
==29371== Invalid read of size 1
==29371==    at 0x3A6C31AF0B: ??? (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C31CBC9: Rf_cons (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C31CC71: Rf_allocList (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2C8CD4: R_GetTraceback (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C313472: ??? (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6B20F4FF: ??? (in /lib64/libpthread-2.12.so)
==29371==    by 0x3A6C31A2E8: ??? (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C31CBC9: Rf_cons (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2D2B74: ??? (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2DD121: Rf_eval (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2DF830: Rf_applyClosure (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C32B828: Rf_usemethod (in /usr/lib64/R/lib/libR.so)
==29371==  Address 0x4020000000000003 is not stack'd, malloc'd or (recently) free'd
==29371==
==29371==
==29371== Process terminating with default action of signal 11 (SIGSEGV)
==29371==  General Protection Fault
==29371==    at 0x3A6C31AF0B: ??? (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C31CBC9: Rf_cons (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C31CC71: Rf_allocList (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2C8CD4: R_GetTraceback (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C313472: ??? (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6B20F4FF: ??? (in /lib64/libpthread-2.12.so)
==29371==    by 0x3A6C31A2E8: ??? (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C31CBC9: Rf_cons (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2D2B74: ??? (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2DD121: Rf_eval (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C2DF830: Rf_applyClosure (in /usr/lib64/R/lib/libR.so)
==29371==    by 0x3A6C32B828: Rf_usemethod (in /usr/lib64/R/lib/libR.so)
==29371==
==29371== HEAP SUMMARY:
==29371==     in use at exit: 29,307,724 bytes in 12,896 blocks
==29371==   total heap usage: 28,845 allocs, 15,949 frees, 48,495,252 bytes allocated
==29371==
==29371== LEAK SUMMARY:
==29371==    definitely lost: 0 bytes in 0 blocks
==29371==    indirectly lost: 0 bytes in 0 blocks
==29371==      possibly lost: 0 bytes in 0 blocks
==29371==    still reachable: 29,307,724 bytes in 12,896 blocks
==29371==         suppressed: 0 bytes in 0 blocks
==29371== Rerun with --leak-check=full to see details of leaked memory
==29371==
==29371== For counts of detected and suppressed errors, rerun with: -v
==29371== Use --track-origins=yes to see where uninitialised values come from
==29371== ERROR SUMMARY: 5 errors from 5 contexts (suppressed: 21 from 9)
Segmentation fault (core dumped)
4

1 回答 1

14

我将您的 C 代码放在文件 memory.c 中,然后运行 ​​R CMD SHLIB memory.c。这是我的测试脚本

dyn.load("/tmp/memory.so")
set.seed(123L)
while (TRUE)
    .C("giveProb",as.double(2),as.double(2), sample(c(0, 1), 8, TRUE))

而 R -d valgrind -f test_script.R 说

> dyn.load("/tmp/memory.so")
> set.seed(123L)
> while (TRUE)
+     .C("giveProb",as.double(2),as.double(2), sample(c(0, 1), 8, TRUE))
==3461== Invalid write of size 8
==3461==    at 0xBF29D78: expander.4631 (memory.c:35)
==3461==    by 0xBF29EBB: separator.4643 (memory.c:49)
==3461==    by 0xBF29AAB: giveProb (memory.c:108)
==3461==    by 0x4EEAF49: do_dotCode (dotcode.c:1689)
==3461==    by 0x4F1F4E2: Rf_eval (eval.c:493)
==3461==    by 0x4F218F6: do_for (eval.c:1310)
==3461==    by 0x4F1F2E8: Rf_eval (eval.c:467)
==3461==    by 0x4F6B5FB: Rf_ReplIteration (main.c:256)
==3461==    by 0x4F6B7B2: R_ReplConsole (main.c:305)
==3461==    by 0x4F6D022: run_Rmainloop (main.c:987)
==3461==    by 0x4F6D037: Rf_mainloop (main.c:994)
==3461==    by 0x400845: main (Rmain.c:32)

(加上更多)。memory.c 第 35 行是

newCoefArray[i+position]=coeff*leadingTerm*sumLeaders[i];

所以 i + 位置大于分配的数组。

于 2012-08-23T13:27:26.733 回答