2

我正在编写我的 R mcmc 代码的 C 版本。抛出错误的部分代码如下:

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <stddef.h>
#include <limits.h>

#include <gsl/gsl_machine.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_cdf.h>

#include <R.h>
#include <Rmath.h>
#include <Rembedded.h>
#include <Rdefines.h>
#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#include <R_ext/Linpack.h>
//#include <vecLib/cblas.h>      /* C BLAS in APPLE */
#include "blaio.h"           /*Linear Algebra I/O using http://www.mitchr.me/SS/exampleCode/blas.html*/

double rTruncNorm (double, double*, double, double);

// C version of group lasso
void BinaryBayesianGroupLasso (double *X, int *dim, int *y, int *iterations, double *bprev, double *Lprev) {

    // X is design matrix with intercept
    //dim is dimensions of X
    int iter = 1, i;
    int n = dim[0];
    int p = dim[1];
    double sigma = 1, z[n], mu[n], u, a, b;

    // random number generation set up
    const double lowest_double = -GSL_DBL_MAX;
    const double highest_double = GSL_DBL_MAX;
    const gsl_rng *gBaseRand;       /* global rand number generator */
    unsigned long randSeed;    

    /* specifying to use Mersenne twister as the PRNG */
    gBaseRand = gsl_rng_alloc(gsl_rng_mt19937);
    srand(time(NULL));                    /* initialization for rand() */
    randSeed = rand();                    /* returns a non-negative integer */
    gsl_rng_set(gBaseRand, randSeed);    /* seed the PRNG */

    while(iter < iterations){

        cblas_dgemv(CblasRowMajor, CblasNoTrans, n, p, 1, X, p, bprev, 1, 0, mu, 1);
        printVector(n, mu, 8, 3, NULL, NULL, NULL, "       mu"); // width=8, precision=3

        for (i = 0; i < n; i++) {
            u = gsl_ran_flat(gBaseRand, 0, 1);
            if(y[i]==1){
                a = 0.0 - mu[i];
                b = highest_double;
            } else {
                a = lowest_double;
                b = 0.0 - mu[i];
            }
            z[i] = mu[i] + rTruncNorm(sigma, &u, a, b);            
        }

        printVector(n, z, 8, 3, NULL, NULL, NULL, "       mu"); // width=8, precision=3

    }
    gsl_rng_free(gBaseRand);
}



// gaussian truncated normal between [a,b], params - mu=0, sigma, unifrnd = u
double rTruncNorm(double sigma, double *u, double a, double b){

    double unifr = *u;
    double Fa = gsl_cdf_gaussian_P(a, sigma);
    double Fb = gsl_cdf_gaussian_P(b, sigma);
    double v = Fa + (Fb-Fa)*unifr;
    double x = gsl_cdf_gaussian_Pinv(v, sigma);
    return(x);
}

blaio.h 中的函数已经使用网站中的 makefile 构建,我是从那里得到的。

我正在使用以下内容进行编译:

R CMD SHLIB -lgsl -lgslcblas BinaryBayesianGroupLasso.c

它抛出以下错误:

gcc -arch x86_64 -std=gnu99 -I/Library/Frameworks/R.framework/Resources/include -I/Library/Frameworks/R.framework/Resources/include/x86_64 -DNDEBUG  -I/usr/local/include    -fPIC  -g -O2  -c BinaryBayesianGroupLasso.c -o BinaryBayesianGroupLasso.o
BinaryBayesianGroupLasso.c: In function ‘BinaryBayesianGroupLassoC’:
BinaryBayesianGroupLasso.c:36: error: nested functions are disabled, use -fnested-functions to re-enable
BinaryBayesianGroupLasso.c:36: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘gBaseRand’
BinaryBayesianGroupLasso.c:37: warning: implicit declaration of function ‘time’
BinaryBayesianGroupLasso.c:41: warning: comparison between pointer and integer
BinaryBayesianGroupLasso.c:50: warning: implicit declaration of function ‘cblas_dgemv’
BinaryBayesianGroupLasso.c:50: error: ‘CblasRowMajor’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:50: error: (Each undeclared identifier is reported only once
BinaryBayesianGroupLasso.c:50: error: for each function it appears in.)
BinaryBayesianGroupLasso.c:50: error: ‘CblasNoTrans’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:50: error: ‘mu’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:51: warning: implicit declaration of function ‘printVector’
BinaryBayesianGroupLasso.c:70: error: ‘i’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:71: error: ‘u’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:72: error: ‘y’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:73: error: ‘a’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:74: error: ‘b’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:79: warning: implicit declaration of function ‘rTruncNorm’
BinaryBayesianGroupLasso.c:91: warning: passing argument 1 of ‘gsl_rng_free’ discards qualifiers from pointer target type
BinaryBayesianGroupLasso.c:92: warning: ‘return’ with a value, in function returning void
BinaryBayesianGroupLasso.c: At top level:
BinaryBayesianGroupLasso.c:98: error: conflicting types for ‘rTruncNorm’
BinaryBayesianGroupLasso.c:79: error: previous implicit declaration of ‘rTruncNorm’ was here
make: *** [BinaryBayesianGroupLasso.o] Error 1

我在 C 中没有嵌套函数。为什么会抛出这样的错误?如果是链接问题,它不应该抛出未找到函数的错误吗?

编辑:我被要求发布解决方案。在我的初始代码中,我没有编译上面的代码(而是另一个目录中的旧版本)。(是的,这是可能的最愚蠢的错误)。我正在重新发布代码的工作版本。除了查看 mcmc 采样步骤如何编码以及如何从 R 调用它的示例之外,我认为它没有太大价值。但是,就分组贝叶斯套索而言,此代码是不完整的。它还有代码(带有用于打印 blas 中使用的矩阵的来源)。我在大约一周内就开始使用 C 进行编码,所以对所有内容(打印代码除外)都持保留态度。如果您有更漂亮的版本(例如放置单独的文件以进行打印和编译命令),请继续进行更改。

我在这个练习中学到了 3 件事:

  • 从 R 调用 C

  • 使用 blas

  • 使用 gsl

代码将在稍后演变为解决第 4 点: - 从 C 调用 R

我必须在此模型中从广义逆高斯采样(此代码中排除)。但是,我看不到在 C 中执行此操作的简单函数。那时我可能不得不使用 R 的函数。所以变得很奇怪。R 调用此 C 函数,并且在此 C 函数中将调用其他一些 R 函数。如果有任何替代方法,您的意见将不胜感激,如果是这样,怎么做 - RInside?有什么容易的吗?

时间对我来说是最重要的。我必须用大约一百万个数据点运行这个模型,至少有 10,000 次 mcmc 迭代。我想在 5 分钟内完成,最好是 2 分钟。

本练习的第三阶段是并行化 z 中的采样步骤。

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <stddef.h>
#include <limits.h>
#include <time.h> 
#include <ctype.h>              /* Char classes    ISOC  */
#include <string.h>             /* Strings         ISOC  */


#include <gsl/gsl_machine.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_cblas.h>

#include <R.h>
#include <Rmath.h>
#include <Rembedded.h>
#include <Rdefines.h>
//#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#include <R_ext/Linpack.h>
//#include <vecLib/cblas.h>      /* C BLAS          APPLE */
//#include "blaio.h"             /* Basic Linear Algebra I/O */


// source for the printing codes: //source: http://www.mitchr.me/SS/exampleCode/blas.html

#define REALT double

#ifndef REALT
#define REALT double
#endif

void sgeprt(int m, int n, REALT, char *c);
void dgeprt(int m, int n, REALT, char *c);

/* ****************************** - external functions from blaio.h.  */

void printVector(int n, REALT  *v,        /* Size and array                 */
                 int wide, int prec,      /* Width and precesion for floats */
                 char *pad,               /* Right pad string               */
                 char *ldel, char *rdel,  /* Left and right delimiter       */
                 char *tag                /* Tag for first line             */
  );

void printMatrix(const enum CBLAS_ORDER order, 
                 int n, int m, REALT  *a,  /* Size and array                 */
                 int wide, int prec,       /* Width and precesion for floats */
                 char *pad,                /* Right pad string               */
                 char *ldel, char *rdel,   /* Left and right delimiter       */
                 char *lidel, char *ridel, /* Left and right INNER delimiter */
                 char *tag                 /* Tag for first line             */
  );

int readMatrix(int *n, int *m,   /* Will contain size of the array after the read */
               REALT  *a,        /* Will point to the data */
               int maxEle,       /* Maximum number of elements to read */
               char *fileName    /* The file name to read the data from */
  );

void printMatrixThr(const enum CBLAS_ORDER order, 
                   int n, int m, REALT  *a,    /* Size and array                 */
                   char *inStr, char *outStr,  /* "in" string, and "out" string  */
                   REALT  minIn, REALT  maxIn, /* Min/Max values for "in" range. */
                   char *pad,                  /* Right pad string               */
                   char *ldel, char *rdel,     /* Left and right delimiter       */
                   char *lidel, char *ridel,   /* Left and right INNER delimiter */
                   char *tag                   /* Tag for first line             */
  );


double rTruncNorm (double, double*, double, double);

// C version of group lasso
void BinaryBayesianGroupLassoC (double *X, int *dim, int *y, int *iterations, double *bprev, double *Lprev) {  

    // X is design matrix with intercept
    //dim is dimensions of X
    int iter = 1, i;
    int n = dim[0];
    int p = dim[1];
    double sigma = 1, z[n], mu[n], u, a, b, *Li;

    // random number generation set up
    const double lowest_double = -GSL_DBL_MAX;
    const double highest_double = GSL_DBL_MAX;
    const gsl_rng *gBaseRand;       /* global rand number generator */
    unsigned long randSeed;         

    /* specifying to use Mersenne twister as the uniform PRNG */
    gBaseRand = gsl_rng_alloc(gsl_rng_mt19937);
    srand(time(NULL));                    /* initialization for rand() */
    randSeed = rand();                    /* returns a non-negative integer */
    gsl_rng_set(gBaseRand, randSeed);     /* seed the PRNG */

    while(iter < *iterations){

        cblas_dgemv(CblasRowMajor, CblasNoTrans, n, p, 1, X, p, bprev, 1, 0, mu, 1);
        printVector(n, mu, 8, 3, NULL, NULL, NULL, "       mu"); // width=8, precision=3

        for (i = 0; i < n; i++) {
            u = gsl_ran_flat(gBaseRand, 0, 1);
            if(y[i]==1){
                a = 0.0 - mu[i];
                b = highest_double;
            } else {
                a = lowest_double;
                b = 0.0 - mu[i];
            }
            z[i] = mu[i] + rTruncNorm(sigma, &u, a, b);            
        }

        printVector(n, z, 8, 3, NULL, NULL, NULL, "       z"); // width=8, precision=3                  
        iter = iter + 1;

    }
    gsl_rng_free(gBaseRand);    
}



// gaussian truncated normal between [a,b], params - mu=0, sigma, unifrnd = u
double rTruncNorm(double sigma, double *u, double a, double b){

    double unifr = *u;
    double Fa = gsl_cdf_gaussian_P(a, sigma);
    double Fb = gsl_cdf_gaussian_P(b, sigma);
    double v = Fa + (Fb-Fa)*unifr;
    double x = gsl_cdf_gaussian_Pinv(v, sigma);
    return(x);
}

/* function definitions from blaio.h *******/

void printMatrixUbr(const enum CBLAS_ORDER order, /* CBLAS row order                */
                    int n, int m, REALT  *a,      /* Size and array                 */
                    char *inStr, char *outStr,    /* "in" string, and "out" string  */
                    REALT  minIn, REALT  maxIn,   /* Min/Max values for "in" range. */
                    int wide, int prec,           /* Width and precesion for floats */
                    int *rowPerm, int *colPerm,
                    char prtMode, /* b=bitmap, V=values, *=in/out) */
                    char *fileName, 
                    int maskMode, // (L-below diag, U=above diag, D-Diagnal, M-Mask, 0=NONE, \0-NONE
                    char *mask,
                    char *pad,                    /* Right pad string               */
                    char *ldel, char *rdel,       /* Left and right delimiter       */
                    char *lidel, char *ridel,     /* Left and right INNER delimiter */
                    char *tag                     /* Tag for first line             */
  );

/* ************************************************************************** */

void printVector(int n, REALT  *v, int wide, int prec, char *pad, char *ldel, char *rdel, char *tag) {
  printMatrixUbr(CblasRowMajor, 1, n, v, NULL, NULL, 0.0, 0.0, wide, prec, NULL, NULL, 'V', NULL, '0', NULL, pad, ldel, rdel, "", "", tag);
} /* end func printVector */

/* ************************************************************************** */
void printMatrix(const enum CBLAS_ORDER order, int n, int m, REALT  *a, int wide, int prec, char *pad, char *ldel, char *rdel, char *lidel, char *ridel, char *tag) {
  printMatrixUbr(order, n, m, a, NULL, NULL, 0.0, 0.0, wide, prec, NULL, NULL, 'V', NULL, '0', NULL, pad, ldel, rdel, lidel, ridel, tag);
} /* end func printMatrix */

/* ************************************************************************** */
void printMatrixThr(const enum CBLAS_ORDER order, int n, int m, REALT  *a, char *inStr, char *outStr, REALT  minIn, REALT  maxIn, char *pad, char *ldel, char *rdel, char *lidel, char *ridel, char *tag) {
  printMatrixUbr(order, n, m, a, inStr, outStr, minIn, maxIn, 0, 0, NULL, NULL, '*', NULL, '0', NULL, pad, ldel, rdel, lidel, ridel, tag);
} /* end func printMatrixThr*/

/* ************************************************************************** */
void printMatrixUbr(const enum CBLAS_ORDER order, /* CBLAS row order                  */
                    int n, int m, REALT  *a,      /* Size and array                   */
                    char *inStr, char *outStr,    /* "in" string, and "out" string    */
                    REALT  minIn, REALT  maxIn,   /* Min/Max values for "in" range.   */
                    int wide, int prec,           /* Width and precesion for floats   */
                    int *rowPerm, int *colPerm,   /* Permute rows i->xx[i]            */
                    char prtMode,                 /* b=bitmap, V=values, *=in/out     */
                    char *fileName,               /* if NULL, stdout.                 */
                    int maskMode,                 /* L, U, D, M-Mask, 0=NONE          */
                    char *mask,                   /* Mask (same size as a) ctrl print */
                    char *pad,                    /* Right pad string                 */
                    char *ldel, char *rdel,       /* Left and right delimiter         */
                    char *lidel, char *ridel,     /* Left and right INNER delimiter   */
                    char *tag                     /* Tag for first line               */
  ) {
  int i, j, iP, jP;
  int k, ldelLen, tagLen, cIdx, prtPerMask;
  REALT  pVal;

  if(inStr == NULL)  inStr  = "*";
  if(outStr == NULL) outStr = " ";
  if(wide < 0)       wide   = 5;
  if(prec < 0)       prec   = 2;
  if(ldel == NULL)   ldel   = "[";
  if(ridel == NULL)  ridel  = "]";
  if(lidel == NULL)  lidel  = "[";
  if(rdel == NULL)   rdel   = "]";
  if(pad  == NULL)   pad    = " ";
  if(tag  == NULL)   tag    = "";

  ldelLen = strlen(ldel);
  tagLen = strlen(tag);
  for(j=0; j<n; j++) {
    if(j==0) 
      printf("%s%s%s%s", tag, ldel, lidel, pad);
    else {
      for(k=0;k<tagLen;k++) printf(" ");
      for(k=0;k<ldelLen;k++) printf(" ");
      printf("%s%s", lidel, pad);
    } /* end if/else */
    for(i=0; i<m; i++) {
      if(colPerm != NULL)
        iP = colPerm[i];
      else
        iP = i;
      if(rowPerm != NULL)
        jP = rowPerm[j];
      else
        jP = j;
      if(order == CblasColMajor)
        cIdx = n*iP+jP;
      else
        cIdx = m*jP+iP;
      pVal = a[cIdx];
      // Figure out what the mask has to do with printing..
      if(maskMode == '0')
        prtPerMask = 1;
      else if(maskMode == 'L')
        prtPerMask = (iP<jP);  // Row order specific!  Fix this.
      else if(maskMode == 'D')
        prtPerMask = (iP==jP);
      else if(maskMode == 'U')
        prtPerMask = (iP>jP);  // Row order specific!  Fix this.
      else if(maskMode == 'M')
        prtPerMask = mask[cIdx];
      else
        prtPerMask = 1;

      if(prtMode == '*') {
        if( prtPerMask && (pVal >= minIn) && (pVal <= maxIn) )
          printf("%s%s", inStr, pad);
        else 
          printf("%s%s", outStr, pad);
      } else {
        if(prtPerMask)
          printf("%*.*f%s", wide, prec, pVal, pad);
        else
          printf("%*s%s", wide, outStr, pad);
      } /* end if/else */
    } /* end for */
    if(j==n-1)
      printf("%s%s\n", ridel, rdel);
    else
      printf("%s\n", ridel);
  } /* end for */
} /* end func printMatrixUbr*/

/* ************************************************************************** */
int readMatrix(int *n, int *m, REALT  *a, int maxEle, char *fileName) {
  int i, j;
  int inComment;
  int numNumbers;
  int ch;
  int lengthOfNumber;
  char numberBuffer[256];
  FILE *FP;

  FP = fopen(fileName, "r");
  if(FP == NULL) 
    return 1;
  i = j = 0;
  inComment = 0;
  lengthOfNumber = -1;
  numNumbers = 0;
  while(EOF != (ch = getc(FP))) {
    if(ch == '#') inComment = 1;               /* Enter comment upon # char. */
    if(inComment && (ch < 20)) inComment = 0;  /* Break out of comment upon ANY control char. */
    if( !(inComment)) {
      if(isdigit(ch) || (ch=='.') || (ch=='E') || (ch=='e') || (ch=='-') | (ch=='+')) {
        lengthOfNumber++;
        numberBuffer[lengthOfNumber] = ch;
      } else {
        if(lengthOfNumber>=0) {
          numberBuffer[lengthOfNumber+1] = '\0';
          lengthOfNumber = -1;
          numNumbers++;
          if(numNumbers==1) {
            *n = atoi(numberBuffer);
          } else if(numNumbers==2) {
            *m = atoi(numberBuffer);
          } else if (numNumbers<maxEle) {
            a[numNumbers-3] = atof(numberBuffer);
          } else {
            return(-1);
          } /* end if/else */
        } /* end if */
      } /* end if/else */
    } /* end if */
  } /* end while */
  fclose(FP);
  return(numNumbers-2);
} /* end func main */

在 R 中模拟随机数据并调用 C 函数:

# simulate data
n = 1000000; p=30; iterations = 1000; burnin = 5;
X = as.data.frame(sapply(1:p, function(x) rnorm(n, 0, 1)))
Y = sample(c(0,1), n, replace=TRUE)



call_BinaryBayesianGroupLasso <- function(X,Y,iterations,burnin){

  # X is design matrix without intercept
  p = dim(X)[2]+1 # number of predictors including dummy vars and intercept
  n = dim(X)[1]
  z = rep(0,n)
  h = 1000 # hyperparameter for shrinkage (controls number of parameters in model)
  b = matrix(0, nrow=p, ncol=iterations) # store beta
  L = matrix(0, nrow=p, ncol=iterations) # store lambda in
  initmodel = glm(Y ~ ., data = X, family = binomial(link = "probit")) # full model initial estimates 

  # add intercept to design matrix
  X = cbind(rep(1,n), X) 
  colnames(X) = names(initmodel$coefficients)
  b[,1] = summary(initmodel)$coefficients[,1]
  L[,1] = summary(initmodel)$coefficients[,2]
  eye = diag(rep(1,n)) # used in mcmc step for b

  bprev=b[,1]
  Lprev=L[,1]

  # call C function for group lasso
  res = .C("BinaryBayesianGroupLassoC",
           as.double(as.matrix(X)),            
           as.integer(dim(X)),
           as.integer(Y), 
           as.integer(iterations),
           as.double(bprev),
           as.double(Lprev))

}
4

0 回答 0