我正在编写我的 R mcmc 代码的 C 版本。抛出错误的部分代码如下:
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <stddef.h>
#include <limits.h>
#include <gsl/gsl_machine.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_cdf.h>
#include <R.h>
#include <Rmath.h>
#include <Rembedded.h>
#include <Rdefines.h>
#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#include <R_ext/Linpack.h>
//#include <vecLib/cblas.h> /* C BLAS in APPLE */
#include "blaio.h" /*Linear Algebra I/O using http://www.mitchr.me/SS/exampleCode/blas.html*/
double rTruncNorm (double, double*, double, double);
// C version of group lasso
void BinaryBayesianGroupLasso (double *X, int *dim, int *y, int *iterations, double *bprev, double *Lprev) {
// X is design matrix with intercept
//dim is dimensions of X
int iter = 1, i;
int n = dim[0];
int p = dim[1];
double sigma = 1, z[n], mu[n], u, a, b;
// random number generation set up
const double lowest_double = -GSL_DBL_MAX;
const double highest_double = GSL_DBL_MAX;
const gsl_rng *gBaseRand; /* global rand number generator */
unsigned long randSeed;
/* specifying to use Mersenne twister as the PRNG */
gBaseRand = gsl_rng_alloc(gsl_rng_mt19937);
srand(time(NULL)); /* initialization for rand() */
randSeed = rand(); /* returns a non-negative integer */
gsl_rng_set(gBaseRand, randSeed); /* seed the PRNG */
while(iter < iterations){
cblas_dgemv(CblasRowMajor, CblasNoTrans, n, p, 1, X, p, bprev, 1, 0, mu, 1);
printVector(n, mu, 8, 3, NULL, NULL, NULL, " mu"); // width=8, precision=3
for (i = 0; i < n; i++) {
u = gsl_ran_flat(gBaseRand, 0, 1);
if(y[i]==1){
a = 0.0 - mu[i];
b = highest_double;
} else {
a = lowest_double;
b = 0.0 - mu[i];
}
z[i] = mu[i] + rTruncNorm(sigma, &u, a, b);
}
printVector(n, z, 8, 3, NULL, NULL, NULL, " mu"); // width=8, precision=3
}
gsl_rng_free(gBaseRand);
}
// gaussian truncated normal between [a,b], params - mu=0, sigma, unifrnd = u
double rTruncNorm(double sigma, double *u, double a, double b){
double unifr = *u;
double Fa = gsl_cdf_gaussian_P(a, sigma);
double Fb = gsl_cdf_gaussian_P(b, sigma);
double v = Fa + (Fb-Fa)*unifr;
double x = gsl_cdf_gaussian_Pinv(v, sigma);
return(x);
}
blaio.h 中的函数已经使用网站中的 makefile 构建,我是从那里得到的。
我正在使用以下内容进行编译:
R CMD SHLIB -lgsl -lgslcblas BinaryBayesianGroupLasso.c
它抛出以下错误:
gcc -arch x86_64 -std=gnu99 -I/Library/Frameworks/R.framework/Resources/include -I/Library/Frameworks/R.framework/Resources/include/x86_64 -DNDEBUG -I/usr/local/include -fPIC -g -O2 -c BinaryBayesianGroupLasso.c -o BinaryBayesianGroupLasso.o
BinaryBayesianGroupLasso.c: In function ‘BinaryBayesianGroupLassoC’:
BinaryBayesianGroupLasso.c:36: error: nested functions are disabled, use -fnested-functions to re-enable
BinaryBayesianGroupLasso.c:36: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘gBaseRand’
BinaryBayesianGroupLasso.c:37: warning: implicit declaration of function ‘time’
BinaryBayesianGroupLasso.c:41: warning: comparison between pointer and integer
BinaryBayesianGroupLasso.c:50: warning: implicit declaration of function ‘cblas_dgemv’
BinaryBayesianGroupLasso.c:50: error: ‘CblasRowMajor’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:50: error: (Each undeclared identifier is reported only once
BinaryBayesianGroupLasso.c:50: error: for each function it appears in.)
BinaryBayesianGroupLasso.c:50: error: ‘CblasNoTrans’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:50: error: ‘mu’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:51: warning: implicit declaration of function ‘printVector’
BinaryBayesianGroupLasso.c:70: error: ‘i’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:71: error: ‘u’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:72: error: ‘y’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:73: error: ‘a’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:74: error: ‘b’ undeclared (first use in this function)
BinaryBayesianGroupLasso.c:79: warning: implicit declaration of function ‘rTruncNorm’
BinaryBayesianGroupLasso.c:91: warning: passing argument 1 of ‘gsl_rng_free’ discards qualifiers from pointer target type
BinaryBayesianGroupLasso.c:92: warning: ‘return’ with a value, in function returning void
BinaryBayesianGroupLasso.c: At top level:
BinaryBayesianGroupLasso.c:98: error: conflicting types for ‘rTruncNorm’
BinaryBayesianGroupLasso.c:79: error: previous implicit declaration of ‘rTruncNorm’ was here
make: *** [BinaryBayesianGroupLasso.o] Error 1
我在 C 中没有嵌套函数。为什么会抛出这样的错误?如果是链接问题,它不应该抛出未找到函数的错误吗?
编辑:我被要求发布解决方案。在我的初始代码中,我没有编译上面的代码(而是另一个目录中的旧版本)。(是的,这是可能的最愚蠢的错误)。我正在重新发布代码的工作版本。除了查看 mcmc 采样步骤如何编码以及如何从 R 调用它的示例之外,我认为它没有太大价值。但是,就分组贝叶斯套索而言,此代码是不完整的。它还有代码(带有用于打印 blas 中使用的矩阵的来源)。我在大约一周内就开始使用 C 进行编码,所以对所有内容(打印代码除外)都持保留态度。如果您有更漂亮的版本(例如放置单独的文件以进行打印和编译命令),请继续进行更改。
我在这个练习中学到了 3 件事:
从 R 调用 C
使用 blas
使用 gsl
代码将在稍后演变为解决第 4 点: - 从 C 调用 R
我必须在此模型中从广义逆高斯采样(此代码中排除)。但是,我看不到在 C 中执行此操作的简单函数。那时我可能不得不使用 R 的函数。所以变得很奇怪。R 调用此 C 函数,并且在此 C 函数中将调用其他一些 R 函数。如果有任何替代方法,您的意见将不胜感激,如果是这样,怎么做 - RInside?有什么容易的吗?
时间对我来说是最重要的。我必须用大约一百万个数据点运行这个模型,至少有 10,000 次 mcmc 迭代。我想在 5 分钟内完成,最好是 2 分钟。
本练习的第三阶段是并行化 z 中的采样步骤。
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <stddef.h>
#include <limits.h>
#include <time.h>
#include <ctype.h> /* Char classes ISOC */
#include <string.h> /* Strings ISOC */
#include <gsl/gsl_machine.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_cblas.h>
#include <R.h>
#include <Rmath.h>
#include <Rembedded.h>
#include <Rdefines.h>
//#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#include <R_ext/Linpack.h>
//#include <vecLib/cblas.h> /* C BLAS APPLE */
//#include "blaio.h" /* Basic Linear Algebra I/O */
// source for the printing codes: //source: http://www.mitchr.me/SS/exampleCode/blas.html
#define REALT double
#ifndef REALT
#define REALT double
#endif
void sgeprt(int m, int n, REALT, char *c);
void dgeprt(int m, int n, REALT, char *c);
/* ****************************** - external functions from blaio.h. */
void printVector(int n, REALT *v, /* Size and array */
int wide, int prec, /* Width and precesion for floats */
char *pad, /* Right pad string */
char *ldel, char *rdel, /* Left and right delimiter */
char *tag /* Tag for first line */
);
void printMatrix(const enum CBLAS_ORDER order,
int n, int m, REALT *a, /* Size and array */
int wide, int prec, /* Width and precesion for floats */
char *pad, /* Right pad string */
char *ldel, char *rdel, /* Left and right delimiter */
char *lidel, char *ridel, /* Left and right INNER delimiter */
char *tag /* Tag for first line */
);
int readMatrix(int *n, int *m, /* Will contain size of the array after the read */
REALT *a, /* Will point to the data */
int maxEle, /* Maximum number of elements to read */
char *fileName /* The file name to read the data from */
);
void printMatrixThr(const enum CBLAS_ORDER order,
int n, int m, REALT *a, /* Size and array */
char *inStr, char *outStr, /* "in" string, and "out" string */
REALT minIn, REALT maxIn, /* Min/Max values for "in" range. */
char *pad, /* Right pad string */
char *ldel, char *rdel, /* Left and right delimiter */
char *lidel, char *ridel, /* Left and right INNER delimiter */
char *tag /* Tag for first line */
);
double rTruncNorm (double, double*, double, double);
// C version of group lasso
void BinaryBayesianGroupLassoC (double *X, int *dim, int *y, int *iterations, double *bprev, double *Lprev) {
// X is design matrix with intercept
//dim is dimensions of X
int iter = 1, i;
int n = dim[0];
int p = dim[1];
double sigma = 1, z[n], mu[n], u, a, b, *Li;
// random number generation set up
const double lowest_double = -GSL_DBL_MAX;
const double highest_double = GSL_DBL_MAX;
const gsl_rng *gBaseRand; /* global rand number generator */
unsigned long randSeed;
/* specifying to use Mersenne twister as the uniform PRNG */
gBaseRand = gsl_rng_alloc(gsl_rng_mt19937);
srand(time(NULL)); /* initialization for rand() */
randSeed = rand(); /* returns a non-negative integer */
gsl_rng_set(gBaseRand, randSeed); /* seed the PRNG */
while(iter < *iterations){
cblas_dgemv(CblasRowMajor, CblasNoTrans, n, p, 1, X, p, bprev, 1, 0, mu, 1);
printVector(n, mu, 8, 3, NULL, NULL, NULL, " mu"); // width=8, precision=3
for (i = 0; i < n; i++) {
u = gsl_ran_flat(gBaseRand, 0, 1);
if(y[i]==1){
a = 0.0 - mu[i];
b = highest_double;
} else {
a = lowest_double;
b = 0.0 - mu[i];
}
z[i] = mu[i] + rTruncNorm(sigma, &u, a, b);
}
printVector(n, z, 8, 3, NULL, NULL, NULL, " z"); // width=8, precision=3
iter = iter + 1;
}
gsl_rng_free(gBaseRand);
}
// gaussian truncated normal between [a,b], params - mu=0, sigma, unifrnd = u
double rTruncNorm(double sigma, double *u, double a, double b){
double unifr = *u;
double Fa = gsl_cdf_gaussian_P(a, sigma);
double Fb = gsl_cdf_gaussian_P(b, sigma);
double v = Fa + (Fb-Fa)*unifr;
double x = gsl_cdf_gaussian_Pinv(v, sigma);
return(x);
}
/* function definitions from blaio.h *******/
void printMatrixUbr(const enum CBLAS_ORDER order, /* CBLAS row order */
int n, int m, REALT *a, /* Size and array */
char *inStr, char *outStr, /* "in" string, and "out" string */
REALT minIn, REALT maxIn, /* Min/Max values for "in" range. */
int wide, int prec, /* Width and precesion for floats */
int *rowPerm, int *colPerm,
char prtMode, /* b=bitmap, V=values, *=in/out) */
char *fileName,
int maskMode, // (L-below diag, U=above diag, D-Diagnal, M-Mask, 0=NONE, \0-NONE
char *mask,
char *pad, /* Right pad string */
char *ldel, char *rdel, /* Left and right delimiter */
char *lidel, char *ridel, /* Left and right INNER delimiter */
char *tag /* Tag for first line */
);
/* ************************************************************************** */
void printVector(int n, REALT *v, int wide, int prec, char *pad, char *ldel, char *rdel, char *tag) {
printMatrixUbr(CblasRowMajor, 1, n, v, NULL, NULL, 0.0, 0.0, wide, prec, NULL, NULL, 'V', NULL, '0', NULL, pad, ldel, rdel, "", "", tag);
} /* end func printVector */
/* ************************************************************************** */
void printMatrix(const enum CBLAS_ORDER order, int n, int m, REALT *a, int wide, int prec, char *pad, char *ldel, char *rdel, char *lidel, char *ridel, char *tag) {
printMatrixUbr(order, n, m, a, NULL, NULL, 0.0, 0.0, wide, prec, NULL, NULL, 'V', NULL, '0', NULL, pad, ldel, rdel, lidel, ridel, tag);
} /* end func printMatrix */
/* ************************************************************************** */
void printMatrixThr(const enum CBLAS_ORDER order, int n, int m, REALT *a, char *inStr, char *outStr, REALT minIn, REALT maxIn, char *pad, char *ldel, char *rdel, char *lidel, char *ridel, char *tag) {
printMatrixUbr(order, n, m, a, inStr, outStr, minIn, maxIn, 0, 0, NULL, NULL, '*', NULL, '0', NULL, pad, ldel, rdel, lidel, ridel, tag);
} /* end func printMatrixThr*/
/* ************************************************************************** */
void printMatrixUbr(const enum CBLAS_ORDER order, /* CBLAS row order */
int n, int m, REALT *a, /* Size and array */
char *inStr, char *outStr, /* "in" string, and "out" string */
REALT minIn, REALT maxIn, /* Min/Max values for "in" range. */
int wide, int prec, /* Width and precesion for floats */
int *rowPerm, int *colPerm, /* Permute rows i->xx[i] */
char prtMode, /* b=bitmap, V=values, *=in/out */
char *fileName, /* if NULL, stdout. */
int maskMode, /* L, U, D, M-Mask, 0=NONE */
char *mask, /* Mask (same size as a) ctrl print */
char *pad, /* Right pad string */
char *ldel, char *rdel, /* Left and right delimiter */
char *lidel, char *ridel, /* Left and right INNER delimiter */
char *tag /* Tag for first line */
) {
int i, j, iP, jP;
int k, ldelLen, tagLen, cIdx, prtPerMask;
REALT pVal;
if(inStr == NULL) inStr = "*";
if(outStr == NULL) outStr = " ";
if(wide < 0) wide = 5;
if(prec < 0) prec = 2;
if(ldel == NULL) ldel = "[";
if(ridel == NULL) ridel = "]";
if(lidel == NULL) lidel = "[";
if(rdel == NULL) rdel = "]";
if(pad == NULL) pad = " ";
if(tag == NULL) tag = "";
ldelLen = strlen(ldel);
tagLen = strlen(tag);
for(j=0; j<n; j++) {
if(j==0)
printf("%s%s%s%s", tag, ldel, lidel, pad);
else {
for(k=0;k<tagLen;k++) printf(" ");
for(k=0;k<ldelLen;k++) printf(" ");
printf("%s%s", lidel, pad);
} /* end if/else */
for(i=0; i<m; i++) {
if(colPerm != NULL)
iP = colPerm[i];
else
iP = i;
if(rowPerm != NULL)
jP = rowPerm[j];
else
jP = j;
if(order == CblasColMajor)
cIdx = n*iP+jP;
else
cIdx = m*jP+iP;
pVal = a[cIdx];
// Figure out what the mask has to do with printing..
if(maskMode == '0')
prtPerMask = 1;
else if(maskMode == 'L')
prtPerMask = (iP<jP); // Row order specific! Fix this.
else if(maskMode == 'D')
prtPerMask = (iP==jP);
else if(maskMode == 'U')
prtPerMask = (iP>jP); // Row order specific! Fix this.
else if(maskMode == 'M')
prtPerMask = mask[cIdx];
else
prtPerMask = 1;
if(prtMode == '*') {
if( prtPerMask && (pVal >= minIn) && (pVal <= maxIn) )
printf("%s%s", inStr, pad);
else
printf("%s%s", outStr, pad);
} else {
if(prtPerMask)
printf("%*.*f%s", wide, prec, pVal, pad);
else
printf("%*s%s", wide, outStr, pad);
} /* end if/else */
} /* end for */
if(j==n-1)
printf("%s%s\n", ridel, rdel);
else
printf("%s\n", ridel);
} /* end for */
} /* end func printMatrixUbr*/
/* ************************************************************************** */
int readMatrix(int *n, int *m, REALT *a, int maxEle, char *fileName) {
int i, j;
int inComment;
int numNumbers;
int ch;
int lengthOfNumber;
char numberBuffer[256];
FILE *FP;
FP = fopen(fileName, "r");
if(FP == NULL)
return 1;
i = j = 0;
inComment = 0;
lengthOfNumber = -1;
numNumbers = 0;
while(EOF != (ch = getc(FP))) {
if(ch == '#') inComment = 1; /* Enter comment upon # char. */
if(inComment && (ch < 20)) inComment = 0; /* Break out of comment upon ANY control char. */
if( !(inComment)) {
if(isdigit(ch) || (ch=='.') || (ch=='E') || (ch=='e') || (ch=='-') | (ch=='+')) {
lengthOfNumber++;
numberBuffer[lengthOfNumber] = ch;
} else {
if(lengthOfNumber>=0) {
numberBuffer[lengthOfNumber+1] = '\0';
lengthOfNumber = -1;
numNumbers++;
if(numNumbers==1) {
*n = atoi(numberBuffer);
} else if(numNumbers==2) {
*m = atoi(numberBuffer);
} else if (numNumbers<maxEle) {
a[numNumbers-3] = atof(numberBuffer);
} else {
return(-1);
} /* end if/else */
} /* end if */
} /* end if/else */
} /* end if */
} /* end while */
fclose(FP);
return(numNumbers-2);
} /* end func main */
在 R 中模拟随机数据并调用 C 函数:
# simulate data
n = 1000000; p=30; iterations = 1000; burnin = 5;
X = as.data.frame(sapply(1:p, function(x) rnorm(n, 0, 1)))
Y = sample(c(0,1), n, replace=TRUE)
call_BinaryBayesianGroupLasso <- function(X,Y,iterations,burnin){
# X is design matrix without intercept
p = dim(X)[2]+1 # number of predictors including dummy vars and intercept
n = dim(X)[1]
z = rep(0,n)
h = 1000 # hyperparameter for shrinkage (controls number of parameters in model)
b = matrix(0, nrow=p, ncol=iterations) # store beta
L = matrix(0, nrow=p, ncol=iterations) # store lambda in
initmodel = glm(Y ~ ., data = X, family = binomial(link = "probit")) # full model initial estimates
# add intercept to design matrix
X = cbind(rep(1,n), X)
colnames(X) = names(initmodel$coefficients)
b[,1] = summary(initmodel)$coefficients[,1]
L[,1] = summary(initmodel)$coefficients[,2]
eye = diag(rep(1,n)) # used in mcmc step for b
bprev=b[,1]
Lprev=L[,1]
# call C function for group lasso
res = .C("BinaryBayesianGroupLassoC",
as.double(as.matrix(X)),
as.integer(dim(X)),
as.integer(Y),
as.integer(iterations),
as.double(bprev),
as.double(Lprev))
}