我在quantreg
win7 64bit 和 linux Mint Maya 64bit 上都使用库,我意识到速度差异很大。
我正在使用用 Fortran 编写的 Frisch-Newton 方法,并且我只测量 .Fortran() 调用的速度。
我正在分析长度为 1600 的向量。
在 Windows 上大约需要 2.5 秒,在 Linux 上大约需要 22 秒。而且我真的不明白为什么..
在此之后,我尝试通过 F2C 将 Fortran 代码翻译成 C 代码,并使用 clapack 函数将其编译成动态库。我通过 .C() 调用了这个函数,我再次测量了时间效率:
在 Windows 上需要 8 秒,在 Linux 上需要 22.5 秒。我真的很困惑。
一切都在 I7-3770、16GB RAM 上完成。
有人可以向我解释为什么它不起作用吗?
这是代码:
Fortran 代码:
C Output from Public domain Ratfor, version 1.0
subroutine rqfnb(n,p,a,y,rhs,d,u,beta,eps,wn,wp,nit,info)
integer n,p,info,nit(3)
double precision a(p,n),y(n),rhs(p),d(n),u(n),wn(n,9),wp(p,p+3)
double precision one,beta,eps
parameter( one = 1.0d0)
call lpfnb(n,p,a,y,rhs,d,u,beta,eps,wn(1,1),wn(1,2), wp(1,1),wn(1,
*3),wn(1,4),wn(1,5),wn(1,6), wp(1,2),wn(1,7),wn(1,8),wn(1,9),wp(1,3
*),wp(1,4),nit,info)
return
end
subroutine lpfnb(n,p,a,c,b,d,u,beta,eps,x,s,y,z,w, dx,ds,dy,dz,dw,
*dr,rhs,ada,nit,info)
integer n,p,pp,i,info,nit(3),maxit
double precision a(p,n),c(n),b(p)
double precision zero,one,mone,big,ddot,dmax1,dmin1,dxdz,dsdw
double precision deltap,deltad,beta,eps,mu,gap,g
double precision x(n),u(n),s(n),y(p),z(n),w(n),d(n),rhs(p),ada(p,p
*)
double precision dx(n),ds(n),dy(p),dz(n),dw(n),dr(n)
parameter( zero = 0.0d0)
parameter( one = 1.0d0)
parameter( mone = -1.0d0)
parameter( big = 1.0d+20)
parameter( maxit = 50)
nit(1)=0
nit(2)=0
nit(3)=n
pp=p*p
call dgemv('N',p,n,one,a,p,c,1,zero,y,1)
do23000 i=1,n
d(i)=one
23000 continue
23001 continue
call stepy(n,p,a,d,y,ada,info)
if(info .ne. 0)then
return
endif
call dcopy(n,c,1,s,1)
call dgemv('T',p,n,mone,a,p,y,1,one,s,1)
do23004 i=1,n
if(dabs(s(i)).lt.eps)then
z(i)=dmax1(s(i), zero) + eps
w(i)=dmax1(-s(i),zero) + eps
else
z(i)=dmax1(s(i), zero)
w(i)=dmax1(-s(i),zero)
endif
s(i)=u(i)-x(i)
23004 continue
23005 continue
gap = ddot(n,z,1,x,1)+ddot(n,w,1,s,1)
23008 if(gap .gt. eps .and. nit(1).lt.maxit)then
nit(1)=nit(1)+1
do23010 i = 1,n
d(i) = one/(z(i)/x(i) + w(i)/s(i))
ds(i)=z(i)-w(i)
dz(i)=d(i)*ds(i)
23010 continue
23011 continue
call dcopy(p,b,1,dy,1)
call dgemv('N',p,n,mone,a,p,x,1,one,dy,1)
call dgemv('N',p,n,one,a,p,dz,1,one,dy,1)
call dcopy(p,dy,1,rhs,1)
call stepy(n,p,a,d,dy,ada,info)
if(info .ne. 0)then
return
endif
call dgemv('T',p,n,one,a,p,dy,1,mone,ds,1)
deltap=big
deltad=big
do23014 i=1,n
dx(i)=d(i)*ds(i)
ds(i)=-dx(i)
dz(i)=-z(i)*(dx(i)/x(i) + one)
dw(i)=-w(i)*(ds(i)/s(i) + one)
if(dx(i).lt.0)then
deltap=dmin1(deltap,-x(i)/dx(i))
endif
if(ds(i).lt.0)then
deltap=dmin1(deltap,-s(i)/ds(i))
endif
if(dz(i).lt.0)then
deltad=dmin1(deltad,-z(i)/dz(i))
endif
if(dw(i).lt.0)then
deltad=dmin1(deltad,-w(i)/dw(i))
endif
23014 continue
23015 continue
deltap=dmin1(beta*deltap,one)
deltad=dmin1(beta*deltad,one)
if(min(deltap,deltad) .lt. one)then
nit(2)=nit(2)+1
mu = ddot(n,x,1,z,1)+ddot(n,s,1,w,1)
g = mu + deltap*ddot(n,dx,1,z,1)+ deltad*ddot(n,dz,1,x,1) + deltap
**deltad*ddot(n,dz,1,dx,1)+ deltap*ddot(n,ds,1,w,1)+ deltad*ddot(n,
*dw,1,s,1) + deltap*deltad*ddot(n,ds,1,dw,1)
mu = mu * ((g/mu)**3) /dfloat(2*n)
do23026 i=1,n
dr(i)=d(i)*(mu*(1/s(i)-1/x(i))+ dx(i)*dz(i)/x(i)-ds(i)*dw(i)/s(i))
23026 continue
23027 continue
call dswap(p,rhs,1,dy,1)
call dgemv('N',p,n,one,a,p,dr,1,one,dy,1)
call dpotrs('U',p,1,ada,p,dy,p,info)
call dgemv('T',p,n,one,a,p,dy,1,zero,u,1)
deltap=big
deltad=big
do23028 i=1,n
dxdz = dx(i)*dz(i)
dsdw = ds(i)*dw(i)
dx(i)= d(i)*(u(i)-z(i)+w(i))-dr(i)
ds(i)= -dx(i)
dz(i)= -z(i)+(mu - z(i)*dx(i) - dxdz)/x(i)
dw(i)= -w(i)+(mu - w(i)*ds(i) - dsdw)/s(i)
if(dx(i).lt.0)then
deltap=dmin1(deltap,-x(i)/dx(i))
endif
if(ds(i).lt.0)then
deltap=dmin1(deltap,-s(i)/ds(i))
endif
if(dz(i).lt.0)then
deltad=dmin1(deltad,-z(i)/dz(i))
endif
if(dw(i).lt.0)then
deltad=dmin1(deltad,-w(i)/dw(i))
endif
23028 continue
23029 continue
deltap=dmin1(beta*deltap,one)
deltad=dmin1(beta*deltad,one)
endif
call daxpy(n,deltap,dx,1,x,1)
call daxpy(n,deltap,ds,1,s,1)
call daxpy(p,deltad,dy,1,y,1)
call daxpy(n,deltad,dz,1,z,1)
call daxpy(n,deltad,dw,1,w,1)
gap = ddot(n,z,1,x,1)+ddot(n,w,1,s,1)
goto 23008
endif
23009 continue
call daxpy(n,mone,w,1,z,1)
call dswap(n,z,1,x,1)
return
end
subroutine stepy(n,p,a,d,b,ada,info)
integer n,p,pp,i,info
double precision a(p,n),b(p),d(n),ada(p,p),zero
parameter( zero = 0.0d0)
pp=p*p
do23038 j=1,p
do23040 k=1,p
ada(j,k)=zero
23040 continue
23041 continue
23038 continue
23039 continue
do23042 i=1,n
call dsyr('U',p,d(i),a(1,i),1,ada,p)
23042 continue
23043 continue
call dposv('U',p,1,ada,p,b,p,info)
return
end
C代码:
/* rqfnb.f -- translated by f2c (version 20090411).
You must link the resulting object file with libf2c:
on Microsoft Windows system, link with libf2c.lib;
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
or, if you install libf2c.a in a standard place, with -lf2c -lm
-- in that order, at the end of the command line, as in
cc *.o -lf2c -lm
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
http://www.netlib.org/f2c/libf2c.zip
*/
#include "f2c.h"
#include "clapack.h"
/* Table of constant values */
static doublereal c_b4 = 1.;
static integer c__1 = 1;
static doublereal c_b6 = 0.;
static doublereal c_b13 = -1.;
/* Output from Public domain Ratfor, version 1.0 */
/* Subroutine */ int rqfnb_(integer *n, integer *p, doublereal *a, doublereal
*y, doublereal *rhs, doublereal *d__, doublereal *u, doublereal *beta,
doublereal *eps, doublereal *wn, doublereal *wp, integer *nit,
integer *info)
{
/* System generated locals */
integer a_dim1, a_offset, wn_dim1, wn_offset, wp_dim1, wp_offset;
/* Local variables */
extern /* Subroutine */ int lpfnb_(integer *, integer *, doublereal *,
doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, doublereal *, doublereal *, integer *, integer *);
/* Parameter adjustments */
wn_dim1 = *n;
wn_offset = 1 + wn_dim1;
wn -= wn_offset;
--u;
--d__;
--y;
wp_dim1 = *p;
wp_offset = 1 + wp_dim1;
wp -= wp_offset;
--rhs;
a_dim1 = *p;
a_offset = 1 + a_dim1;
a -= a_offset;
--nit;
/* Function Body */
lpfnb_(n, p, &a[a_offset], &y[1], &rhs[1], &d__[1], &u[1], beta, eps, &wn[
wn_dim1 + 1], &wn[(wn_dim1 << 1) + 1], &wp[wp_dim1 + 1], &wn[
wn_dim1 * 3 + 1], &wn[(wn_dim1 << 2) + 1], &wn[wn_dim1 * 5 + 1], &
wn[wn_dim1 * 6 + 1], &wp[(wp_dim1 << 1) + 1], &wn[wn_dim1 * 7 + 1]
, &wn[(wn_dim1 << 3) + 1], &wn[wn_dim1 * 9 + 1], &wp[wp_dim1 * 3
+ 1], &wp[(wp_dim1 << 2) + 1], &nit[1], info);
return 0;
} /* rqfnb_ */
/* Subroutine */ int lpfnb_(integer *n, integer *p, doublereal *a, doublereal
*c__, doublereal *b, doublereal *d__, doublereal *u, doublereal *beta,
doublereal *eps, doublereal *x, doublereal *s, doublereal *y,
doublereal *z__, doublereal *w, doublereal *dx, doublereal *ds,
doublereal *dy, doublereal *dz, doublereal *dw, doublereal *dr,
doublereal *rhs, doublereal *ada, integer *nit, integer *info)
{
/* System generated locals */
integer a_dim1, a_offset, ada_dim1, ada_offset, i__1;
doublereal d__1, d__2;
/* Local variables */
static doublereal g;
static integer i__;
static doublereal mu, gap;
extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *,
integer *);
static doublereal dsdw, dxdz;
extern /* Subroutine */ int stepy_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *);
static doublereal deltad, deltap;
/* Parameter adjustments */
--dr;
--dw;
--dz;
--ds;
--dx;
--w;
--z__;
--s;
--x;
--u;
--d__;
--c__;
ada_dim1 = *p;
ada_offset = 1 + ada_dim1;
ada -= ada_offset;
--rhs;
--dy;
--y;
--b;
a_dim1 = *p;
a_offset = 1 + a_dim1;
a -= a_offset;
--nit;
/* Function Body */
nit[1] = 0;
nit[2] = 0;
nit[3] = *n;
dgemv_("N", p, n, &c_b4, &a[a_offset], p, &c__[1], &c__1, &c_b6, &y[1], &c__1);
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
d__[i__] = 1.;
/* L23000: */
}
/* L23001: */
stepy_(n, p, &a[a_offset], &d__[1], &y[1], &ada[ada_offset], info);
if (*info != 0) {
return 0;
}
dcopy_(n, &c__[1], &c__1, &s[1], &c__1);
dgemv_("T", p, n, &c_b13, &a[a_offset], p, &y[1], &c__1, &c_b4, &s[1], &c__1);
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if ((d__1 = s[i__], abs(d__1)) < *eps) {
/* Computing MAX */
d__1 = s[i__];
z__[i__] = max(d__1,0.) + *eps;
/* Computing MAX */
d__1 = -s[i__];
w[i__] = max(d__1,0.) + *eps;
} else {
/* Computing MAX */
d__1 = s[i__];
z__[i__] = max(d__1,0.);
/* Computing MAX */
d__1 = -s[i__];
w[i__] = max(d__1,0.);
}
s[i__] = u[i__] - x[i__];
/* L23004: */
}
/* L23005: */
gap = ddot_(n, &z__[1], &c__1, &x[1], &c__1) + ddot_(n, &w[1], &c__1, &s[
1], &c__1);
L23008:
if (gap > *eps && nit[1] < 50) {
++nit[1];
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
d__[i__] = 1. / (z__[i__] / x[i__] + w[i__] / s[i__]);
ds[i__] = z__[i__] - w[i__];
dz[i__] = d__[i__] * ds[i__];
/* L23010: */
}
/* L23011: */
dcopy_(p, &b[1], &c__1, &dy[1], &c__1);
dgemv_("N", p, n, &c_b13, &a[a_offset], p, &x[1], &c__1, &c_b4, &dy[1], &c__1);
dgemv_("N", p, n, &c_b4, &a[a_offset], p, &dz[1], &c__1, &c_b4, &dy[1], &c__1);
dcopy_(p, &dy[1], &c__1, &rhs[1], &c__1);
stepy_(n, p, &a[a_offset], &d__[1], &dy[1], &ada[ada_offset], info);
if (*info != 0) {
return 0;
}
dgemv_("T", p, n, &c_b4, &a[a_offset], p, &dy[1], &c__1, &c_b13, &ds[1], &c__1);
deltap = 1e20;
deltad = 1e20;
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
dx[i__] = d__[i__] * ds[i__];
ds[i__] = -dx[i__];
dz[i__] = -z__[i__] * (dx[i__] / x[i__] + 1.);
dw[i__] = -w[i__] * (ds[i__] / s[i__] + 1.);
if (dx[i__] < 0.) {
/* Computing MIN */
d__1 = deltap, d__2 = -x[i__] / dx[i__];
deltap = min(d__1,d__2);
}
if (ds[i__] < 0.) {
/* Computing MIN */
d__1 = deltap, d__2 = -s[i__] / ds[i__];
deltap = min(d__1,d__2);
}
if (dz[i__] < 0.) {
/* Computing MIN */
d__1 = deltad, d__2 = -z__[i__] / dz[i__];
deltad = min(d__1,d__2);
}
if (dw[i__] < 0.) {
/* Computing MIN */
d__1 = deltad, d__2 = -w[i__] / dw[i__];
deltad = min(d__1,d__2);
}
/* L23014: */
}
/* L23015: */
/* Computing MIN */
d__1 = *beta * deltap;
deltap = min(d__1,1.);
/* Computing MIN */
d__1 = *beta * deltad;
deltad = min(d__1,1.);
if (min(deltap,deltad) < 1.) {
++nit[2];
mu = ddot_(n, &x[1], &c__1, &z__[1], &c__1) + ddot_(n, &s[1], &
c__1, &w[1], &c__1);
g = mu + deltap * ddot_(n, &dx[1], &c__1, &z__[1], &c__1) +
deltad * ddot_(n, &dz[1], &c__1, &x[1], &c__1) + deltap *
deltad * ddot_(n, &dz[1], &c__1, &dx[1], &c__1) + deltap *
ddot_(n, &ds[1], &c__1, &w[1], &c__1) + deltad * ddot_(n,
&dw[1], &c__1, &s[1], &c__1) + deltap * deltad * ddot_(n,
&ds[1], &c__1, &dw[1], &c__1);
/* Computing 3rd power */
d__1 = g / mu;
mu = mu * (d__1 * (d__1 * d__1)) / (doublereal) (*n << 1);
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
dr[i__] = d__[i__] * (mu * (1 / s[i__] - 1 / x[i__]) + dx[i__]
* dz[i__] / x[i__] - ds[i__] * dw[i__] / s[i__]);
/* L23026: */
}
/* L23027: */
dswap_(p, &rhs[1], &c__1, &dy[1], &c__1);
dgemv_("N", p, n, &c_b4, &a[a_offset], p, &dr[1], &c__1, &c_b4, &dy[1], &c__1);
dpotrs_("U", p, &c__1, &ada[ada_offset], p, &dy[1], p, info);
dgemv_("T", p, n, &c_b4, &a[a_offset], p, &dy[1], &c__1, &c_b6, &u[1], &c__1);
deltap = 1e20;
deltad = 1e20;
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
dxdz = dx[i__] * dz[i__];
dsdw = ds[i__] * dw[i__];
dx[i__] = d__[i__] * (u[i__] - z__[i__] + w[i__]) - dr[i__];
ds[i__] = -dx[i__];
dz[i__] = -z__[i__] + (mu - z__[i__] * dx[i__] - dxdz) / x[
i__];
dw[i__] = -w[i__] + (mu - w[i__] * ds[i__] - dsdw) / s[i__];
if (dx[i__] < 0.) {
/* Computing MIN */
d__1 = deltap, d__2 = -x[i__] / dx[i__];
deltap = min(d__1,d__2);
}
if (ds[i__] < 0.) {
/* Computing MIN */
d__1 = deltap, d__2 = -s[i__] / ds[i__];
deltap = min(d__1,d__2);
}
if (dz[i__] < 0.) {
/* Computing MIN */
d__1 = deltad, d__2 = -z__[i__] / dz[i__];
deltad = min(d__1,d__2);
}
if (dw[i__] < 0.) {
/* Computing MIN */
d__1 = deltad, d__2 = -w[i__] / dw[i__];
deltad = min(d__1,d__2);
}
/* L23028: */
}
/* L23029: */
/* Computing MIN */
d__1 = *beta * deltap;
deltap = min(d__1,1.);
/* Computing MIN */
d__1 = *beta * deltad;
deltad = min(d__1,1.);
}
daxpy_(n, &deltap, &dx[1], &c__1, &x[1], &c__1);
daxpy_(n, &deltap, &ds[1], &c__1, &s[1], &c__1);
daxpy_(p, &deltad, &dy[1], &c__1, &y[1], &c__1);
daxpy_(n, &deltad, &dz[1], &c__1, &z__[1], &c__1);
daxpy_(n, &deltad, &dw[1], &c__1, &w[1], &c__1);
gap = ddot_(n, &z__[1], &c__1, &x[1], &c__1) + ddot_(n, &w[1], &c__1,
&s[1], &c__1);
goto L23008;
}
/* L23009: */
daxpy_(n, &c_b13, &w[1], &c__1, &z__[1], &c__1);
dswap_(n, &z__[1], &c__1, &x[1], &c__1);
return 0;
} /* lpfnb_ */
/* Subroutine */ int stepy_(integer *n, integer *p, doublereal *a, doublereal
*d__, doublereal *b, doublereal *ada, integer *info)
{
/* System generated locals */
integer a_dim1, a_offset, ada_dim1, ada_offset, i__1, i__2;
/* Local variables */
static integer i__, j, k;
/* Parameter adjustments */
--d__;
ada_dim1 = *p;
ada_offset = 1 + ada_dim1;
ada -= ada_offset;
--b;
a_dim1 = *p;
a_offset = 1 + a_dim1;
a -= a_offset;
/* Function Body */
i__1 = *p;
for (j = 1; j <= i__1; ++j) {
i__2 = *p;
for (k = 1; k <= i__2; ++k) {
ada[j + k * ada_dim1] = 0.;
/* L23040: */
}
/* L23041: */
/* L23038: */
}
/* L23039: */
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
dsyr_("U", p, &d__[i__], &a[i__ * a_dim1 + 1], &c__1, &ada[ada_offset], p);
/* L23042: */
}
/* L23043: */
dposv_("U", p, &c__1, &ada[ada_offset], p, &b[1], p, info);
return 0;
} /* stepy_ */
从 R 调用:
# C calling
y = rep(c(24,25,26,27,12,11,13,12),200)
lambda=2
tau = 0.50
m <- length(y)
E <- diag(m);
Dmat <- diff(E);
x <- rbind(E, lambda * Dmat)
y <- c(y, rep(0, m - 1))
beta = 0.99995
eps = 1e-06
n <- length(y)
p <- ncol(x)
rhs <- (1 - tau) * apply(x, 2, sum)
d <- rep(1, n)
u <- rep(1, n)
wn <- rep(0, 10 * n)
wn[1:n] <- (1 - tau)
dyn.load("rqfnb.dll")
system.time(
.C("rqfnb_", as.integer(n), as.integer(p), a = as.double(t(as.matrix(x))),
c = as.double(-y), rhs = as.double(rhs), d = as.double(d),
as.double(u), beta = as.double(beta), eps = as.double(eps),
wn = as.double(wn), wp = double((p + 3) * p), it.count = integer(3),
info = integer(1))
)
dyn.unload("rqfnb.dll")
# Fortran calling
library("quantreg")
y = rep(c(24,25,26,27,12,11,13,12),200)
lambda=2
tau = 0.50
m <- length(y)
E <- diag(m);
Dmat <- diff(E);
x <- rbind(E, lambda * Dmat)
y <- c(y, rep(0, m - 1))
beta = 0.99995
eps = 1e-06
n <- length(y)
p <- ncol(x)
rhs <- (1 - tau) * apply(x, 2, sum)
d <- rep(1, n)
u <- rep(1, n)
wn <- rep(0, 10 * n)
wn[1:n] <- (1 - tau)
system.time(
.Fortran("rqfnb", as.integer(n), as.integer(p), a = as.double(t(as.matrix(x))),
c = as.double(-y), rhs = as.double(rhs), d = as.double(d),
as.double(u), beta = as.double(beta), eps = as.double(eps),
wn = as.double(wn), wp = double((p + 3) * p), it.count = integer(3),
info = integer(1), PACKAGE = "quantreg")
)
动态库的编译:
R CMD SHLIB -L. -lf2c -lblas -llapack -lf2c -lblas -llapack rqfnb.c