6

在大多数情况下,函数参数的默认值在文档中给出。但是,在某些情况下,默认值是根据其他参数(包括数据本身)计算得出的,因此不可能在文档中指定。

例如,我如何发现库 glmnet 中函数 glmnet 使用的默认 lambda 网格?根据文档,lambda 的默认值是基于 nlambda(默认为 100)和 lambda.min.ratio 计算的,而后者似乎是由数据派生的值。

当我用给定的数据集运行这个函数时,我想知道它使用的 lambda 值。这在使用 cv.glmnet 时特别有用,因为当我不提供 lambda 时,我想知道它是从哪些值中进行选择的。

样本输入:

# Reproducible example: cross-validate a glmnet fit (alpha = 0) on a
# degree-10 raw polynomial basis, first with the default lambda sequence
# and then with a user-supplied grid, printing lambda.min each time.
library(glmnet)

# Fix the RNG state; the two rnorm() draws below must stay in this order.
set.seed(1)
x <- rnorm(100)
eps <- rnorm(100)

y <- 1 + x + x^2 + x^3 + eps

# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
xmat <- model.matrix(y ~ poly(x, 10, raw = TRUE), data = data.frame(x = x))

cv.out <- cv.glmnet(xmat, y, alpha = 0) # What is the lambda used here?
bestlam <- cv.out$lambda.min
print(bestlam)


# When a grid is specified, the result is very different and sometimes worse.
# `length.out` is written in full instead of relying on partial matching.
grid <- 10^seq(10, -2, length.out = 100)
cv.out <- cv.glmnet(xmat, y, alpha = 0, lambda = grid)
bestlam <- cv.out$lambda.min
print(bestlam)

示例输出(注意它们非常不同):

0.3619167
0.04037017
4

3 回答 3

4

如果默认值取决于其他参数的值,那么除了在调用函数时以调试模式进入函数外,我没有其他解决方案。您可以使用 debugonce,例如:

> debugonce(cv.glmnet)
> 
> cv.out=cv.glmnet(xmat, y,alpha=0) # What is the lambda used here?
debugging in: cv.glmnet(xmat, y, alpha = 0)
[...]
Browse[2]> ls()
#  [1] "foldid"       "grouped"      "keep"         "lambda"       "nfolds"       "offset"      
#  [7] "parallel"     "type.measure" "weights"      "x"            "y"           
Browse[2]> lambda
NULL
Browse[2]> c
>

所以对于第一次调用,lambda 是 NULL。但是,如果您对第二次 cv.glmnet 调用重复该方法,您将看到在这种情况下 lambda 是长度为 100 的数字向量。

于 2013-11-06T03:16:16.940 回答
3

我很惊讶还没有人提到这些,但显而易见的函数是 args 和 formals。

与直接输入 cv.glmnet 不同,args 仅显示函数的“顶部”(即参数列表),而不包含函数体:

> args(cv.glmnet)
function (x, y, weights, offset = NULL, lambda = NULL, type.measure = c("mse", 
    "deviance", "class", "auc", "mae"), nfolds = 10, foldid, 
    grouped = TRUE, keep = FALSE, parallel = FALSE, ...) 
NULL

formals将这些参数作为列表给出:

> formals(cv.glmnet)
$x


$y


$weights


$offset
NULL

$lambda
NULL

$type.measure
c("mse", "deviance", "class", "auc", "mae")

$nfolds
[1] 10

$foldid


$grouped
[1] TRUE

$keep
[1] FALSE

$parallel
[1] FALSE

$...
于 2013-11-06T06:52:14.657 回答
0

您可以随时键入函数名称并按 Enter 键以获取函数的源代码。在您给定的示例中, lambda 默认为 NULL。

cv.glmnet
## function (x, y, weights, offset = NULL, lambda = NULL, type.measure = c("mse", 
##     "deviance", "class", "auc", "mae"), nfolds = 10, foldid, 
##     grouped = TRUE, keep = FALSE, parallel = FALSE, ...) 
## {
##     if (missing(type.measure)) 
##         type.measure = "default"
##     else type.measure = match.arg(type.measure)
##     if (!is.null(lambda) && length(lambda) < 2) 
##         stop("Need more than one value of lambda for cv.glmnet")
##     N = nrow(x)
##     if (missing(weights)) 
##         weights = rep(1, N)
##     else weights = as.double(weights)
##     y = drop(y)
##     glmnet.call = match.call(expand.dots = TRUE)
##     which = match(c("type.measure", "nfolds", "foldid", "grouped", 
##         "keep"), names(glmnet.call), F)
##     if (any(which)) 
##         glmnet.call = glmnet.call[-which]
##     glmnet.call[[1]] = as.name("glmnet")
##     glmnet.object = glmnet(x, y, weights = weights, offset = offset, 
##         lambda = lambda, ...)
##     glmnet.object$call = glmnet.call
##     is.offset = glmnet.object$offset
##     lambda = glmnet.object$lambda
##     if (inherits(glmnet.object, "multnet")) {
##         nz = predict(glmnet.object, type = "nonzero")
##         nz = sapply(nz, function(x) sapply(x, length))
##         nz = ceiling(apply(nz, 1, median))
##     }
##     else nz = sapply(predict(glmnet.object, type = "nonzero"), 
##         length)
##     if (missing(foldid)) 
##         foldid = sample(rep(seq(nfolds), length = N))
##     else nfolds = max(foldid)
##     if (nfolds < 3) 
##         stop("nfolds must be bigger than 3; nfolds=10 recommended")
##     outlist = as.list(seq(nfolds))
##     if (parallel && require(foreach)) {
##         outlist = foreach(i = seq(nfolds), .packages = c("glmnet")) %dopar% 
##             {
##                 which = foldid == i
##                 if (is.matrix(y)) 
##                   y_sub = y[!which, ]
##                 else y_sub = y[!which]
##                 if (is.offset) 
##                   offset_sub = as.matrix(offset)[!which, ]
##                 else offset_sub = NULL
##                 glmnet(x[!which, , drop = FALSE], y_sub, lambda = lambda, 
##                   offset = offset_sub, weights = weights[!which], 
##                   ...)
##             }
##     }
##     else {
##         for (i in seq(nfolds)) {
##             which = foldid == i
##             if (is.matrix(y)) 
##                 y_sub = y[!which, ]
##             else y_sub = y[!which]
##             if (is.offset) 
##                 offset_sub = as.matrix(offset)[!which, ]
##             else offset_sub = NULL
##             outlist[[i]] = glmnet(x[!which, , drop = FALSE], 
##                 y_sub, lambda = lambda, offset = offset_sub, 
##                 weights = weights[!which], ...)
##         }
##     }
##     fun = paste("cv", class(glmnet.object)[[1]], sep = ".")
##     cvstuff = do.call(fun, list(outlist, lambda, x, y, weights, 
##         offset, foldid, type.measure, grouped, keep))
##     cvm = cvstuff$cvm
##     cvsd = cvstuff$cvsd
##     cvname = cvstuff$name
##     out = list(lambda = lambda, cvm = cvm, cvsd = cvsd, cvup = cvm + 
##         cvsd, cvlo = cvm - cvsd, nzero = nz, name = cvname, glmnet.fit = glmnet.object)
##     if (keep) 
##         out = c(out, list(fit.preval = cvstuff$fit.preval, foldid = foldid))
##     lamin = if (type.measure == "auc") 
##         getmin(lambda, -cvm, cvsd)
##     else getmin(lambda, cvm, cvsd)
##     obj = c(out, as.list(lamin))
##     class(obj) = "cv.glmnet"
##     obj
## }
## <environment: namespace:glmnet>
于 2013-11-06T03:05:25.110 回答