根据 @r2evans 的评论,这是一种可能的解决方案:对传给 do.call 的参数列表中的大对象使用 quote(),
这样在对函数求值时,才会从全局环境中取出这些对象:
library(caret)
# Predictors (first three iris columns) and response (fourth column) shared by
# every model below.
x <- iris[, 1:3]
y <- iris[, 4]

# Arguments common to all train() calls. x and y are stored as quoted symbols
# rather than as the data itself, so the call recorded on each fitted model
# refers to `x` and `y` by name instead of embedding a copy of the data.
global_args <- list(
  x = quote(x),
  y = quote(y),
  trControl = trainControl(
    method = "cv",
    number = 2,
    returnResamp = "final"
  )
)

# Fix the resampling folds once so that every model is fit on the same splits.
# trainControl's `index` lists the rows used for *training* in each resample,
# whereas createFolds() returns the held-out rows unless returnTrain = TRUE.
global_args$trControl$index <- createFolds(
  y,
  k = global_args$trControl$number,
  returnTrain = TRUE
)

# Per-model arguments; each entry is appended to global_args for one train()
# call.
model_specific_args <- list(
  "lm" = list(method = "lm", tuneLength = 1),
  "nn" = list(method = "nnet", tuneLength = 3, trace = FALSE),
  "gbm" = list(
    method = "gbm",
    verbose = FALSE,
    # 100 x 2 x 2 = 400 candidate parameter combinations.
    # NOTE(review): newer caret releases may also require an `n.minobsinnode`
    # column in a gbm tuning grid -- confirm against the installed version.
    tuneGrid = expand.grid(
      n.trees = 1:100,
      interaction.depth = c(2, 3),
      shrinkage = c(.1, .01)
    )
  )
)

# Fit one model per entry. quote = FALSE leaves the quoted symbols unwrapped
# so they are evaluated (looked up by name) when train() runs.
list_of_models <- lapply(model_specific_args, function(args) {
  do.call(train, c(global_args, args), quote = FALSE)
})

# Show the call stored on the first fitted model.
print(list_of_models[[1]]$call)
结果要小得多:
train.default(x = x, y = y, method = "lm", trControl = list(method = "cv",
number = 2, repeats = 1, p = 0.75, initialWindow = NULL,
horizon = 1, fixedWindow = TRUE, verboseIter = FALSE, returnData = TRUE,
returnResamp = "final", savePredictions = FALSE, classProbs = FALSE,
summaryFunction = function (data, lev = NULL, model = NULL)
{
if (is.character(data$obs))
data$obs <- factor(data$obs, levels = lev)
postResample(data[, "pred"], data[, "obs"])
}, selectionFunction = "best", preProcOptions = list(thresh = 0.95,
ICAcomp = 3, k = 5), index = list(Fold1 = c(6L, 7L, 11L,
12L, 13L, 14L, 15L, 16L, 21L, 22L, 25L, 26L, 29L, 32L, 34L,
35L, 36L, 37L, 38L, 39L, 40L, 41L, 48L, 49L, 50L, 51L, 52L,
54L, 57L, 58L, 59L, 64L, 65L, 66L, 67L, 69L, 70L, 71L, 72L,
74L, 78L, 80L, 83L, 84L, 85L, 91L, 92L, 93L, 95L, 98L, 99L,
100L, 103L, 105L, 106L, 107L, 109L, 111L, 112L, 116L, 118L,
122L, 123L, 124L, 125L, 128L, 130L, 132L, 133L, 135L, 138L,
141L, 143L, 144L, 145L, 148L), Fold2 = c(1L, 2L, 3L, 4L,
5L, 8L, 9L, 10L, 17L, 18L, 19L, 20L, 23L, 24L, 27L, 28L,
30L, 31L, 33L, 42L, 43L, 44L, 45L, 46L, 47L, 53L, 55L, 56L,
60L, 61L, 62L, 63L, 68L, 73L, 75L, 76L, 77L, 79L, 81L, 82L,
86L, 87L, 88L, 89L, 90L, 94L, 96L, 97L, 101L, 102L, 104L,
108L, 110L, 113L, 114L, 115L, 117L, 119L, 120L, 121L, 126L,
127L, 129L, 131L, 134L, 136L, 137L, 139L, 140L, 142L, 146L,
147L, 149L, 150L)), indexOut = NULL, timingSamps = 0, predictionBounds = c(FALSE,
FALSE), seeds = NA, adaptive = list(min = 5, alpha = 0.05,
method = "gls", complete = TRUE), allowParallel = TRUE),
tuneLength = 1)
不过,如果能不必序列化所有其他选项就更好了。尤其是第三个模型的调用(call)仍然很大:print(list_of_models[[3]]$call)