交叉验证得到的预测值存储在 model$predicted[[1]] 中,观察值为 y。如果要分别查看每个折叠的预测值,则需要获取有关折叠拆分的信息。为此,您可以:
1)手动拆分折叠并自己进行交叉验证
2)使用 caret 包
3)稍微修改 rfcv 函数以输出此信息——将 idx 添加到输出列表中
#' Cross-validated random forest feature selection that also returns the folds
#'
#' Fits a random forest on all predictors inside each cross-validation fold,
#' ranks the predictors by the first column of the importance matrix, and then
#' refits on progressively smaller sets of top-ranked predictors. Besides the
#' per-size predictions and errors, the fold assignment of every observation is
#' returned as `idx`.
#'
#' @param trainx Matrix or data frame of predictors (rows are observations).
#' @param trainy Response vector; a factor triggers classification, anything
#'   else is treated as regression.
#' @param cv.fold Number of cross-validation folds.
#' @param scale If `"log"`, the number of predictors is multiplied by `step`
#'   at each stage; otherwise `step` predictors are removed at each stage.
#' @param step Fraction kept per stage (`scale = "log"`) or number of
#'   predictors removed per stage (any other `scale`). The sign is ignored in
#'   the latter case.
#' @param mtry Function mapping a number of predictors to the `mtry` value
#'   passed to `randomForest()`.
#' @param recursive If `TRUE`, predictor importance is re-ranked after every
#'   reduction step instead of using the ranking of the full model throughout.
#' @param ... Further arguments forwarded to `randomForest()`.
#' @return A list with `n.var` (predictor counts tried), `error.cv`
#'   (misclassification rate or mean squared error per count), `predicted`
#'   (list of cross-validated predictions, one element per count) and `idx`
#'   (fold number, in `1..cv.fold`, of each observation).
rfcv2 <- function(trainx, trainy, cv.fold = 5, scale = "log", step = 0.5,
                  mtry = function(p) max(1, floor(sqrt(p))),
                  recursive = FALSE, ...) {
  classRF <- is.factor(trainy)
  n <- nrow(trainx)
  p <- ncol(trainx)

  if (scale == "log") {
    k <- floor(log(p, base = 1 / step))
    # seq_len(k) - 1 is empty when k == 0 (p == 1); 0:(k - 1) would count
    # down to -1 and produce a bogus predictor count larger than p.
    n.var <- round(p * step^(seq_len(k) - 1))
    same <- diff(n.var) == 0
    if (any(same)) {
      n.var <- n.var[-which(same)]
    }
    if (!1 %in% n.var) {
      n.var <- c(n.var, 1)
    }
  } else {
    # The sequence runs downwards from p, so the increment must be negative;
    # a positive `step` used to make seq() fail with "wrong sign in 'by'".
    n.var <- seq(from = p, to = 1, by = -abs(step))
  }

  k <- length(n.var)
  # One prediction vector per predictor count, pre-filled with the response so
  # that each element already has the right type and length.
  cv.pred <- rep(list(trainy), k)

  # Strata used to balance the folds: the classes themselves for
  # classification, otherwise five groups obtained by cycling the labels 1-5
  # over the ranks of the response.
  if (classRF) {
    f <- trainy
  } else {
    f <- factor(rep(1:5, length.out = length(trainy))[order(order(trainy))])
  }
  nlvl <- table(f)
  idx <- numeric(n)
  for (i in seq_along(nlvl)) {
    idx[which(f == levels(f)[i])] <- sample(rep(seq_len(cv.fold),
                                                length.out = nlvl[i]))
  }

  for (i in seq_len(cv.fold)) {
    held.out <- idx == i
    all.rf <- randomForest(trainx[!held.out, , drop = FALSE],
                           trainy[!held.out],
                           trainx[held.out, , drop = FALSE],
                           trainy[held.out],
                           mtry = mtry(p), importance = TRUE, ...)
    cv.pred[[1]][held.out] <- all.rf$test$predicted
    # Column indices of trainx, most important first.
    impvar <- seq_len(p)[order(all.rf$importance[, 1], decreasing = TRUE)]

    # seq_len(k)[-1] is empty when k == 1, whereas 2:k would yield c(2, 1).
    for (j in seq_len(k)[-1]) {
      imp.idx <- impvar[seq_len(n.var[j])]
      sub.rf <- randomForest(trainx[!held.out, imp.idx, drop = FALSE],
                             trainy[!held.out],
                             trainx[held.out, imp.idx, drop = FALSE],
                             trainy[held.out],
                             mtry = mtry(n.var[j]), importance = recursive,
                             ...)
      cv.pred[[j]][held.out] <- sub.rf$test$predicted
      if (recursive) {
        # Re-rank in terms of the original trainx columns. Ranking positions
        # within the subset would select unrelated columns at the next step.
        impvar <- imp.idx[order(sub.rf$importance[, 1], decreasing = TRUE)]
      }
    }
  }

  if (classRF) {
    error.cv <- vapply(cv.pred, function(x) mean(trainy != x), numeric(1))
  } else {
    error.cv <- vapply(cv.pred, function(x) mean((trainy - x)^2), numeric(1))
  }
  names(error.cv) <- names(cv.pred) <- n.var
  list(n.var = n.var, error.cv = error.cv, predicted = cv.pred, idx = idx)
}
现在你可以这样调用:
# Run the procedure with 10 folds, then inspect how observations were split.
model <- rfcv2(trainx = x, trainy = y, cv.fold = 10)
model[["idx"]]  # fold number assigned to each observation
请注意,rfcv 函数并非为单纯的交叉验证而设计,而是为变量选择而设计的。因此,用它来做交叉验证会执行大量冗余计算。