0

我想知道是否有人有在 GLM 交叉验证函数 cv.glm(包:boot)中指定多次置换(即重复进行随机划分)的经验?我使用 K=2 的设置,将数据随机分成大小大致相等的两组,分别作为训练集和验证集。在某些情况下,$delta 的结果会因随机种子的不同而有很大差异。从 ?cv.glm 的帮助文档中,我看不到任何可以增加置换次数的选项。

例子

require(boot)

# Example data set: 26 observations of a response Y and two predictors
# X1 and X2. Values are reproduced verbatim from the original dput() output.
DF <- data.frame(
  Y = c(
    0.158507483, 0.008510161, 0.002684648, 0.009587276, 0.001803681,
    0.010173461, 0.002273384, 0.00345826, 0.051424454, 0.029937484,
    0.194813452, 0.042138033, 0.022944148, 0.729585218, 0.887009621,
    0.008899131, 0.001588576, 0.0216036, 0.001409499, 0.161051383,
    0.026504919, 0.001495132, 0.059066545, 0.008317594, 0.490868633,
    0.057027831
  ),
  X1 = c(
    0.0974369543591941, -0.11971810600977, -0.168908964300336,
    -0.0011723143713434, -0.200018273737778, 0.0536459384966756,
    -0.188248143615029, -0.154736748196712, 0.0529959236206016,
    -0.152396350558232, 0.103766445240172, -0.0693365907826557,
    -0.114615555500542, 0.488829422819801, 0.561719898192691,
    -0.0469180067616361, 0.0631502939411764, -0.135689617930714,
    0.0343957489602316, -0.0749974069726867, -0.107592097416425,
    0.067741017650224, -0.167713403634508, 0.275062271178857,
    0.276065626134302, -0.0926000525628916
  ),
  X2 = c(
    -0.19192408577628, 0.116576354094024, 0.208731289320505,
    -0.138772290234524, 0.364065047213473, -0.1574052089755,
    0.285540178523006, 0.29343767019163, -0.203222931158516,
    0.0835579872715545, -0.157325117354138, -0.0242157560597033,
    -0.175123479037643, 0.174087353210292, 0.246559485637939,
    -0.43074835446357, -0.0181308378901971, 0.0525230701557242,
    -0.121813588478372, -0.0549274842561502, -0.115591654073407,
    -0.0190993986035446, 0.124566313208749, 0.138414677580375,
    -0.0981459346380045, -0.319191657096572
  )
)

# Gaussian GLM with a log link: Y modelled on X1 and X2 using the rows of DF.
fmla1 <- Y ~ X1 + X2
glm1 <- glm(formula = fmla1, data = DF, family = gaussian(link = "log"))
# Print coefficient table and fit diagnostics for the full-data model.
summary(glm1)

# Seed once, then run the same 2-fold cross-validation four times in a row.
# The four runs give very different estimates (see the outputs below).
set.seed(111)
cv1 <- cv.glm(data = DF, glmfit = glm1, K = 2)
cv2 <- cv.glm(data = DF, glmfit = glm1, K = 2)
cv3 <- cv.glm(data = DF, glmfit = glm1, K = 2)
cv4 <- cv.glm(data = DF, glmfit = glm1, K = 2)

# RESULTS: the $delta component of each run, with the output recorded by
# the original author shown beneath each call.
cv1$delta
#[1] 0.007317702 0.005484949
cv2$delta
#[1] 0.12918099 0.06621125
cv3$delta
#[1]  1.029601e+31 -3.602880e+16
cv4$delta
#[1] 0.02860412 0.01581949
    
4

1 回答 1

1

作为多次运行该函数的示例,您可以执行以下操作:

# Run one 2-fold cross-validation of glm1 on DF and return only the
# $delta component of the result. Reads DF and glm1 from the calling
# environment.
f <- function() {
  cv_fit <- cv.glm(DF, glm1, K = 2)
  cv_fit$delta
}

# Seed once, then collect the $delta values from four repeated runs;
# each column of the printed matrix is one run.
set.seed(111)
replicate(4, f())
#             [,1]       [,2]       [,3]     [,4]
# [1,] 0.013848041 0.05176088 0.06215253 12512004
# [2,] 0.008418343 0.02743163 0.03268880  6256002

这将为您提供一个 2×n 的结果矩阵(每一列对应一次运行得到的 $delta)。n 的取值可根据需要自行调整。(请注意,对于您提供的数据,该模型经常无法收敛:只有 26 行数据,不足以在一分为二之后再进行拟合。)

于 2013-12-12T13:27:03.550 回答