1

我正在准备对多个估算数据集的平均预测误差进行自举估计。我的函数似乎无法在范围内找到因变量。有什么办法可以规避吗?

多重插补运行顺利,但具体问题似乎是该行

mod.nb.train <- with(data = data.mi.train, exp = glm.nb(f))

找不到变量CG.tot

eval(expr,envir,enclos)中的错误:找不到对象'CG.tot'

但是,如果我将公式声明为字符串:

glm.nb(formula=CG.tot~Fibrinogen)

有用...

最小运行示例:

library(mice)
library(MASS)

#compute the mean prediction error on a dataframe with missing data
  predicterr <- function(f, data, indices){
    if(!(class(f)=="formula")){stop("'f' must be of the 'formula' type")}
    if(!(class(data)=="data.frame")){stop("'data' must be of the 'data.frame' type")}
    #recompute random sampling & multiple imputation
    data.test <- data[sample(nrow(data), 15),]
    data.train <- data[setdiff(rownames(data), rownames(data.test)),]
    data.mi.train <- mice(data.train)
    data.mi.test <- mice(data.test)
    #recompute model
    mod.nb.train <- with(data = data.mi.train, exp = glm.nb(f))
    coeffs <- summary(pool(mod.nb.train))[,"est"]
    #compute prediction error on each dataset row
    errvec <- apply(complete(data.mi.test, include = F, action = "long")[,c(names(coeffs)[-1], as.character(f)[2])],
                    1, function(x){
                      return(exp(sum(x[1:length(x)-1]*coeffs[-1], coeffs[1]))-x[length(x)])
    })
    return(mean(errvec))
  }
predicterr(CG.tot~Fibrinogen, d.mi)

数据集(有点长,但这是为了估算......):

d <- structure(list(Hb = c(7.5, 12.9, 12.9, 10.2, 10.5, 11.2, 12.7, 
9.3, 11.7, 13.4, 151, 10.9, 5.9, 12.8, 10.2, 15.3, 13.8, 9.6, 
7.6, 12.2, 11.1, 13.6, 8.9, 7.2, 7.8, 8.7, 10.3, 14, 8.8, 7.5
), Hct = c(23, 39.8, 39.4, 31.6, 32.5, 34.4, 39, 28, 35.9, 41.2, 
43.8, 33.7, 18.6, 37.7, 31.7, 44, 87.3, 29.4, 23.6, 37.7, 34.3, 
39.8, 27.4, 22.6, 24.2, 29.1, 31.8, 43.1, 27.3, 23.3), EXTEM.CT = c(51L, 
60L, 45L, 115L, 55L, 48L, 49L, 106L, 56L, 68L, 61L, 53L, 69L, 
44L, 58L, 126L, 47L, 68L, 49L, 68L, 51L, 84L, 63L, 66L, 51L, 
108L, 63L, 51L, 53L, 63L), EXTEM.CFT = c(133L, 162L, 175L, 216L, 
101L, 60L, 140L, 248L, 137L, 203L, 113L, 199L, 316L, 90L, 224L, 
235L, 133L, 46L, 308L, 300L, 119L, 420L, 44L, 207L, 91L, 69L, 
96L, 130L, 153L, 99L), EXTEM.MCF = c(59L, 55L, 50L, 46L, 64L, 
72L, 52L, 46L, 50L, 50L, 60L, 40L, 40L, 56L, 46L, 47L, 52L, 67L, 
40L, 35L, 83L, 30L, 82L, 47L, 61L, 76L, 63L, 51L, 58L, 58L), 
    INTEM.CT = c(NA, 158L, 154L, 240L, 141L, 141L, 143L, 122L, 
    104L, 193L, 183L, 186L, 182L, 172L, 192L, 149L, 133L, 162L, 
    238L, 158L, 144L, 144L, 162L, 213L, 139L, 157L, 104L, 376L, 
    140L, 192L), INTEM.CFT = c(NA, 91L, 119L, 165L, 97L, 51L, 
    118L, 190L, 84L, 90L, 82L, 114L, 226L, 90L, 89L, 209L, NA, 
    64L, 203L, 222L, 64L, 104L, 43L, 170L, 66L, 50L, 61L, 332L, 
    70L, 66L), INTEM.MCF = c(NA, 57L, 48L, 48L, 74L, 70L, 49L, 
    50L, 50L, 55L, 58L, 49L, 40L, 57L, 48L, 46L, 64L, 68L, 44L, 
    39L, 64L, 54L, 80L, 51L, 64L, 78L, 68L, 54L, 62L, 61L), FIBTEM.CT = c(50L, 
    62L, 101L, 123L, 58L, 49L, 49L, 74L, 77L, 117L, 61L, 54L, 
    79L, 41L, 69L, 189L, 49L, 67L, 55L, 56L, 57L, 59L, 56L, 62L, 
    57L, 65L, 51L, 58L, 68L, 67L), FIBTEM.CFT = c(NA, NA, NA, 
    NA, NA, 94L, NA, NA, NA, NA, NA, 615L, NA, 56L, NA, NA, NA, 
    79L, NA, NA, 625L, NA, 75L, NA, 892L, NA, NA, NA, NA, 1206L
    ), FIBTEM.MCF = c(9L, 9L, NA, 5L, 10L, 21L, 11L, 4L, 6L, 
    3L, 16L, 7L, 6L, 31L, NA, 4L, NA, 35L, 11L, 10L, 42L, NA, 
    28L, 13L, 22L, 28L, 8L, 7L, 9L, 21L), INR = c(1.14, 1, 1, 
    1.33, 1.01, 1.07, 1.06, 1.43, 1.22, 1.12, 1.18, 1.54, NA, 
    1.3, 1.13, 1.05, 1.09, 1.11, 1.49, 1.22, 1.33, 1.04, NA, 
    1.87, 1.67, 1, 1, 1.07, 1.12, 1.88), PTT = c(30, 28.4, 22.1, 
    37.8, 25.6, 28.9, 27.2, 32.7, 27.2, 28.9, 27.3, 69.9, 132, 
    31.9, 26.5, NA, 28.9, 44.3, 50.8, 36.6, NA, 23.5, 30, 70.6, 
    41.2, 30.1, 25.7, 26.7, 26, 41.9), Platelets = c(150, 193, 
    343, 138, 284, 216, 141, 291, 142, 230, 254, 126, NA, 249, 
    153, 308, 253, 66, 30, 41, 293, 208, 545, 141, 136, 256, 
    249, 305, 327, 112), Fibrinogen = c(1.3, NA, NA, 0.9, 2.1, 
    3.4, 2.3, 1.1, 1.5, 1.1, 1.8, 0.8, NA, 2.3, 2.4, NA, 2.2, 
    7.4, 1.8, 1.7, NA, 2.6, 7.1, 0.6, 1.2, NA, 1.1, 2.5, 1.7, 
    2), CG.tot = c(3L, 2L, 3L, 11L, 12L, 0L, 1L, 10L, 4L, 4L, 
    5L, 0L, 12L, 11L, 3L, 9L, 5L, 0L, 4L, 0L, 0L, 3L, 0L, 21L, 
    2L, 1L, 1L, 1L, 2L, 3L)), .Names = c("Hb", "Hct", "EXTEM.CT", 
"EXTEM.CFT", "EXTEM.MCF", "INTEM.CT", "INTEM.CFT", "INTEM.MCF", 
"FIBTEM.CT", "FIBTEM.CFT", "FIBTEM.MCF", "INR", "PTT", "Platelets", 
"Fibrinogen", "CG.tot"), row.names = c(50L, 38L, 54L, 82L, 86L, 
4L, 24L, 78L, 59L, 58L, 72L, 16L, 85L, 81L, 45L, 77L, 70L, 6L, 
63L, 7L, 11L, 53L, 13L, 93L, 36L, 30L, 18L, 19L, 40L, 43L), class = "data.frame")
4

1 回答 1

2

您缺少一个参数glm.nb

  mod.nb.train <- with(data = data.mi.train, exp = glm.nb(f, environment()))

它有效。

于 2016-07-13T09:53:01.443 回答