我将 IPW 和 MI 与 PLS 结合使用,在每个 MI 模型中,我都计算了 95%CI。我的问题是如何将 95%CI 的结果结合到最终结果中。下面是我正在使用的示例脚本。假设感兴趣的参数不服从正态分布。
coeftable[[i]] 包含每个 MI 模型中的系数和 95%CI。
library(Hmisc)
library(dplyr)
library(nlme)
library(reshape)
library(plsRglm)
library(xlsx)
library(boot)
set.seed(123)
id <- c(1:1000)
y <- sample(c(1:5,NA), 1000, replace=T)
x1 <- sample(c(1:2,NA), 1000, replace=T)
x2 <- sample(c(1:3,NA), 1000, replace=T)
x3 <- sample(c(1:4,NA), 1000, replace=T)
df <- data.frame(id,y,x1,x2,x3)
df.nomiss <- subset(df, !is.na(df$y))
# obs==1: with any missing data of x
df.nomiss[,"obs"] <- 0
df.nomiss$obs[is.na(df.nomiss$x1)==TRUE |
is.na(df.nomiss$x2)==TRUE|
is.na(df.nomiss$x3)==TRUE ] <- 1
# only include obs==1 into the imputation
include<-df.nomiss[df.nomiss$obs==1,]
exclude<-anti_join(df.nomiss,include,by="id")
# imputation
m=10
include.i <- aregImpute(~factor(y) + factor(x1) + factor(x2) +factor(x3) ,
data=include,n.impute=m)
include.nomiss <- list(include, include, include, include, include, include,include, include, include, include)
# if a variable is coded as 0, use "include.i$imputed$x1[,i]-1 "
for(i in 1:m){
include.nomiss[[i]]$y[is.na(include.nomiss[[i]]$y)] <-
include.i$imputed$y[,i]
include.nomiss[[i]]$x1[is.na(include.nomiss[[i]]$x1)] <-
include.i$imputed$x1[,i]
include.nomiss[[i]]$x2[is.na(include.nomiss[[i]]$x2)] <-
include.i$imputed$x2[,i]
include.nomiss[[i]]$x3[is.na(include.nomiss[[i]]$x3)] <-
include.i$imputed$x3[,i]
}
missingmodel <- list(NA)
analysismodel<-list(NA)
all<- rep(list(NA), m)
modplsglm <- rep(list(NA), m)
coeftable <- rep(list(NA), m)
rawci <- rep(list(NA), m)
loading <- rep(list(NA), m)
temp.bootplsRglm <- rep(list(NA), m)
# PLSRGLM
R <- 1000
ncomp <- 3
# IPW with PLS
for(i in 1:m){
all[[i]]<-rbind(exclude,include.nomiss[[i]])
# IPW
missingmodel[[i]] <- glm(obs ~y + x1 + x2 +x3 ,
data=all[[i]], family=binomial)
all[[i]]$pw<-(1/missingmodel[[i]]$fitted.values)
# PLSRGLM
modplsglm[[i]] <- plsRglm(y~ factor(x1) + factor(x2) + factor(x3) ,
nt=ncomp,data=all[[i]], modele="pls", weights=all[[i]]$pw)
# bootstrap 95%CI
temp.bootplsRglm[[i]] <- bootplsglm(modplsglm[[i]], typeboot="plsmodel", R=R , statistic=coefs.plsRglmnp, sim="balanced", stype="i", stabvalue=1e6, verbose=TRUE)
indices.temp.bootplsRglm <- !is.na(temp.bootplsRglm[[i]]$t[,1])
temp.bootplsRglm[[i]]$t=temp.bootplsRglm[[i]]$t[indices.temp.bootplsRglm,]
temp.bootplsRglm[[i]]$R=sum(indices.temp.bootplsRglm)
temp.bootplsRglm[[i]]$call$R<-sum(indices.temp.bootplsRglm)
Cornell.bootYX.raw <- temp.bootplsRglm[[i]]
# generate coeftable
options(scipen=999)
coeftable[[i]] <- as.data.frame(modplsglm[[i]]$Coeffs)
colnames(coeftable[[i]]) <- "coef"
rawci[[i]] <- confints.bootpls(Cornell.bootYX.raw, typeBCa=FALSE)
rawci[[i]] <- as.data.frame(rawci[[i]])
col <- c("Normal lower","Normal upper","Basic lower","Basic upper","Percentile lower","Percentile upper")
colnames(rawci[[i]]) <- col
rawci[[i]] <- rawci[[i]][,c("Percentile lower","Percentile upper")]
coeftable[[i]] <- cbind(variable=0,coeftable[[i]],rawci[[i]])
coeftable[[i]][,"variable"]<-rownames(coeftable[[i]])
}
下面是一个 MI 模型的 coeftable,我的预期输出应该是这样的,但呈现了所有 MI 模型的总体估计。
coeftable[[1]]
variable coef Percentile lower Percentile upper
Intercept Intercept 2.96021462 0.000000000 0.00000000
factor.x1.1 factor.x1.1 0.04540381 -0.019860282 0.04854000
factor.x1.2 factor.x1.2 -0.04540381 -0.048540000 0.01986028
factor.x2.2 factor.x2.2 0.23350314 -0.002184034 0.15478083
factor.x2.3 factor.x2.3 0.04506754 -0.063760940 0.09520172
factor.x3.2 factor.x3.2 0.08297860 -0.057287056 0.09292398
factor.x3.3 factor.x3.3 -0.15542543 -0.124509722 0.02694244
factor.x3.4 factor.x3.4 -0.05176159 -0.092618253 0.05736522