我有一个二元响应变量 (0/1) 和一个以 0-1 比例连续分布的预测变量。数据中存在显着的空间自相关,因此我正在运行一个 INLA 模型来解释这一点。我创建了一个可重现的示例,它也反映了我的点的空间分布,但我是 INLA 的新手,我认为我的空间项模型公式并不完全正确。
在我的模型中,我的反应是cb
,我的自变量(预测变量)是oldr
。
在我的模型示例中,我从没有空间自相关项的 INLA 模型开始。代码的第二部分说明了我希望构建的最终模型。
普通 INLA 模型(不考虑空间自相关):
# Data
n <- 125 # number of observations
lon <- c(22.58,22.5,22.37,22.21,22.02,21.98,21.94,20.97,21.08,21.15,21.37,21.51,21.58,21.66,20.89,20.87,20.79,20.77,20.82,20.88,21.03,21.15,21.34,25.24,25.31,25.34,25.38,25.39,25.36,25.33,25.3,25.29,25.24,25.17,25.16,25.16,25.23,25.26,25.22,25.39,25.43,25.45,25.51,25.57,25.63,25.68,25.76,25.44,25.52,25.62,25.72,25.81,25.89,25.97,26.05,22.76,27.9,27.87,27.84,27.81,27.78,27.75,27.72,27.69,27.66,27.59,27.63,27.71,27.7,27.69,27.72,27.77,27.75,27.72,27.67,27.71,27.73,27.82,27.89,27.63,27.67,27.76,27.92,28.03,28.12,28.23,28.39,28.7,28.77,28.93,29.28,29.33,29.22,29.13,29.21,29.24,25.6,25.6,25.61,25.62,25.56,25.57,25.59,25.58,25.6,25.69,25.8,25.97,26.1,26.17,26.17,26.21,26.22,26.22,26.29,26.4,26.16,26.15,26.12,26.15,25.95,25.83,25.7,25.65,25.59)
lat <- c(-18.71,-18.67,-18.62,-18.6,-18.43,-18.37,-18.3,-18.04,-18.03,-18.04,-18.03,-18.07,-18.1,-18.15,-18.13,-18.26,-18.54,-19.06,-19.8,-19.69,-19.54,-19.51,-19.48,-18.06,-17.74,-17.65,-17.57,-17.45,-17.37,-17.27,-17.19,-17.1,-16.98,-16.91,-16.78,-16.69,-15.82,-16.22,-16.29,-17.41,-17.32,-17.24,-17.16,-17.07,-16.98,-16.83,-16.73,-17.51,-17.56,-17.57,-17.57,-17.58,-17.62,-17.66,-17.64,-18.66,-18.13,-18.03,-17.93,-17.83,-17.73,-17.63,-17.53,-17.43,-17.41,-17.29,-17.06,-16.59,-16.75,-16.87,-16.46,-16.38,-16.28,-16.16,-16,-15.84,-15.68,-15.52,-15.38,-15.1,-15.17,-15.17,-15.31,-15.41,-15.41,-15.41,-15.36,-15.42,-15.46,-15.58,-15.64,-15.7,-15.7,-15.83,-15.99,-16.14,-14.87,-14.87,-14.86,-14.86,-14.85,-14.83,-14.8,-14.78,-14.75,-14.87,-14.76,-14.74,-14.73,-14.75,-14.71,-14.71,-14.7,-14.75,-14.74,-14.66,-14.81,-14.86,-14.83,-14.8,-15.22,-15.36,-15.41,-15.38,-15.34)
cb <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1)
oldr <- c(0,0.01,0,0,0,0,0,0.2,0.13,0.05,0.03,0.03,0.03,0.01,0.21,0.03,0.01,0,0,0.01,0,0,0,0.02,0,0.01,0.06,0.07,0.04,0.04,0.11,0.15,0.29,0.4,0.59,0.64,0.67,0.63,0.63,0.07,0.07,0.08,0.06,0.07,0.2,0.14,0.12,0.17,0.02,0.03,0.01,0.02,0.01,0.12,0.1,0.01,0.71,0.39,0.12,0.13,0.08,0.06,0.06,0.04,0.04,0.13,0.02,0.63,0.1,0.08,0.62,0.36,0.19,0.16,0.06,0.09,0.04,0.09,0.19,0.26,0.16,0.35,0.19,0.06,0.17,0.13,0.13,0.19,0.19,0.11,0.99,0.99,0.99,0.32,0.28,0.35,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.74,0.74,0.72,0.74,0.78,0.84,0.85,0.86,0.86,0.7,0.78,0.78,0.69,0.78,0.78,0.78,0.78,0.3,0.27,0.75,0.62,0.8)
# Libraries
library(R2jags)
library(INLA)
library(ggplot2)
# Model with no spatial terms
dat.1 <- data.frame(y=cb,x=oldr)
fitted <- subset(dat.1, select=c(x,y))
fitted$y <- NA
newdata <- expand.grid(x=seq(0.05, max(dat.1$x), len=125), y=NA)
dat.pred <- rbind(dat.1,fitted,newdata)
dat.inla <- inla(y~x, family='binomial',
data=dat.pred,
control.family=list(link='logit'),
control.predictor=list(link=1, compute=TRUE),
control.compute=list(dic=TRUE, cpo=TRUE, waic=TRUE))
summary(dat.inla)
newdata <- cbind(newdata,
dat.inla$summary.fitted.values[(nrow(dat.1)+nrow(fitted)+1):nrow(dat.pred),])
head(newdata)
library(reshape)
newdata <- reshape:::rename(newdata, c("0.025quant"="lower", "0.975quant"="upper"))
library(grid)
ggplot(newdata, aes(y=mean, x=x)) +
geom_blank()+
geom_point(data=dat.1, aes(y=y, x=x)) +
geom_ribbon(aes(ymin=lower, ymax=upper), fill='blue', alpha=0.2) +
geom_line() +
theme_classic()+
theme(legend.key.width=unit(2,'lines'), legend.position=c(0,1), legend.justification=c(0,1))+ xlab("older")
具有空间自相关项的 INLA 模型:
loc <- as.matrix(data.frame(lon,lat))
mesh = inla.mesh.create.helper(points.domain=loc,
max.edge=c(0.2, 0.5))
points(coords, col = "red", pch = 16)
proj.obs = inla.mesh.projector(mesh, loc=loc)
proj.pred = inla.mesh.projector(mesh, loc=mesh$loc)
spde = inla.spde2.matern(mesh,
B.tau=cbind(log(1), 1, 0),
B.kappa=matrix(c(log(sqrt(8)/0.2), 0, 1), 1, 3))
covar = oldr
field = inla.qsample(n=1, Q=inla.spde2.precision(spde, theta=c(0,0)))[,1]
### Is this how the dependent variable is supposed to be defined below?
y = cb + inla.mesh.project(proj.obs, field)
# or simply
y <= cb
A.obs = inla.spde.make.A(mesh, loc=loc)
A.pred = inla.spde.make.A(mesh, loc=proj.pred$loc)
stack.obs =
inla.stack(data=list(y=y),
A=list(A.obs, 1),
effects=list(c(list(intercept=rep(1, mesh$n)),
inla.spde.make.index("spatial", spde$n.spde)),
covar=covar),
tag="obs")
# Not sure if my formula is correct here:
formula = y ~ -1 + intercept + covar + f(spatial, model=spde)
result1 = inla(formula,
data=inla.stack.data(stack.obs, spde=spde),
family="binomial",
Ntrials = n,
control.predictor=list(A=inla.stack.A(stack.obs), compute=TRUE),
verbose=TRUE)
plot(y,result1$summary.fitted.values[inla.stack.index(stack.obs,"obs")$data,"mean"])
result1$summary.fix
result1$summary.hyperpar
result1.res <- inla.spde2.result(result1, 'spatial.field', spde, do.transf=TRUE)
## This does not work unfortunately, might be a problem in the model?
result1.res$summary.log.range.nominal