我正在尝试使用 r 中的 jags 在模型中集成多种类型的丰度数据。我正在努力使用区间审查来拟合具有计数和丰度类的二项式 N 混合模型。
我的数据集 1 包括来自多个站点、调查轮次和多年的计数数据丰度。数据集 2 由一年(第 1 年,嵌套在数据集 1 中的第 1-5 年)的聚合数据组成,以丰度类数据的形式。
一个简化的数据集可以大致按照Ch 进行模拟。10.3-5 in Marc Kéry & J. Andy Royle (2020):在生态学中应用层次建模。使用 R 和 BUGS 模拟分布、丰度和物种丰富度。第 2 卷:动态模型和高级模型:
library(AHMbook)
library(jagsUI)
library(berryFunctions)
## Simulate two data sets
# Constants for simulation function
M1 <- 500 # Number of sites with abundance-class surveys
J1 <- 1 # Number of surveys in abundance-class survey
M2 <- 100 # Number of sites with full-count surveys
J2 <- 4 # Number of surveys in full-count surveys
mean.lam1 <- 6.5 # Abundance intercept
mean.lam2 <- 6 # Abundance intercept
mean.lam3 <- 5.5 # Abundance intercept
mean.lam4 <- 5 # Abundance intercept
mean.lam5 <- 4.5 # Abundance intercept
beta.lam <- 0.5 # Coefficients of a site covariate in abundance
mean.p1tot <- 1-(1-0.4)^4 # Detection intercept in abundance-class survey: Total detection probability after 4 surveys with each p=0.4
mean.p2 <- 0.4 # Detection intercept in full-count survey
# Simulate data set 1 for abundance-class surveys (surveyd in year1)
set.seed(1)
str(data1 <- simNmix(nsite = M1, nvisit = J1, mean.lam = mean.lam1,
mean.p = mean.p1tot, beta2.lam = beta.lam,
show.plot = FALSE))
# Simulate data sets 2a-e for full-count surveys, 5 data sets for 5 years (year1-5) with varying mean.lam (decreasing mean abundance over time)
str(data2a <- simNmix(nsite = M2, nvisit = J2, mean.lam = mean.lam1,
mean.p = mean.p2, beta2.lam = beta.lam,
show.plot = FALSE))
str(data2b <- simNmix(nsite = M2, nvisit = J2, mean.lam = mean.lam2,
mean.p = mean.p2, beta2.lam = beta.lam,
show.plot = FALSE))
str(data2c <- simNmix(nsite = M2, nvisit = J2, mean.lam = mean.lam3,
mean.p = mean.p2, beta2.lam = beta.lam,
show.plot = FALSE))
str(data2d <- simNmix(nsite = M2, nvisit = J2, mean.lam = mean.lam4,
mean.p = mean.p2, beta2.lam = beta.lam,
show.plot = FALSE))
str(data2e <- simNmix(nsite = M2, nvisit = J2, mean.lam = mean.lam5,
mean.p = mean.p2, beta2.lam = beta.lam,
show.plot = FALSE))
# Get lower and upper class boundary for every binned count
breaks <- c(0, 1, 2, 3, 7, 20, 50, 150, 400) # Up to and including
Cclass <- classify(c(data1$C), method = 'custom', breaks = breaks)$index
Aclass <- matrix(Cclass, byrow = FALSE, ncol = data1$nvisit)
n <- length(Cclass)
limits <- array(NA, c(n, 2), list(1:n, c('Lower', 'Upper')))
for(i in 1:n){
limits[i, 1:2] <- c(breaks[Cclass[i]], breaks[Cclass[i]+1])
}
# Add NAs for unsurveyed years (year2-5) to data of surveyed year (year1)
limitsNA <- array(NA, c(500, 5, 2), list(1:500,1:5,c('Lower', 'Upper')))
for(i in 1:500){
limitsNA[i, 1, 1:2] <- c(limits[i,1], limits[i,2])
}
# Vectorize the environmental covariate
X22=rowMeans(array(c(data2a$site.cov[,2], data2b$site.cov[,2], data2c$site.cov[,2], data2d$site.cov[,2], data2e$site.cov[,2]), dim=c(100, 5))) #using mean, as my original data set includes a site cov that is constant across the survey years
# Response for interval censoring = simply a vector of ones !
y <- rep(1, 500)
# Bundle data
str(bdata <- list(y = cbind(y,y,y,y,y), M1 = nrow(Aclass), X21 = data1$site.cov[,2],
limits = limitsNA, C2 = array(c(data2a$C, data2b$C, data2c$C, data2d$C, data2e$C), dim=c(100, 4,5)), M2 = nrow(data2$C), J2 = ncol(data2$C), T2=5,
X22=X22, year2=as.vector(scale(c(2005:2009)))))
我的目标是使用数据集 1(全计数数据)来估计模型的似然部分中的检测概率和共享协变量。我想用更大但更粗糙的数据集 2 来改进共享似然协变量。我设法做到了。 但我也想使用数据集 1 提供的趋势信息(包括通过年份协变量)来推断数据集 2 的丰度。最后,我想获得数据集 2 中包含的站点的 N 估计值缺失的 2-5 年仅包含在数据集 1 中。
cat(file = "model2.txt", "
model {
# Priors for the parameters shared in both data sets
alpha.lam ~ dnorm(0, 0.01) # Abundance intercept
beta.lam ~ dnorm(0, 0.1) # Abundance slope on site cov X2
beta.lam1 ~ dnorm(0, 0.1) # Abundance slope on year
for (i in 1:M2){
for (j in 1:J2){
p2[i,j] ~ dunif(0,1)
}}
# Likelihood
# Model for latent abundance in both data sets
# Note same parameter names means parameters are identical
for (i in 1:M1){ # Data set 1
for(t in 1:T2){
N1[i,t] ~ dpois(lambda1[i,t])
log(lambda1[i,t]) <- alpha.lam + beta.lam * X21[i] + beta.lam1 * year2[t] #more complex covariates included in original model...
}}
for (i in 1:M2){ # Data set 2
for (t in 1:T2){
N2[i,t] ~ dpois(lambda2[i,t])
log(lambda2[i,t]) <- alpha.lam + beta.lam * X22[i] + beta.lam1 * year2[t]
}}
# Observation model for observed counts and for detection
# Observation model for data set 2 (full counts)
for (i in 1:M2){ # Data set 2
for(j in 1:J2){
for(t in 1:T2){
C2[i,j,t] ~ dbin(p2[i,j], N2[i,t])
}}}
for (i in 1:M2){
ptot[i]<- 1-(1-p2[i,1])*(1-p2[i,2])*(1-p2[i,3])*(1-p2[i,4]) #Total observ prob after 4 survey dates per site
}
ptotmean <- mean(ptot) #Mean total detection probability after four surveys
# Observation model for data set 1 (binned counts)
for (i in 1:M1){
for(t in 1:T2){
y[i,t] ~ dinterval(C1[i,t], limits[i,t,]) # interval censoring
C1[i,t] ~ dbin(ptotmean, N1[i,t]) # using average detection probability from oberservation model 2
}}
# Derived quantities
for (t in 1:T2){
Ntotal1[t] <- sum(N1[,t]) # Total abundance in data set 1
Ntotal2[t] <- sum(N2[,t]) # Total abundance in data set 2
Nmean1[t] <- mean(N2[,t])
Nmean2[t] <- mean(N2[,t])
}
for (j in 1:J2){
pmean[j] <- mean(p2[,j])
}
}
")
# Initial values
Nst1 <- limitsNA[,,2]+5
Nst1[is.na(Nst1)] <- round(mean(Nst1, na.rm=T))+5
Cst <- limitsNA[,,1]+1
Cst[is.na(Cst)] <- round(mean(Cst, na.rm=T))+1
Nst2 <- apply(bdata$C2, c(1,3), max)+5
inits <- function() list(N1 = Nst1, C1 = Cst, N2 = Nst2)
# Parameters monitored
params <- c("mean.lam", "beta.lam", "beta.lam1",
"Ntotal1", "Ntotal2", "GTotalN", "N1", "C1", "N2", "Ntotal1", "Ntotal2", "Nmean1", "Nmean2", "pmean", "ptot", "ptotmean")
# MCMC settings
# na <- 1000 ; nc <- 3 ; ni <- 50000 ; nb <- 10000 ; nt <- 40
na <- 1000 ; nc <- 3 ; ni <- 5000 ; nb <- 1000 ; nt <- 4 # ~~~~ for testing, 2 mins
# Call JAGS (ART 22 min), gauge convergence and summarize posteriors
out2 <- jags(bdata, inits, params, "model2.txt", n.adapt = na, n.chains = nc,
n.thin = nt, n.iter = ni, n.burnin = nb, parallel = TRUE)
R 返回以下错误:
Error in checkForRemoteErrors(val) :
3 nodes produced errors; first error: RUNTIME ERROR:
Unable to resolve the following parameters:
limits[1,2,1:2] (line 40)
limits[1,3,1:2] (line 40)
limits[1,4,1:2] (line 40)
limits[1,5,1:2] (line 40)
[...]
Dinterval 似乎缺少限制。到目前为止,我找不到解决问题的方法,因为 dinterval 的这种应用并不常见。关于如何将数据丰度类估计推断到数据集 1 的时间跨度,同时在模型部分之间共享协变量的任何想法?
我遇到的另一个小问题:与模拟数据不同,我的真实丰度级数据还包括缺勤。我未能将这些缺席包括在内,因此暂时将它们从我的数据集中排除。我想 dinterval 无法处理具有限制(0,0)的非丰富类数据。我将不胜感激有关如何将这些站点包含在我的模型中的任何想法。
我会很高兴任何有用的建议!谢谢,乔
(这篇文章也发布在 sourceforge 上:https ://sourceforge.net/p/mcmc-jags/discussion/610037/thread/224919442a/ )