0

我正在尝试使用 r 中的 jags 在模型中集成多种类型的丰度数据。我正在努力使用区间审查来拟合具有计数和丰度类的二项式 N 混合模型

我的数据集 1 包括来自多个站点、调查轮次和多年的计数数据丰度。数据集 2 由一年(第 1 年,嵌套在数据集 1 中的第 1-5 年)的聚合数据组成,以丰度类数据的形式。

一个简化的数据集可以大致按照Ch 进行模拟。10.3-5 in Marc Kéry & J. Andy Royle (2020):在生态学中应用层次建模。使用 R 和 BUGS 模拟分布、丰度和物种丰富度。第 2 卷:动态模型和高级模型

library(AHMbook)
library(jagsUI)
library(berryFunctions)

## Simulate two data sets
# Constants for simulation function
M1 <- 500           # Number of sites with abundance-class surveys
J1 <- 1             # Number of surveys in abundance-class survey
M2 <- 100           # Number of sites with full-count surveys
J2 <- 4             # Number of surveys in full-count surveys
mean.lam1 <- 6.5    # Abundance intercept
mean.lam2 <- 6      # Abundance intercept
mean.lam3 <- 5.5    # Abundance intercept
mean.lam4 <- 5      # Abundance intercept
mean.lam5 <- 4.5    # Abundance intercept
beta.lam <- 0.5     # Coefficients of a site covariate in abundance
mean.p1tot <- 1-(1-0.4)^4 # Detection intercept in abundance-class survey: Total detection probability after 4 surveys with each p=0.4
mean.p2 <- 0.4      # Detection intercept in full-count survey

# Simulate data set 1 for abundance-class surveys (surveyd in year1)
set.seed(1)
str(data1 <- simNmix(nsite = M1, nvisit = J1, mean.lam = mean.lam1,
                     mean.p = mean.p1tot, beta2.lam = beta.lam,
                     show.plot = FALSE))

# Simulate data sets 2a-e for full-count surveys, 5 data sets for 5 years (year1-5) with varying mean.lam (decreasing mean abundance over time) 
str(data2a <- simNmix(nsite = M2, nvisit = J2, mean.lam = mean.lam1,
                     mean.p = mean.p2, beta2.lam = beta.lam,
                     show.plot = FALSE))
str(data2b <- simNmix(nsite = M2, nvisit = J2, mean.lam = mean.lam2,
                      mean.p = mean.p2, beta2.lam = beta.lam,
                      show.plot = FALSE))
str(data2c <- simNmix(nsite = M2, nvisit = J2, mean.lam = mean.lam3,
                      mean.p = mean.p2, beta2.lam = beta.lam,
                      show.plot = FALSE))
str(data2d <- simNmix(nsite = M2, nvisit = J2, mean.lam = mean.lam4,
                      mean.p = mean.p2, beta2.lam = beta.lam,
                      show.plot = FALSE))
str(data2e <- simNmix(nsite = M2, nvisit = J2, mean.lam = mean.lam5,
                      mean.p = mean.p2, beta2.lam = beta.lam,
                      show.plot = FALSE))

# Get lower and upper class boundary for every binned count
breaks <- c(0, 1, 2, 3, 7, 20, 50, 150, 400) # Up to and including
Cclass <- classify(c(data1$C), method = 'custom', breaks = breaks)$index
Aclass <- matrix(Cclass, byrow = FALSE, ncol = data1$nvisit)
n <- length(Cclass)
limits <- array(NA, c(n, 2), list(1:n, c('Lower', 'Upper')))
for(i in 1:n){
  limits[i, 1:2] <- c(breaks[Cclass[i]], breaks[Cclass[i]+1])
}

# Add NAs for unsurveyed years (year2-5) to data of surveyed year (year1)
limitsNA <- array(NA, c(500, 5, 2), list(1:500,1:5,c('Lower', 'Upper'))) 
for(i in 1:500){
  limitsNA[i, 1, 1:2] <- c(limits[i,1], limits[i,2])
}

# Vectorize the environmental covariate
X22=rowMeans(array(c(data2a$site.cov[,2], data2b$site.cov[,2], data2c$site.cov[,2], data2d$site.cov[,2], data2e$site.cov[,2]), dim=c(100, 5))) #using mean, as my original data set includes a site cov that is constant across the survey years
 
# Response for interval censoring = simply a vector of ones !
y <- rep(1, 500)

# Bundle data
str(bdata <- list(y = cbind(y,y,y,y,y), M1 = nrow(Aclass), X21 = data1$site.cov[,2],
              limits = limitsNA, C2 = array(c(data2a$C, data2b$C, data2c$C, data2d$C, data2e$C), dim=c(100, 4,5)), M2 = nrow(data2$C), J2 = ncol(data2$C), T2=5,
              X22=X22, year2=as.vector(scale(c(2005:2009)))))

我的目标是使用数据集 1(全计数数据)来估计模型的似然部分中的检测概率和共享协变量。我想用更大但更粗糙的数据集 2 来改进共享似然协变量。我设法做到了。 但我也想使用数据集 1 提供的趋势信息(包括通过年份协变量)来推断数据集 2 的丰度。最后,我想获得数据集 2 中包含的站点的 N 估计值缺失的 2-5 年仅包含在数据集 1 中。

    cat(file = "model2.txt", "
model {
  # Priors for the parameters shared in both data sets
  alpha.lam ~ dnorm(0, 0.01)      # Abundance intercept
  beta.lam ~ dnorm(0, 0.1)        # Abundance slope on site cov X2
  beta.lam1 ~ dnorm(0, 0.1)        # Abundance slope on year
  for (i in 1:M2){
      for (j in 1:J2){
      p2[i,j] ~ dunif(0,1)
      }}

  # Likelihood
  # Model for latent abundance in both data sets
  # Note same parameter names means parameters are identical
  for (i in 1:M1){                # Data set 1
    for(t in 1:T2){
    N1[i,t] ~ dpois(lambda1[i,t])
    log(lambda1[i,t]) <- alpha.lam + beta.lam * X21[i] + beta.lam1 * year2[t] #more complex covariates included in original model...
  }}
  for (i in 1:M2){                # Data set 2
    for (t in 1:T2){
    N2[i,t] ~ dpois(lambda2[i,t])
    log(lambda2[i,t]) <- alpha.lam + beta.lam * X22[i] + beta.lam1 * year2[t]
  }}

  # Observation model for observed counts and for detection
  # Observation model for data set 2 (full counts)
  for (i in 1:M2){ # Data set 2
    for(j in 1:J2){
      for(t in 1:T2){
      C2[i,j,t] ~ dbin(p2[i,j], N2[i,t])
    }}}
  for (i in 1:M2){
    ptot[i]<- 1-(1-p2[i,1])*(1-p2[i,2])*(1-p2[i,3])*(1-p2[i,4]) #Total observ prob after 4 survey dates per site
    }
  ptotmean <- mean(ptot) #Mean total detection probability after four surveys
  # Observation model for data set 1 (binned counts)
  for (i in 1:M1){
  for(t in 1:T2){
    y[i,t] ~ dinterval(C1[i,t], limits[i,t,])       # interval censoring
    C1[i,t] ~ dbin(ptotmean, N1[i,t])             # using average detection probability from oberservation model 2
  }}

  # Derived quantities
  for (t in 1:T2){
  Ntotal1[t] <- sum(N1[,t])            # Total abundance in data set 1
  Ntotal2[t] <- sum(N2[,t])            # Total abundance in data set 2
  Nmean1[t] <- mean(N2[,t])
  Nmean2[t] <- mean(N2[,t])
  }
  for (j in 1:J2){
      pmean[j] <- mean(p2[,j])
    }
}
")

# Initial values
Nst1 <- limitsNA[,,2]+5
Nst1[is.na(Nst1)] <- round(mean(Nst1, na.rm=T))+5
Cst <- limitsNA[,,1]+1
Cst[is.na(Cst)] <- round(mean(Cst, na.rm=T))+1
Nst2 <- apply(bdata$C2, c(1,3), max)+5
inits <- function() list(N1 = Nst1, C1 = Cst, N2 = Nst2)

# Parameters monitored
params <- c("mean.lam", "beta.lam", "beta.lam1",
            "Ntotal1", "Ntotal2", "GTotalN", "N1", "C1", "N2", "Ntotal1", "Ntotal2", "Nmean1", "Nmean2", "pmean",  "ptot", "ptotmean")

# MCMC settings
# na <- 1000 ; nc <- 3 ; ni <- 50000 ; nb <- 10000 ; nt <- 40
na <- 1000 ; nc <- 3 ; ni <- 5000 ; nb <- 1000 ; nt <- 4  # ~~~~ for testing, 2 mins

# Call JAGS (ART 22 min), gauge convergence and summarize posteriors
out2 <- jags(bdata, inits, params, "model2.txt", n.adapt = na, n.chains = nc,
             n.thin = nt, n.iter = ni, n.burnin = nb, parallel = TRUE)

R 返回以下错误:

Error in checkForRemoteErrors(val) : 
  3 nodes produced errors; first error: RUNTIME ERROR:
Unable to resolve the following parameters:
limits[1,2,1:2] (line 40)
limits[1,3,1:2] (line 40)
limits[1,4,1:2] (line 40)
limits[1,5,1:2] (line 40)
[...]

Dinterval 似乎缺少限制。到目前为止,我找不到解决问题的方法,因为 dinterval 的这种应用并不常见。关于如何将数据丰度类估计推断到数据集 1 的时间跨度,同时在模型部分之间共享协变量的任何想法?

我遇到的另一个小问题:与模拟数据不同,我的真实丰度级数据还包括缺勤。我未能将这些缺席包括在内,因此暂时将它们从我的数据集中排除。我想 dinterval 无法处理具有限制(0,0)的非丰富类数据。我将不胜感激有关如何将这些站点包含在我的模型中的任何想法。

我会很高兴任何有用的建议!谢谢,乔

(这篇文章也发布在 sourceforge 上:https ://sourceforge.net/p/mcmc-jags/discussion/610037/thread/224919442a/ )

4

0 回答 0