我目前正在尝试对包中的数据进行拟合优度测试unmarked
。为此,我使用了在关联的 google 组中编写的代码。这依赖于 parboot 来评估模型的拟合优度。然后它产生一个卡方 P 值和 C-hat 值。
奇怪的是,当我只对模型进行 >90 次模拟时,我会收到以下错误:
checkForRemoteErrors(val) 中的错误:3 个节点产生错误;第一个错误:找不到函数“mb.chisq.RN”
低于此模拟次数,不会遇到错误,可以计算统计量。
我先跑;mb.chisq.RN
mb.chisq.RN <- function (mod, print.table = TRUE, maxK=50,
...){
y.raw <- mod@data@y
N.raw <- nrow(y.raw)
na.raw <- apply(X = y.raw, MARGIN = 1, FUN = function(i) all(is.na(i)))
y.data <- y.raw[!na.raw, ]
N <- N.raw - sum(na.raw)
T <- ncol(y.data)
K <- 0:maxK
det.hist <- apply(X = y.data, MARGIN = 1, FUN = function(i) paste(i,
collapse = ""))
preds.lam <- predict(mod, type = "state")$Predicted
preds.p <- matrix(data = predict(mod, type = "det")$Predicted,
ncol = T, byrow = TRUE)
out.hist <- data.frame(det.hist, preds.lam)
un.hist <- unique(det.hist)
n.un.hist <- length(un.hist)
na.vals <- length(grep(pattern = "NA", x = un.hist)) > 0
if (na.vals) {
id.na <- grep(pattern = "NA", x = un.hist)
id.det.hist.na <- grep(pattern = "NA", x = det.hist)
cohort.na <- sort(un.hist[id.na])
n.cohort.na <- length(cohort.na)
unique.na <- gsub(pattern = "NA", replacement = "N",
x = cohort.na)
na.visits <- sapply(strsplit(x = unique.na, split = ""),
FUN = function(i) paste(ifelse(i == "N", 1, 0), collapse = ""))
names(cohort.na) <- na.visits
n.hist.missing.cohorts <- table(na.visits)
n.missing.cohorts <- length(n.hist.missing.cohorts)
out.hist.na <- out.hist[id.det.hist.na, ]
out.hist.na$det.hist <- droplevels(out.hist.na$det.hist)
just.na <- sapply(X = out.hist.na$det.hist, FUN = function(i) gsub(pattern = "1",
replacement = "0", x = i))
out.hist.na$coh <- sapply(X = just.na, FUN = function(i) gsub(pattern = "NA",
replacement = "1", x = i))
freqs.missing.cohorts <- table(out.hist.na$coh)
na.freqs <- table(det.hist[id.det.hist.na])
preds.p.na <- preds.p[id.det.hist.na, ]
cohort.not.na <- sort(un.hist[-id.na])
out.hist.not.na <- out.hist[-id.det.hist.na, ]
out.hist.not.na$det.hist <- droplevels(out.hist.not.na$det.hist)
n.cohort.not.na <- length(cohort.not.na)
n.sites.not.na <- length(det.hist) - length(id.det.hist.na)
preds.p.not.na <- preds.p[-id.det.hist.na, ]
}
else {
cohort.not.na <- sort(un.hist)
out.hist.not.na <- out.hist
preds.p.not.na <- preds.p
n.cohort.not.na <- length(cohort.not.na)
n.sites.not.na <- length(det.hist)
}
if (n.cohort.not.na > 0) {
exp.freqs <- rep(NA, n.cohort.not.na)
names(exp.freqs) <- cohort.not.na
for (i in 1:n.cohort.not.na) {
eq.solved <- rep(NA, n.sites.not.na)
select.hist <- cohort.not.na[i]
strip.hist <- unlist(strsplit(select.hist, split = ""))
hist.mat <- new.hist.mat <- new.hist.mat1 <- new.hist.mat0 <- matrix(NA, nrow = n.sites.not.na, ncol = T)
for (j in 1:n.sites.not.na) {
if (n.sites.not.na == 1) {
hist.mat[j,] <- preds.p.not.na
} else {hist.mat[j,] <- preds.p.not.na[j,]}
#Pr(y.ij=1|K)
p.k.mat <- sapply(hist.mat[j,],function(r){1-(1-r)^K})
new.hist.mat1[j,] <- dpois(K,out.hist.not.na[j, "preds.lam"]) %*% p.k.mat
new.hist.mat0[j,] <- dpois(K,out.hist.not.na[j, "preds.lam"]) %*% (1-p.k.mat)
new.hist.mat[j,] <- ifelse(strip.hist == "1",
new.hist.mat1[j,], ifelse(strip.hist == "0",
new.hist.mat0[j,], 0))
combo.lam.p <- paste(new.hist.mat[j, ], collapse = "*")
eq.solved[j] <- eval(parse(text = as.expression(combo.lam.p)))
}
exp.freqs[i] <- sum(eq.solved, na.rm = TRUE)
}
freqs <- table(out.hist.not.na$det.hist)
out.freqs <- matrix(NA, nrow = n.cohort.not.na, ncol = 4)
colnames(out.freqs) <- c("Cohort", "Observed", "Expected",
"Chi-square")
rownames(out.freqs) <- names(freqs)
out.freqs[, 1] <- 0
out.freqs[, 2] <- freqs
out.freqs[, 3] <- exp.freqs
out.freqs[, 4] <- ((out.freqs[, "Observed"] - out.freqs[,
"Expected"])^2)/out.freqs[, "Expected"]
}
if (na.vals) {
missing.cohorts <- list()
if (!is.matrix(preds.p.na)) {
preds.p.na <- matrix(data = preds.p.na, nrow = 1)
}
for (m in 1:n.missing.cohorts) {
select.cohort <- out.hist.na[which(out.hist.na$coh ==
names(freqs.missing.cohorts)[m]), ]
select.preds.p.na <- preds.p.na[which(out.hist.na$coh ==
names(freqs.missing.cohorts)[m]), ]
if (!is.matrix(select.preds.p.na)) {
select.preds.p.na <- matrix(data = select.preds.p.na,
nrow = 1)
}
select.preds.p.na[, gregexpr(pattern = "N", text = gsub(pattern = "NA",
replacement = "N", x = select.cohort$det.hist[1]))[[1]]] <- 1
n.total.sites <- nrow(select.cohort)
freqs.na <- table(droplevels(select.cohort$det.hist))
cohort.na.un <- sort(unique(select.cohort$det.hist))
n.hist.na <- length(freqs.na)
exp.na <- rep(NA, n.hist.na)
names(exp.na) <- cohort.na.un
for (i in 1:n.hist.na) {
n.sites.hist <- freqs.na[i]
eq.solved <- rep(NA, n.total.sites)
select.hist <- gsub(pattern = "NA", replacement = "N",
x = cohort.na.un[i])
strip.hist <- unlist(strsplit(select.hist, split = ""))
hist.mat <- new.hist.mat <- new.hist.mat1 <-new.hist.mat0 <- matrix(NA, nrow = n.total.sites, ncol = T)
for (j in 1:n.total.sites) {
hist.mat[j, ] <- select.preds.p.na[j, ]
#Pr(y.ij=1|K)
p.k.mat <- sapply(hist.mat[j,],function(r){1-(1-r)^K})
new.hist.mat1[j,] <- dpois(K,select.cohort[j, "preds.lam"]) %*% p.k.mat
new.hist.mat0[j,] <- dpois(K,select.cohort[j, "preds.lam"]) %*% (1-p.k.mat)
new.hist.mat[j,] <- ifelse(strip.hist == "1",
new.hist.mat1[j,], ifelse(strip.hist == "0",
new.hist.mat0[j,], 1))
combo.lam.p <- paste(new.hist.mat[j, ], collapse = "*")
eq.solved[j] <- eval(parse(text = as.expression(combo.lam.p)))
}
exp.na[i] <- sum(eq.solved, na.rm = TRUE)
}
out.freqs.na <- matrix(NA, nrow = n.hist.na, ncol = 4)
colnames(out.freqs.na) <- c("Cohort", "Observed",
"Expected", "Chi-square")
rownames(out.freqs.na) <- cohort.na.un
out.freqs.na[, 1] <- m
out.freqs.na[, 2] <- freqs.na
out.freqs.na[, 3] <- exp.na
out.freqs.na[, 4] <- ((out.freqs.na[, "Observed"] -
out.freqs.na[, "Expected"])^2)/out.freqs.na[,
"Expected"]
missing.cohorts[[m]] <- list(out.freqs.na = out.freqs.na)
}
}
if (na.vals) {
chisq.missing <- do.call("rbind", lapply(missing.cohorts,
FUN = function(i) i$out.freqs.na))
if (n.cohort.not.na > 0) {
chisq.unobs.det <- N - sum(out.freqs[, "Expected"]) -
sum(chisq.missing[, "Expected"])
chisq.table <- rbind(out.freqs, chisq.missing)
}
else {
chisq.unobs.det <- N - sum(chisq.missing[, "Expected"])
chisq.table <- chisq.missing
}
}
else {
chisq.unobs.det <- N - sum(out.freqs[, "Expected"])
chisq.na <- 0
chisq.table <- out.freqs
}
chisq <- sum(chisq.table[, "Chi-square"]) + chisq.unobs.det
if (print.table) {
out <- list(chisq.table = chisq.table, chi.square = chisq,
model.type = "single-season")
}
else {
out <- list(chi.square = chisq, model.type = "single-season")
}
class(out) <- "mb.chisq"
return(out)
}
这将成功计算卡方值。然后我运行测试。
mb.gof.test.RN <- function (mod, nsim = 100, plot.hist = TRUE, ...){
mod.table <- mb.chisq.RN(mod)
out <- parboot(mod, statistic = function(i) mb.chisq.RN(i)$chi.square,
nsim = nsim)
p.value <- sum(out@t.star >= out@t0)/nsim
if (p.value == 0) {
p.display <- paste("<", 1/nsim)
}
else {
p.display = paste("=", round(p.value, digits = 4))
}
if (plot.hist) {
hist(out@t.star, main = paste("Bootstrapped MacKenzie and Bailey fit statistic (",
nsim, " samples)", sep = ""), xlim = range(c(out@t.star,
out@t0)), xlab = paste("Simulated statistic ", "(observed = ",
round(out@t0, digits = 2), ")", sep = ""))
title(main = bquote(paste(italic(P), " ", .(p.display))),
line = 0.5)
abline(v = out@t0, lty = "dashed", col = "red")
}
c.hat.est <- out@t0/mean(out@t.star)
gof.out <- list(model.type = mod.table$model.type, chisq.table = mod.table$chisq.table,
chi.square = mod.table$chi.square, t.star = out@t.star,
p.value = p.value, c.hat.est = c.hat.est, nsim = nsim)
class(gof.out) <- "mb.chisq"
return(gof.out)
}
>mb.gof.test.RN(fm9)
这会产生以下错误:
checkForRemoteErrors(val) 中的错误:3 个节点产生错误;第一个错误:找不到函数“mb.chisq.RN”
我不完全确定为什么这个错误只发生在一定数量的模拟之上,所以任何指针都会被大量接收。