我正在运行具有多个因素和连续预测变量的回归模型。我需要跟进多重比较。在这篇文章之后,我能够emmeans
正确运行,并且似乎得到了适当的成对比较。但是,当我尝试获取 CLD 输出时失败了。欢迎大家提出意见。
# part of my dataset
df.sub <- structure(list(Year = c(2014, 2014, 2014, 2014, 2014, 2014, 2014,
2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,
2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,
2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2015,
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015,
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015,
2015, 2015, 2015, 2015, 2015, 2015), Transect = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("Transect1", "Transect2", "Transect3",
"Transect4"), class = "factor"), Dist = c(450, 450, 450,
450, 986, 986, 986, 986, 986, 1996, 1996, 1996, 1996, 4082, 4082,
4082, 72, 72, 72, 72, 72, 292, 292, 292, 292, 555, 555, 555,
555, 555, 1055, 1055, 1055, 1055, 1650, 1650, 1650, 1650, 1650,
450, 450, 450, 987, 987, 987, 1994, 1994, 1994, 4078, 4078, 4078,
120, 120, 120, 325, 325, 325, 560, 560, 560, 1070, 1070, 1070,
1070, 1650, 1650, 1650, 1650), Response = c(12000, 13000, 12000,
12000, 13000, 13000, 13000, 12000, 13000, 13000, 13000, 12000,
12000, 9600, 11000, 10000, 6100, 8400, 5500, 6100, 6300, 7200,
7200, 6800, 6700, 7800, 6800, 6400, 6000, 5700, 8300, 7900, 8400,
8200, 9000, 9900, 7900, 8100, 7600, 12000, 14000, 12000, 13000,
14000, 14000, 14000, 12000, 15000, 13000, 12000, 11000, 8400,
9600, 8700, 7300, 7300, 7100, 5900, 7100, 6500, 8600, 8100, 7800,
7400, 10000, 9800, 7500, 8500), Covariate = c(67, 49, 62, 70, 73,
60, 61, 68, 72, 54, 44, 43, 41, 52, 44, 47, 9.4, 18.3, 10.3,
14.4, 13.9, 14, 18.3, 10.7, 12, 23.4, 27.1, 11.6, 8.6, 8.8, 34.6,
36, 38, 30.7, 40.9, 41, 35.3, 25.7, 23.7, 73, 72, 72, 62, 73,
73, 59, 51, 63, 55, 50, 46, 20.9, 36.9, 24.5, 27.6, 29.4, 28,
14.5, 27.4, 17, 34.7, 38.8, 39, 34.1, 55.2, 56, 44.6, 35.9)), row.names = c(NA,
-68L), class = c("tbl_df", "tbl", "data.frame"))
分析:
library(dplyr)
library(tidyr)
library(emmeans)
# data adjustment
df.sub$TransDist <- log(df.sub$Dist + 1)
df.sub$YearFac <- as.factor(df.sub$Year)
df.sub$Transect <- droplevels(df.sub$Transect)
m <- lm(log(Response) ~ poly(TransDist, 2) * YearFac * Transect +
log(Covariate), data = df.sub)
# prediction grid:
new <- unique(select(df.sub, Transect, YearFac)) %>%
crossing(Dist = c(0, 500, 1000, 4000)) %>%
filter(Dist < 4000 | Transect != "Transect3") %>%
mutate(TransDist = log(Dist + 1),
Covariate = rnorm(n(), 50, 5))
termsX <- terms(model.frame(m, data = df.sub))
X_new2 <- model.matrix(delete.response(termsX), data = new)
beta = coef(m)
new$pred <- X_new2 %*% beta
# now predict with emmeans and compare
ems <- emmeans(m, ~YearFac | Transect * TransDist + Covariate, data = new,
covnest = TRUE, cov.reduce = FALSE)
as.data.frame(ems) %>%
select(Transect, YearFac, TransDist, emmean) %>%
right_join(new) ## when looking at the output, the values are identical, which is great
现在计算成对比较。这有效(基于与上述输出的手动比较)。
contrast(ems, "pairwise", by = c("Transect", "TransDist"),
data = new, covnest = TRUE, cov.reduce = FALSE)
但是,当我运行时CLD
,我得到一个错误......
CLD(ems)
Error in x@linfct[i, , drop = FALSE] : subscript out of bounds