我有一个模型,它结合了一个虚拟变量和一个连续变量来描述干扰后的结果。因此,如果有干扰,我会在干扰后的 1:16 进行时间测量。如果最近没有干扰,则将结果编码为 -1 的假时间值。这是数据集的表示:
library(lme4)
library(ggplot2)
df <- data.frame(ID = rep(c("a", "b", "c"), each = 20),
Time = c(1:16, -1, -1, -1, -1,
1:16, -1, -1, -1, -1,
1:16, -1, -1, -1, -1))
df$y <- 2 + 0.8*df$Time + 1*df$Time^2 + rnorm(30, 0, 3)
df[df$Time < 0,]$y <- rnorm(12, 5, 3)
df[df$ID == "b",]$y <- df[df$ID == "b",]$y + 5
df[df$ID == "c",]$y <- df[df$ID == "c",]$y - 5
df$Exposure <- "Before"
df[df$Time > 0,]$Exposure <- "After"
df$Exposure <- factor(df$Exposure, levels = c("Before", "After"))
ggplot(df[df$Time > 0,]) +
geom_point(aes(x = Time, y = y, colour = ID)) +
geom_point(data = df[df$Time < 0,], aes(x = -5, y = y, colour = ID))
我所追求的是将“无干扰”估计与干扰后的不同时间进行比较,以查看差异何时变得显着。
在建模之前,将“无干扰”数据分配给时间 0。
df[df$Time < 0,]$Time <- 0
m <- lmer(y ~ Exposure + poly(Time, 2) + (1|ID), data = df)
# output estimates
newdata <- data.frame(Exposure = c("Before", "After", "After", "After", "After", "After"),
Time = c(0, 1, 4, 8, 12, 16))
newdata$Pred <- predict(m, re.form = NA, newdata = newdata)
## plot looks good
ggplot(df[df$Time > 0,]) +
geom_point(aes(x = Time, y = y, colour = ID)) +
geom_point(data = df[df$Time == 0,], aes(x = -5, y = y, colour = ID)) +
geom_line(data = newdata[newdata$Exposure == "After",],
aes(x = Time, y = Pred)) +
geom_point(data = newdata[newdata$Exposure == "Before",],
aes(x = -5, y = Pred), colour = "red")
然后,我将如何比较,例如,之前的估计与在Time==3
、Time == 6
和处的估计之后Time == 9
?这样的事情会很棒,但我不知道如何解决我遇到的错误。
library(contrast)
library(multcomp)
cc <- contrast(m,
a = list(Time = 0, Exposure = "Before"),
b = list(Time = c(3, 6, 9), Exposure = "After"))
summary(glht(m, linfct = cc$X))
### 更新
在 rvl 的出色更改之后,我对我的实际数据进行了试运行,并遇到了一个新问题。我的实际时间变量不是整数,但我想在整数范围内进行预测。当我更新玩具示例时,嵌套似乎中断了:
df$Time <- df$Time + rnorm(60, 0, 0.5)
df[df$Exposure == "Before",]$Time <- -1.12
m <- lmer(y ~ Exposure + poly(Time, 2) + (1|ID), data = df)
# freshly installed emmeans from github
emm = emmeans(m, "Time", at = list(Time = c(0,3,6,9)))
emm ## no longer get the nesting info, and the preds aren't nested
在我自己的数据中(并使用at
规范,我实际上只得到一行 for Time == 0
and Exposure == Before
,仅此而已 - 输出中没有其他内容......有什么建议吗??
## 更新2
出于某种原因,该解决方案适用于玩具示例,但不适用于我自己的数据......这是我数据集的一小部分。模型拟合是奇异的,但我遇到的问题emmeans
与我的整个数据集相同......帮助?
df <- structure(list(ID = structure(c(2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 2L), .Label = c("B", "A"), class = "factor"),
Exposure = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("No exposure", "Exposure"
), class = "factor"), Time = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 4.78757545912946, 9.63531173739354, 5.47889766247861,
7.17017886302881, 1.43155423003375, 3.72391354120779, 2.56353688399906,
8.29779117320654, 9.52304006615339, 9.48174174807695, 0.859601950498583,
4.63141168677387, 7.92347302279951, 7.92067346608815, 5.23250024053785,
5.57671787587839, 1.85126003367584, 3.1097216702916, 7.72389534567839,
9.36144591805227, 2.70213603445334, 1.84811002303022, 6.82448971585652,
7.88336338096561, 3.84031339520175, 5.62874085650497, 4.0972590990481,
2.09535527965164, 2.22160757456982, 7.35862943664427, 7.41826702411403,
8.24309337727667, 4.7943847267765, 5.8840472004994, 7.02963322046381
), Response = c(-7.16922413711838, 143.482571506177, 16.45347120693,
25.022565770909, -55.8024015971315, -124.925019624537, -16.4000310854958,
40.9499232825204, 2.46651714407957, -34.3558611547229, -80.1711009500979,
-58.5220697399603, 17.6390452197579, -11.2077688506688, 87.0618648836916,
113.611468732, -27.1400972587652, -30.0256851366867, -111.149731873181,
-24.2689502403869, -16.2737794106996, -125.618994529607,
95.9640135688539, 46.4163972081548, 6.72470222784859, -0.148508667228167,
-118.897875455802, 28.6093848128793, -57.5632050845714, 31.390260468939,
27.6826377837027, -40.7112943346364, -53.5934755706868, 27.0754421268185,
165.146183257597, 39.6762439690417, -9.74912218853661, 18.3454700992841,
33.8006770750647, -18.6013173700368, 12.7360264627221, 178.646948999019,
93.5496871933183, -8.68468960982507, 2.86668462850576)), row.names = c(1L,
3L, 5L, 7L, 9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 27L,
29L, 31L, 33L, 35L, 37L, 39L, 41L, 43L, 45L, 47L, 49L, 51L, 53L,
55L, 57L, 59L, 61L, 63L, 65L, 67L, 69L, 71L, 73L, 75L, 77L, 79L,
81L, 83L, 85L, 87L, 89L), class = c("tbl_df", "tbl", "data.frame"))
运行模型和 emmeans:
m <- lmer(Response ~ Exposure + poly(Time, 2) + (1|ID), data = df)
## this only gives one row instead of 8?
emmeans(m, c("Time", "Exposure"), at = list(Time = c(0,3,6,9)))
## when I specify the nesting myself, I get a "multiple actual arguments" error...
emmeans(m, c("Time", "Exposure"), at = list(Time = c(0,3,6,9)),
nesting = "Time %in% Exposure")