0

我有一个模型,它结合了一个虚拟变量和一个连续变量来描述干扰后的结果。因此,如果有干扰,我会在干扰后的 1:16 进行时间测量。如果最近没有干扰,则将结果编码为 -1 的假时间值。这是数据集的表示:

library(lme4)
library(ggplot2)

df <- data.frame(ID = rep(c("a", "b", "c"), each = 20),
            Time = c(1:16, -1, -1, -1, -1,
                    1:16, -1, -1, -1, -1,
                    1:16, -1, -1, -1, -1))
df$y <- 2 + 0.8*df$Time + 1*df$Time^2 + rnorm(30, 0, 3)
df[df$Time < 0,]$y <- rnorm(12, 5, 3)

df[df$ID == "b",]$y <-  df[df$ID == "b",]$y + 5
df[df$ID == "c",]$y <-  df[df$ID == "c",]$y - 5
df$Exposure <- "Before"
df[df$Time > 0,]$Exposure <- "After"
df$Exposure <- factor(df$Exposure, levels = c("Before", "After"))

ggplot(df[df$Time > 0,]) +
  geom_point(aes(x = Time, y = y, colour = ID)) +
  geom_point(data = df[df$Time < 0,], aes(x = -5, y = y, colour = ID)) 

我所追求的是将“无干扰”估计与干扰后的不同时间进行比较,以查看差异何时变得显着。

在建模之前,将“无干扰”数据分配给时间 0。

df[df$Time < 0,]$Time <- 0  
m <- lmer(y ~ Exposure + poly(Time, 2) + (1|ID), data = df) 

# output estimates
newdata <- data.frame(Exposure = c("Before", "After", "After", "After", "After", "After"),
                    Time = c(0, 1, 4, 8, 12, 16))
newdata$Pred <- predict(m, re.form = NA, newdata = newdata)

## plot looks good
ggplot(df[df$Time > 0,]) +
geom_point(aes(x = Time, y = y, colour = ID)) +
geom_point(data = df[df$Time == 0,], aes(x = -5, y = y, colour = ID)) +
geom_line(data = newdata[newdata$Exposure == "After",], 
   aes(x = Time, y = Pred)) +
geom_point(data = newdata[newdata$Exposure == "Before",], 
   aes(x = -5, y = Pred), colour = "red") 

然后,我将如何比较,例如,之前的估计与在Time==3Time == 6和处的估计之后Time == 9?这样的事情会很棒,但我不知道如何解决我遇到的错误。

library(contrast)
library(multcomp)

cc <- contrast(m, 
  a = list(Time = 0, Exposure = "Before"), 
  b = list(Time = c(3, 6, 9), Exposure = "After")) 
summary(glht(m, linfct = cc$X)) 

### 更新

在 rvl 的出色更改之后,我对我的实际数据进行了试运行,并遇到了一个新问题。我的实际时间变量不是整数,但我想在整数范围内进行预测。当我更新玩具示例时,嵌套似乎中断了:

df$Time <- df$Time + rnorm(60, 0, 0.5)
df[df$Exposure == "Before",]$Time <- -1.12
m <- lmer(y ~ Exposure + poly(Time, 2) + (1|ID), data = df)
# freshly installed emmeans from github
emm = emmeans(m, "Time", at = list(Time = c(0,3,6,9)))
emm ## no longer get the nesting info, and the preds aren't nested

在我自己的数据中(并使用at规范,我实际上只得到一行 for Time == 0and Exposure == Before,仅此而已 - 输出中没有其他内容......有什么建议吗??

## 更新2

出于某种原因,该解决方案适用于玩具示例,但不适用于我自己的数据......这是我数据集的一小部分。模型拟合是奇异的,但我遇到的问题emmeans与我的整个数据集相同......帮助?

df <- structure(list(ID = structure(c(2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 1L, 2L, 2L), .Label = c("B", "A"), class = "factor"), 
Exposure = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("No exposure", "Exposure"
), class = "factor"), Time = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 4.78757545912946, 9.63531173739354, 5.47889766247861, 
7.17017886302881, 1.43155423003375, 3.72391354120779, 2.56353688399906, 
8.29779117320654, 9.52304006615339, 9.48174174807695, 0.859601950498583, 
4.63141168677387, 7.92347302279951, 7.92067346608815, 5.23250024053785, 
5.57671787587839, 1.85126003367584, 3.1097216702916, 7.72389534567839, 
9.36144591805227, 2.70213603445334, 1.84811002303022, 6.82448971585652, 
7.88336338096561, 3.84031339520175, 5.62874085650497, 4.0972590990481, 
2.09535527965164, 2.22160757456982, 7.35862943664427, 7.41826702411403, 
8.24309337727667, 4.7943847267765, 5.8840472004994, 7.02963322046381
), Response = c(-7.16922413711838, 143.482571506177, 16.45347120693, 
25.022565770909, -55.8024015971315, -124.925019624537, -16.4000310854958, 
40.9499232825204, 2.46651714407957, -34.3558611547229, -80.1711009500979, 
-58.5220697399603, 17.6390452197579, -11.2077688506688, 87.0618648836916, 
113.611468732, -27.1400972587652, -30.0256851366867, -111.149731873181, 
-24.2689502403869, -16.2737794106996, -125.618994529607, 
95.9640135688539, 46.4163972081548, 6.72470222784859, -0.148508667228167, 
-118.897875455802, 28.6093848128793, -57.5632050845714, 31.390260468939, 
27.6826377837027, -40.7112943346364, -53.5934755706868, 27.0754421268185, 
165.146183257597, 39.6762439690417, -9.74912218853661, 18.3454700992841, 
33.8006770750647, -18.6013173700368, 12.7360264627221, 178.646948999019, 
93.5496871933183, -8.68468960982507, 2.86668462850576)), row.names = c(1L, 
3L, 5L, 7L, 9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 27L, 
29L, 31L, 33L, 35L, 37L, 39L, 41L, 43L, 45L, 47L, 49L, 51L, 53L, 
55L, 57L, 59L, 61L, 63L, 65L, 67L, 69L, 71L, 73L, 75L, 77L, 79L, 
81L, 83L, 85L, 87L, 89L), class = c("tbl_df", "tbl", "data.frame"))

运行模型和 emmeans:

m <- lmer(Response ~ Exposure + poly(Time, 2) + (1|ID), data = df)
## this only gives one row instead of 8?
emmeans(m, c("Time", "Exposure"),  at = list(Time = c(0,3,6,9)))
## when I specify the nesting myself, I get a "multiple actual arguments" error...
emmeans(m, c("Time", "Exposure"),  at = list(Time = c(0,3,6,9)), 
   nesting = "Time %in% Exposure")
4

1 回答 1

1

在您澄清之后,我认为这可以解决问题:

require(emmeans)
emm = emmeans(m, c("Time", "Exposure"),
    at = list(Time = c(0,3,6,9)))

这会创建八个预测:四个用于"After"在 0、3、6、0 时间的曝光,然后"Before是相同的四次“”(请注意,在因子级别的默认字母顺序中,After 在 Before 之前)。因此,我认为你的对比需要可以通过

contrast(emm, list(
    c3 = c(0, 1, 0, 0,  -1, 0, 0, 0),
    c6 = c(0, 0, 1, 0,  -1, 0, 0, 0),
    c9 = c(0, 0, 0, 1,  -1, 0, 0, 0)))

附录

实际上,此模型具有嵌套结构,其中Time嵌套在Exposure. 当嵌套的“因子”是协变量而不是常规因子时,我发现了一个emmeans::ref_grid无法检测到这种嵌套的错误。现在已修复此问题(您需要从 github 站点安装它),现在执行起来要简单得多,基本上恢复到我以前版本的这个答案:

> emm <- emmeans(m, "Time", cov.reduce = FALSE)
NOTE: A nesting structure was detected in the fitted model:
    Time %in% Exposure

指定cov.reduce = FALSE要求包括所有协变量的所有唯一级别。或者(如果周围有其他协变量,建议使用)是使用at = list(Time = 0:17).

> emm
 Time Exposure    emmean       SE   df  lower.CL  upper.CL
    0 Before     4.54321 2.817328 2.30  -6.18006  15.26648
    1 After      5.28918 2.907673 2.61  -4.80080  15.37916
    2 After      8.61589 2.823986 2.32  -2.05285  19.28462
    3 After     14.01341 2.776795 2.17   2.92581  25.10101
    4 After     21.48175 2.755698 2.11  10.18026  32.78323
    5 After     31.02091 2.751049 2.09  19.66982  42.37199
    6 After     42.63088 2.754742 2.10  31.31927  53.94250
    7 After     56.31168 2.760612 2.12  45.06163  67.56173
    8 After     72.06329 2.764565 2.13  60.85388  83.27270
    9 After     89.88572 2.764565 2.13  78.67631 101.09513
   10 After    109.77897 2.760612 2.12  98.52892 121.02903
   11 After    131.74304 2.754742 2.10 120.43143 143.05466
   12 After    155.77793 2.751049 2.09 144.42685 167.12901
   13 After    181.88363 2.755698 2.11 170.58215 193.18512
   14 After    210.06015 2.776795 2.17 198.97255 221.14776
   15 After    240.30750 2.823986 2.32 229.63876 250.97623
   16 After    272.62565 2.907673 2.61 262.53568 282.71563

Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95 

请注意,虽然我要求 just Time,但Exposure它也作为一种“by”变量出现,因为它嵌套了time. 现在,让我们将第一个与其他每个进行比较:

> contrast(emm, "trt.vs.ctrl1")
 contrast             estimate        SE df t.ratio p.value
 1,After - 0,Before    0.74597 1.3643132 54   0.547  0.9953
 2,After - 0,Before    4.07267 1.1754498 54   3.465  0.0137
 3,After - 0,Before    9.47020 1.0570597 54   8.959  <.0001
 4,After - 0,Before   16.93854 1.0003291 54  16.933  <.0001
 5,After - 0,Before   26.47770 0.9874492 54  26.814  <.0001
 6,After - 0,Before   38.08767 0.9976910 54  38.176  <.0001
 7,After - 0,Before   51.76847 1.0137883 54  51.064  <.0001
 8,After - 0,Before   67.52008 1.0245019 54  65.905  <.0001
 9,After - 0,Before   85.34251 1.0245019 54  83.301  <.0001
 10,After - 0,Before 105.23576 1.0137883 54 103.804  <.0001
 11,After - 0,Before 127.19983 0.9976910 54 127.494  <.0001
 12,After - 0,Before 151.23472 0.9874492 54 153.157  <.0001
 13,After - 0,Before 177.34042 1.0003291 54 177.282  <.0001
 14,After - 0,Before 205.51694 1.0570597 54 194.423  <.0001
 15,After - 0,Before 235.76429 1.1754498 54 200.574  <.0001
 16,After - 0,Before 268.08244 1.3643132 54 196.496  <.0001

P value adjustment: dunnettx method for 16 tests

附录 2

关于更新#2,问题是嵌套的东西不能正常工作,除非你提供数据中出现的实际值。为了说明(使用更新的数据和模型):

> emmeans(m, c("Time", "Exposure"),  at = list(Time = df$Time[c(1,15,25,35)]))
NOTE: A nesting structure was detected in the fitted model:
    Time %in% Exposure
     Time Exposure       emmean       SE    df  lower.CL upper.CL
 0.000000 No exposure -1.027749 22.90015 12.81 -50.57545 48.51995
 1.431554 Exposure    -3.001869 29.90185 22.16 -64.98937 58.98563
 5.232500 Exposure    19.464761 19.59438  5.42 -29.75007 68.67959
 3.840313 Exposure    17.361564 18.56171  4.03 -34.01995 68.74308

Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95

明确提供嵌套的另一部分似乎是一个错误,我需要修复它。

这是解决所有问题的一种方法:首先,获取 和 组合的参考网格ExposureTime抑制嵌套(这在对 的调用ref_grid()起作用:

rg = ref_grid(m, at = list(Time = c(0,3,6,9)), nesting = NULL)

然后挑出有意义的:

emm = rg[c(1,4,6,8)]
confint(emm)

...你得到:

 Exposure    Time prediction       SE    df  lower.CL upper.CL
 No exposure    0  -1.027749 22.90015 12.81 -50.57545 48.51995
 Exposure       3  12.665198 18.76906  4.18 -38.57825 63.90864
 Exposure       6  17.596368 19.07591  5.03 -31.35612 66.54885
 Exposure       9 -10.353097 24.21000 14.49 -62.11348 41.40728

Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95

然后,要获得您需要的比较:

contrast(emm, "trt.vs.ctrl1")

产生:

 contrast                    estimate       SE    df t.ratio p.value
 Exposure,3 - No exposure,0 13.692947 28.36206 40.29   0.483  0.9033
 Exposure,6 - No exposure,0 18.624117 28.68533 40.18   0.649  0.8257
 Exposure,9 - No exposure,0 -9.325349 32.59268 40.01  -0.286  0.9669

P value adjustment: dunnettx method for 3 tests

附录 3

这是一个更好的解决方法:创建一个具有所需Time值的假数据集,并在参数中指定该数据集data

fakedf = df[c(1,21,23,25), ]
fakedf$Time = c(0,3,6,9)

emmeans(m, trt.vs.ctrl1 ~ Time, data = fakedf, 
    covnest = TRUE, cov.reduce = FALSE)

...产生此输出:

NOTE: A nesting structure was detected in the fitted model:
    Time %in% Exposure

$`emmeans`
 Time Exposure        emmean       SE    df  lower.CL upper.CL
    0 No exposure  -1.027749 22.90015 12.81 -50.57545 48.51995
    3 Exposure     12.665198 18.76906  4.18 -38.57825 63.90864
    6 Exposure     17.596368 19.07591  5.03 -31.35612 66.54885
    9 Exposure    -10.353097 24.21000 14.49 -62.11348 41.40728

Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95 

$contrasts
 contrast                    estimate       SE    df t.ratio p.value
 3,Exposure - 0,No exposure 13.692947 28.36206 40.29   0.483  0.9033
 6,Exposure - 0,No exposure 18.624117 28.68533 40.18   0.649  0.8257
 9,Exposure - 0,No exposure -9.325349 32.59268 40.01  -0.286  0.9669

P value adjustment: dunnettx method for 3 tests
于 2018-11-18T23:37:34.717 回答