我有一个长格式的数据集,其中包含在 19 名患者中进行的 60 次重复测量 ( ID
)。患者进行了不同数量的测量(2 次测量 [n=11],然后是 5 次测量 [n=5]、3 次 [n=1]、4 次 [n=1] 和 6 次测量 [n=1],具有不同的时间间隔(fu_time
以年为单位)。数据如下所示:
library(tidyverse)
library(magrittr)
library(broom.mixed)
library(nlme)
library(ggeffects)
library(sjPlot)
mydata <-
structure(list(ID = c(2, 2, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 4,
7, 7, 8, 8, 8, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 20, 20,
21, 21, 22, 22, 24, 24, 24, 24, 25, 25, 27, 27, 29, 29, 29, 29,
29, 30, 30, 31, 31, 34, 34, 34, 34, 34, 36, 36, 37, 37), fu_time = c(0,
0.545, 2.168, 2.68, 3.184, 5.695, 0, 1.892, 0, 0.939, 1.451,
1.955, 4.353, 0, 4.449, 0, 0.465, 4.005, 0, 0.364, 0.857, 1.361,
3.759, 0, 0.46, 0.953, 1.457, 4.03, 0, 0.268, 0, 0.383, 0, 0.556,
0, 0.665, 1.158, 1.662, 0, 5.558, 0, 1.207, 0, 1.339, 1.793,
2.335, 4.194, 0, 0.734, 0, 2.27, 0, 0.613, 1.106, 1.648, 4.197,
0, 0.556, 0, 0.468), sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L), .Label = c("Female", "Male"), class = "factor"),
age = c(56.6, 56.6, 56.6, 56.6, 56.6, 56.6, 43.2, 43.2, 52,
52, 52, 52, 52, 58.5, 58.5, 57.4, 57.4, 57.4, 57.8, 57.8,
57.8, 57.8, 57.8, 52.4, 52.4, 52.4, 52.4, 52.4, 70.8, 70.8,
61.4, 61.4, 59.2, 59.2, 61.5, 61.5, 61.5, 61.5, 48.9, 48.9,
54.2, 54.2, 50.1, 50.1, 50.1, 50.1, 50.1, 55.4, 55.4, 48.6,
48.6, 64.2, 64.2, 64.2, 64.2, 64.2, 68.3, 68.3, 66.7, 66.7
), age_standardized = c(-0.112074178471796, -0.112074178471796,
-0.112074178471796, -0.112074178471796, -0.112074178471796,
-0.112074178471796, -1.75787581301653, -1.75787581301653,
-0.677050858987151, -0.677050858987151, -0.677050858987151,
-0.677050858987151, -0.677050858987151, 0.121285754784546,
0.121285754784546, -0.0138173644691261, -0.0138173644691261,
-0.0138173644691261, 0.035311042532209, 0.035311042532209,
0.035311042532209, 0.035311042532209, 0.035311042532209,
-0.627922451985816, -0.627922451985816, -0.627922451985816,
-0.627922451985816, -0.627922451985816, 1.6319842700756,
1.6319842700756, 0.477466705544226, 0.477466705544226, 0.207260467036883,
0.207260467036883, 0.48974880729456, 0.48974880729456, 0.48974880729456,
0.48974880729456, -1.0577960132475, -1.0577960132475, -0.406844620479807,
-0.406844620479807, -0.910410792243494, -0.910410792243494,
-0.910410792243494, -0.910410792243494, -0.910410792243494,
-0.259459399475802, -0.259459399475802, -1.0946423184985,
-1.0946423184985, 0.821365554553573, 0.821365554553573, 0.821365554553573,
0.821365554553573, 0.821365554553573, 1.32493172631726, 1.32493172631726,
1.12841809831192, 1.12841809831192), hypertension = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), total_cholesterol = c(3.8, 3.8,
3.8, 3.8, 3.8, 3.8, 3.6, 3.6, 5.208, 5.208, 5.208, 5.208,
5.208, 5.2, 5.2, 4.5, 4.5, 4.5, 3.9, 3.9, 3.9, 3.9, 3.9,
4.3, 4.3, 4.3, 4.3, 4.3, 3.94, 3.94, 5.468, 5.468, 4.76,
4.76, 4.2, 4.2, 4.2, 4.2, 3.1, 3.1, 4.796, 4.796, 5.4, 5.4,
5.4, 5.4, 5.4, 4.7, 4.7, 5.516, 5.516, 4.6, 4.6, 4.6, 4.6,
4.6, 4.096, 4.096, 7.2, 7.2), log_outcome = c(8.42915657313723,
8.7009137691239, 8.86077267189618, 8.96246948326419, 9.03130758075829,
9.12345262370784, 5.92634460299296, 6.50615330482121, 8.82605479483354,
9.28945716416388, 9.31314123590531, 9.29155486879811, 9.54380093520124,
6.3387281707835, 8.42119585511192, 8.09141312479266, 8.08323601190682,
8.08345205795634, 8.84248862871191, 8.79677243460523, 8.83902619880979,
8.88249368941836, 9.3739060089624, 9.21813577692571, 9.41892283483421,
9.51110081224594, 9.44403926910489, 9.90810686887857, 11.2473988053982,
11.3367621074836, 9.14484138974349, 9.13587164023184, 9.15404308484749,
9.29984347265724, 9.6945365587177, 9.51513026752813, 9.53564697229274,
9.59066055858977, 7.06643401737976, 7.7842004897582, 8.40862917559632,
8.74727168230293, 4.10135196585983, 5.72042611547146, 5.91627217786197,
6.60537958438705, 7.17379497306441, 9.02372811447485, 9.162613156671,
6.99925650048289, 7.38343760944252, 9.28357810757782, 9.35451537017142,
9.34029788589738, 9.34596520171378, 9.29507953850898, 10.4746495321547,
10.5224161779052, 8.94686953356746, 8.96847859420429)), row.names = c(NA,
60L), class = "data.frame")
head(mydata)
ID fu_time sex age age_standardized hypertension total_cholesterol log_outcome
1 2 0.000 Female 56.6 -0.1120742 No 3.8 8.429157
2 2 0.545 Female 56.6 -0.1120742 No 3.8 8.700914
3 2 2.168 Female 56.6 -0.1120742 No 3.8 8.860773
4 2 2.680 Female 56.6 -0.1120742 No 3.8 8.962469
5 2 3.184 Female 56.6 -0.1120742 No 3.8 9.031308
6 2 5.695 Female 56.6 -0.1120742 No 3.8 9.123453
目的是评估结果进展的风险因素。由于偏斜,结果已进行自然对数转换。我用相应的指数固定效应指定的线性混合模型:
ris <-
lme(fixed=log_outcome ~ fu_time + age*fu_time + sex*fu_time + hypertension*fu_time +
total_cholesterol*fu_time,
random=~ 1 + fu_time|ID,
data=mydata,
method="ML")
# get coefficients
fixef_coefs <-
tidy(ris, conf.int=TRUE) %>%
select(term, estimate, conf.low, conf.high, p.value) %>%
mutate_at(vars(estimate, conf.low, conf.high), ~round(exp(.), 2)) %>%
mutate(p.value=round(p.value, 3))
fixef_coefs <- fixef_coefs[1:10, ] # remove random effects from dataset
fixef_coefs
# A tibble: 10 x 5
term estimate conf.low conf.high p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.67 0.03 16.8 0.82
2 fu_time 2.21 1.1 4.45 0.043
3 age 1.23 1.15 1.3 0
4 sexMale 2.02 0.8 5.15 0.162
5 hypertensionYes 0.4 0.16 1 0.07
6 total_cholesterol 0.58 0.37 0.91 0.032
7 fu_time:age 0.98 0.97 0.99 0.001
8 fu_time:sexMale 1.08 0.9 1.29 0.42
9 fu_time:hypertensionYes 1.22 1.04 1.43 0.03
10 fu_time:total_cholesterol 1.12 1.03 1.22 0.018
因此,年龄、高血压和总胆固醇与结果随时间的变化有关,因为它们与fu_time
. 我的下一步是绘制例如高血压的边际效应,最好使用 ggemmeans/sjplot。
然而,我坚持如何实现这一目标。我已经阅读了关于 ggeffects 的小插曲、ggeffects和 ggemmeans之间的区别,以及在 sjplot 中绘制交互效果。
我已经尝试了几件事。
(1) Running ggemmeans(ris, type="random", terms="fu_time:age")
,它给出的错误是模型中没有具有该名称的术语。
(2) 跑步plot_model(ris, type = "int")
。这会为协变量产生罐子,但这有两个问题:首先,这些图在对数尺度上,而我希望它们在反变换/指数尺度上,还有两个我不确定这些是否是实际的与 fu_time 的交互项。
最好我想使用 ggemmeans 制作我自己的/一个新的数据集(因此将连续变量保持在它们的平均值,将分类变量保持在它们观察到的样本比例),这样我就可以对变量进行计算,然后自己制作图。