1

我是 R 的初学者,希望有人能帮我把下面的分析自动化,并得到一份汇总各项结果的输出。

我有 4 个如下所示的数据框(见下文),它们的列名相同,并且 Threshold(阈值)列中的取值也相同:

   Set   Threshold  R2           P          Coefficient  Standard.Error  Num_SNP
Base  0.0001     0.000233304  0.66047    0.0332613    0.0757204       47
Base  0.001      0.000387268  0.571772   -0.0438782   0.0775996       475
Base  0.05       0.00302399   0.114364   0.129474     0.082004        14164
Base  0.1        0.00252797   0.14897    0.117391     0.0813418       24616
Base  0.2        0.00481908   0.0465384  0.163571     0.0821767       41524
Base  0.3        0.00514761   0.0398082  0.170058     0.0827237       55307
Base  0.4        0.00699506   0.0166685  0.200571     0.083783        66943
Base  0.5        0.00634181   0.0226301  0.192314     0.0843623       76785

对于 Threshold 列中的每一个取值,我想使用 R 中的 metafor 包,对 4 个数据框中对应行的效应量(Coefficient 列)和标准误(Standard.Error 列)进行元分析。

使用 metafor 包:

# Fixed-effect meta-analysis of the four coefficients and their standard
# errors, weighted by the supplied sample_size values.
rma.uni(
  yi = c(Coefficient_1, Coefficient_2, Coefficient_3, Coefficient_4),
  sei = c(Standard.Error_1, Standard.Error_2, Standard.Error_3, Standard.Error_4),
  measure = "GEN",
  method = "FE",
  intercept = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
  weights = c(sample_size1, sample_size2, sample_size3, sample_size4)
)

如何自动化分析并获得包含每个阈值结果的摘要数据框?

4

1 回答 1

0

你好,下面的代码应该可以帮你入门。基本思路是:遍历所有阈值,从 4 个数据框中分别提取与当前阈值匹配的行,合并成一个新的数据框,然后对它运行元分析。

library(metafor)
# Make some fake data resembling your own.
#
# make_fake_df() builds one simulated results table with the same columns as
# the real data: one row per threshold, with random R2, P, Coefficient,
# Standard.Error and Num_SNP values.
#
# thresholds: numeric vector of threshold values, one output row per value
#             (at most 1000, because Num_SNP is sampled without replacement
#             from 1:1000).
# Returns a data.frame with columns Set, Threshold, R2, P, Coefficient,
# Standard.Error and Num_SNP.
make_fake_df <- function(thresholds = c(0.0001, 0.001, 0.05,
                                        seq(0.1, 0.5, 0.1))) {
  n <- length(thresholds)
  data.frame(
    Set = rep("Base", n),
    Threshold = thresholds,
    R2 = runif(n, 0.001, 0.005),
    P = runif(n, 0.001, 1),
    Coefficient = runif(n, -0.1, 0.2),
    Standard.Error = runif(n, 0.07, 0.08),
    Num_SNP = sample(1:1000, n)
  )
}

# Fix the random seed so the example produces the same fake data on every run.
set.seed(42)

# Four independent draws that share the same thresholds.
df1 <- make_fake_df()
df2 <- make_fake_df()
df3 <- make_fake_df()
df4 <- make_fake_df()

# Fixed-effect meta-analysis across the four data frames, one model per
# threshold. For each threshold the matching rows of df1..df4 are stacked and
# passed to rma.uni(); selected statistics of each fit are collected into
# `Results`, a summary data frame with one row per threshold.
study_dfs <- list(df1, df2, df3, df4)
Thresholds <- unique(df1$Threshold)

# Fit the model for a single threshold and return its summary statistics as a
# named numeric vector.
fit_threshold <- function(threshold) {
  # Exact `==` matching relies on every data frame storing identical
  # threshold values.
  idf <- do.call(
    rbind,
    lapply(study_dfs, function(d) d[d$Threshold == threshold, ])
  )
  # Surface missing or duplicated thresholds instead of silently pooling a
  # different number of studies.
  if (nrow(idf) != length(study_dfs)) {
    warning(
      "Threshold ", threshold, " matched ", nrow(idf), " row(s) across ",
      length(study_dfs), " data frames",
      call. = FALSE
    )
  }
  # NOTE(review): the fit is weighted by Num_SNP; substitute per-study sample
  # sizes here if that is the intended weighting -- confirm.
  i_meta <- rma.uni(
    yi = idf$Coefficient,
    sei = idf$Standard.Error,
    measure = "GEN",
    method = "FE",
    intercept = TRUE,
    weights = idf$Num_SNP
  )
  c(
    Threshold = threshold,
    beta = as.numeric(i_meta$beta),
    se = i_meta$se,
    zval = i_meta$zval,
    pval = i_meta$pval,
    ci.lb = i_meta$ci.lb,
    ci.ub = i_meta$ci.ub,
    QEp = i_meta$QEp
  )
}

# lapply() plus a single rbind() avoids growing `Results` inside a loop, and
# (unlike 1:length(Thresholds)) does nothing when there are no thresholds.
Results <- data.frame(do.call(rbind, lapply(Thresholds, fit_threshold)))
Results

运行后应该会得到类似下面的结果(示例数据是随机生成的,因此具体数值会有所不同):

  Threshold         beta         se       zval       pval        ci.lb     ci.ub        QEp
1     1e-04 -0.012079013 0.04715546 -0.2561530 0.79783270 -0.104502022 0.0803440 0.08700919
2     1e-03  0.068932388 0.04006086  1.7206917 0.08530678 -0.009585452 0.1474502 0.22294419
3     5e-02  0.050069503 0.04094881  1.2227340 0.22143020 -0.030188694 0.1303277 0.07342661
4     1e-01  0.102598016 0.04188183  2.4497022 0.01429744  0.020511132 0.1846849 0.07380669
5     2e-01  0.069482160 0.04722693  1.4712401 0.14122619 -0.023080930 0.1620452 0.95494364
6     3e-01  0.009793206 0.05098346  0.1920859 0.84767489 -0.090132542 0.1097190 0.12191340
7     4e-01  0.030432884 0.03967771  0.7670021 0.44308028 -0.047333994 0.1081998 0.86270334
8     5e-01  0.073511575 0.03997485  1.8389458 0.06592316 -0.004837683 0.1518608 0.12333557
于 2020-05-06T12:11:09.330 回答