0

我想找到 FDR 调整后 p 值为 p<.05 的 SNPS 的数量。然而,我的for loopif声明没有有效地找到 p<.05 的 SNP 数。

我的数据集有一P列表示 p 值和 1422 个观察值。

> dput(dat[1:5,])
structure(list(CHR = c(6L, 6L, 6L, 6L, 6L), SNP = c("rs9257319", 
"rs2269553", "rs2894066", "rs3763338", "rs1233508"), BP = c(28959616L, 
28984488L, 29001906L, 29002290L, 29005612L), A1 = c(2L, 1L, 1L, 
1L, 2L), A2 = c(1L, 2L, 2L, 2L, 1L), T = c(6L, 9L, 13L, 4L, 8L
), U = c(7L, 9L, 9L, 3L, 13L), OR = c(0.8571, 1, 1.444, 1.333, 
0.6154), L95 = c(0.2881, 0.397, 0.6174, 0.2984, 0.2551), U95 = c(2.55, 
2.519, 3.379, 5.957, 1.485), CHISQ = c(0.07692, 0, 0.7273, 0.1429, 
1.19), P = c(0.7815, 1, 0.3938, 0.7055, 0.2752)), row.names = c(NA, 
5L), class = "data.frame")

qvalue我使用库计算了 q 值。

library(qvalue)
library(dplyr)

fdr <- qvalue(dat$P, fdr.level=0.05)

最后,我想找到 FDR 调整后 p 值 p<.05 的 SNP 数量。

# SNPs that have FDR adjusted p-values of p<.05

for(i in fdr$qvalues){
  if(i>0.05){
    fdr[!fdr$qvalues %in% i]
  }
}

发现有一个 q-value > 0.05 就去掉了。但是,如下所示,长度fdr$qvalues保持不变,这意味着我没有删除 q-value > 0.05 的元素。

length(fdr$qvalues)
[1] 1422
4

1 回答 1

0
library(tidyverse)

# slightly modified p values to see the result
data <- structure(list(CHR = c(6L, 6L, 6L, 6L, 6L), SNP = c(
  "rs9257319",
  "rs2269553", "rs2894066", "rs3763338", "rs1233508"
), BP = c(
  28959616L,
  28984488L, 29001906L, 29002290L, 29005612L
), A1 = c(
  2L, 1L, 1L,
  1L, 2L
), A2 = c(1L, 2L, 2L, 2L, 1L), T = c(6L, 9L, 13L, 4L, 8L), U = c(7L, 9L, 9L, 3L, 13L), OR = c(
  0.8571, 1, 1.444, 1.333,
  0.6154
), L95 = c(0.2881, 0.397, 0.6174, 0.2984, 0.2551), U95 = c(
  2.55,
  2.519, 3.379, 5.957, 1.485
), CHISQ = c(
  0.07692, 0, 0.7273, 0.1429,
  1.19
), P = c(0.001, 1, 0.3, 0.01, 0.5)), row.names = c(
  NA,
  5L
), class = "data.frame")

data
#>   CHR       SNP       BP A1 A2  T  U     OR    L95   U95   CHISQ     P
#> 1   6 rs9257319 28959616  2  1  6  7 0.8571 0.2881 2.550 0.07692 0.001
#> 2   6 rs2269553 28984488  1  2  9  9 1.0000 0.3970 2.519 0.00000 1.000
#> 3   6 rs2894066 29001906  1  2 13  9 1.4440 0.6174 3.379 0.72730 0.300
#> 4   6 rs3763338 29002290  1  2  4  3 1.3330 0.2984 5.957 0.14290 0.010
#> 5   6 rs1233508 29005612  2  1  8 13 0.6154 0.2551 1.485 1.19000 0.500

data %>%
  mutate(q = p.adjust(P, method = "fdr")) %>%
  filter(q < 0.05)
#>   CHR       SNP       BP A1 A2 T U     OR    L95   U95   CHISQ     P     q
#> 1   6 rs9257319 28959616  2  1 6 7 0.8571 0.2881 2.550 0.07692 0.001 0.005
#> 2   6 rs3763338 29002290  1  2 4 3 1.3330 0.2984 5.957 0.14290 0.010 0.025


data %>%
  mutate(q = p.adjust(P, method = "fdr")) %>%
  filter(q < 0.05) %>%
  count()
#>   n
#> 1 2

reprex 包于 2022-02-10 创建 (v2.0.0 )

于 2022-02-10T09:34:13.560 回答