像这样的东西:
library(dplyr)
# Pairwise two-sample t-tests between treatments, run separately within each
# `type`.
#
# Input:  `df`, with a `type` column, a `treatment` column and the values to
#         compare in columns 3:5.
# Output: `t_tests`, a data frame with one row per (type, treatment pair):
#         `treatment` (the two labels joined as "A, B"), `p_value` (from
#         t.test() with its default settings) and `type`.
t_tests <- df %>%
  # drop = TRUE skips unused factor levels; an empty group would otherwise
  # make combn() stop with "n < m".
  split(.$type, drop = TRUE) %>%
  lapply(function(x) {
    # Transpose the value columns so that every treatment (one row of `x`)
    # becomes a column holding its values.
    t(x[3:5]) %>%
      data.frame() %>%
      setNames(as.character(x$treatment)) %>%
      # Each element is a two-column data frame: one pair of treatments.
      combn(2, simplify = FALSE) %>%
      lapply(function(pair) {
        data.frame(
          treatment = paste0(names(pair), collapse = ", "),
          p_value = t.test(pair[, 1], pair[, 2])$p.value
        )
      }) %>%
      do.call(rbind, .)
  }) %>%
  # Take the type label from the list names rather than parsing it back out
  # of the row names, which cut off any label containing a ".".
  bind_rows(.id = "type") %>%
  select(treatment, p_value, type)
结果:
> head(t_tests, 10)
treatment p_value type
1 C, P 0.6112274 T1
2 C, N 0.6630060 T1
3 C, S 0.5945135 T1
4 P, N 0.9388568 T1
5 P, S 0.8349370 T1
6 N, S 0.9049995 T1
7 C, P 0.3274583 T2
8 C, N 0.9755364 T2
9 C, S 0.7391661 T2
10 P, N 0.3177871 T2
编辑(向数据集添加了额外级别的“文件”):
library(dplyr)
# Pairwise two-sample t-tests between treatments, run separately within each
# combination of `file` and `type`.
#
# Input:  `df`, with `file`, `type` and `treatment` columns and the values to
#         compare in columns 4:6.
# Output: `t_tests`, a data frame with one row per (file, type, treatment
#         pair): `treatment` (the two labels joined as "A, B"), `p_value`
#         (from t.test() with its default settings), `type` and `file`.
t_tests <- df %>%
  split(.$file, drop = TRUE) %>%
  lapply(function(y) {
    # drop = TRUE: `y$type` still carries every factor level, so a file that
    # lacks one type would otherwise yield an empty group and make combn()
    # stop with "n < m".
    split(y, y$type, drop = TRUE) %>%
      lapply(function(x) {
        # Transpose the value columns so that every treatment (one row of
        # `x`) becomes a column holding its values.
        t(x[4:6]) %>%
          data.frame() %>%
          setNames(as.character(x$treatment)) %>%
          # Each element is a two-column data frame: one pair of treatments.
          combn(2, simplify = FALSE) %>%
          lapply(function(pair) {
            data.frame(
              treatment = paste0(names(pair), collapse = ", "),
              p_value = t.test(pair[, 1], pair[, 2])$p.value
            )
          }) %>%
          do.call(rbind, .)
      }) %>%
      # Label rows with the list names (the type levels).
      bind_rows(.id = "type")
  }) %>%
  # Same for the file level. Parsing the label back out of the row names
  # with a regex cut it at the first ".", e.g. "run1.csv" became "run1".
  bind_rows(.id = "file") %>%
  select(treatment, p_value, type, file)
结果:
treatment p_value type file
1 C, P 0.3903450 T1 file1
2 C, N 0.3288727 T1 file1
3 C, S 0.0638599 T1 file1
4 P, N 0.6927599 T1 file1
5 P, S 0.1159615 T1 file1
6 N, S 0.2184015 T1 file1
7 C, P 0.1147805 T2 file1
8 C, N 0.4961888 T2 file1
9 C, S 0.9048607 T2 file1
10 P, N 0.4203666 T2 file1
11 P, S 0.3425908 T2 file1
12 N, S 0.7262478 T2 file1
13 C, P 0.6300293 T3 file1
14 C, N 0.8255837 T3 file1
15 C, S 0.7140522 T3 file1
16 P, N 0.4768694 T3 file1
17 P, S 0.3992130 T3 file1
18 N, S 0.8740219 T3 file1
19 C, P 0.2434270 T4 file1
20 C, N 0.2713622 T4 file1
关于编辑的注意事项:
OP 希望在数据中添加一个额外的顶层 `file`,可以简单地在末尾添加另一个 `split` + `lapply` 和 `do.call`。
新数据:
# Simulated example data: 108 rows of three value columns (V1-V3) drawn from
# a normal distribution with mean 5000 and sd 1000.
m <- matrix(
  rnorm(324, mean = 5000, sd = 1000),
  nrow = 108,
  dimnames = list(NULL, paste0("V", 1:3))
)

# `type` and `treatment` are built with 36 entries (9 types x 4 treatments)
# and recycled by data.frame() across the three files of 36 rows each.
df <- data.frame(
  type = factor(rep(paste0("T", 1:9), each = 4)),
  treatment = factor(rep(c("C", "P", "N", "S"), times = 9)),
  file = factor(rep(paste0("file", 1:3), each = 36)),
  as.data.frame(m)
)