使用 combinat 包的解决方案是:
library(combinat)
# Random example data: ten diseases (A-J), each assigned one of four genes
# (a-d) drawn with replacement. No seed is set, so the sample printouts below
# come from one particular run and will differ between runs.
DF <- data.frame(
  Disease = LETTERS[1:10],
  Gene = sample(letters[1:4], 10, replace = TRUE)
)
#> DF
# Disease Gene
#1 A a
#2 B a
#3 C c
#4 D b
#5 E d
#6 F b
#7 G c
#8 H d
#9 I b
#10 J d

# All unordered pairs of diseases, one pair per column (see `?combn`).
# unique() keeps a disease that occupies several rows (one row per gene) from
# being paired with itself or producing duplicate pairs; as.character() guards
# against Disease being stored as a factor.
dis_combns <- combn(unique(as.character(DF$Disease)), 2)

# Gene set for each pair of diseases. union() merges the genes of both
# diseases, so a pair with different genes is reported as e.g. "a and c".
# NOTE(review): despite the "common genes" wording this is a union; switch to
# intersect() if only genes shared by both diseases are wanted.
# Iterating over column indices with lapply() always yields a list. apply()
# would collapse the result to a matrix or vector whenever every pair produced
# the same number of genes, which breaks the formatting step below.
commons <- lapply(seq_len(ncol(dis_combns)), function(j) {
  pair <- dis_combns[, j]
  union(DF$Gene[DF$Disease == pair[1]], DF$Gene[DF$Disease == pair[2]])
})

# Collapse each gene set into a single label; vapply() guarantees a character
# vector with exactly one element per disease pair.
commons <- vapply(commons, paste, character(1), collapse = " and ")

# Result: one row per disease pair, with the formatted gene label in between.
resultDF <- data.frame(
  Disease1 = dis_combns[1, ],
  Common_genes = commons,
  Disease2 = dis_combns[2, ]
)
#> resultDF
# Disease1 Common_genes Disease2
#1 A a B
#2 A a and c C
#3 A a and b D
#4 A a and d E
#5 A a and b F
#6 A a and c G
#7 A a and d H
#8 A a and b I
#9 A a and d J
#10 B a and c C
#11 B a and b D
#12 B a and d E
#13 B a and b F
#14 B a and c G
#....