# Add a `clust_mean` column to `df2`: for each row, the mean of the
# `df1$value` entries whose `df1$gene` name appears in that row's `genes`
# string (gene names separated by whitespace).
# A row whose genes are all absent from `df1$gene` gets NaN.
df2$clust_mean <- vapply(
  as.character(df2$genes),
  function(x) {
    # Split on runs of whitespace so repeated spaces do not yield empty
    # gene names.
    gene_names <- strsplit(x, split = "\\s+")[[1]]
    # match() returns NA for names not found in df1$gene; indexing with NA
    # produces an NA value, which na.rm = TRUE drops from the mean.
    rows <- match(gene_names, df1$gene)
    mean(df1[rows, "value"], na.rm = TRUE)
  },
  FUN.VALUE = numeric(1), # guarantees a plain numeric column, even if empty
  USE.NAMES = FALSE       # keep the new column unnamed
)
df2
The output is:
    cluster                   genes clust_mean
1 cluster 1       gene1 gene2 gene3   2.566667
2 cluster 2                   gene1   2.400000
3 cluster 3 gene1 gene2 gene3 gene4   1.975000
#sample data - I have slightly modified the values in your sample data to make the illustration simple
> dput(df1)
structure(list(gene = structure(1:4, .Label = c("gene1", "gene2",
"gene3", "gene4"), class = "factor"), value = c(2.4, 5.2, 0.1,
0.2)), .Names = c("gene", "value"), class = "data.frame", row.names = c(NA,
-4L))
> dput(df2)
structure(list(cluster = structure(1:3, .Label = c("cluster 1",
"cluster 2", "cluster 3"), class = "factor"), genes = structure(c(2L,
1L, 3L), .Label = c("gene1", "gene1 gene2 gene3", "gene1 gene2 gene3 gene4"
), class = "factor")), .Names = c("cluster", "genes"), class = "data.frame", row.names = c(NA,
-3L))