我有一张大桌子,可以在这里作为Renvironment使用,它看起来像这样:
基因 | s1 | s2 | s3 | s4 | s5 | s6 | s7 | s8 | s9 | s10 | s11 | s12 | 类型 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
电车2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 蛋白质编码 |
CLIC5 | 0 | 0 | 1 | 0 | 1 | 0.2 | 0 | 0 | 1.3 | 1 | 0 | 0.7 | 蛋白质编码 |
GAL3ST2 | 0 | 0.5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 特纳 |
超高频1BP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 特纳 |
OSTM1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 特纳 |
IMPG2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 小RNA |
OXCT1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.3 | 小RNA |
CPNE3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 小RNA |
PPP1R15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 小RNA |
亚当11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | snoRNA |
PTHLH | 0 | 0 | 0 | 0 | 0.1 | 0.5 | 0 | 0 | 0.1 | 0.2 | 0 | 0.5 | snoRNA |
通过使用以下代码,我可以获得所有单元格的唯一元素 OF TYPE(最后一列)“
table <-read.delim("smalRNAseq/counts_molc_NoLengthCut_mirnaCollapsed2.txt", header = T, sep = "\t")
genecodev22 <-read.table("genecodev22.csv")
#assign new names to the coluns so I can merge them
colnames(genecodev22)[colnames(genecodev22) %in% c("V1", "V2")] <- c("ENSEMBLE", "TYPE")
colnames(table)[colnames(table) %in% c("X", "X.1")] <- c("ENSEMBLE1", "ENSEMBLE")
#mergethem by ENSEMBLE so that the type will be a new entry at the back
mergetable <- merge(table,genecodev22, match = "first", by="ENSEMBLE")
#assign the first column as column names, the one with the true ensemble names
row.names(mergetable) <- mergetable[[1]]
#and remove it
mergetable2 <- mergetable[,-2:-1]
#get all entries with no 0 value
mergetable3 <- mergetable2[rowSums(mergetable2[1:nrow(mergetable2),1:95])>0,]
# Total number of unique element and occurancy p
list_of_elements <- aggregate(data.frame(count = mergetable3$TYPE),
list(value = mergetable3$TYPE),
length)
#plot it
row.names(list_of_elements) <- list_of_elements[[1]]
list_of_elements <- list_of_elements[-1]
list_of_elementsOrd<- list_of_elements[order(list_of_elements$count, decreasing = T),]
library(ggplot2)
ggplot(as.data.frame(list_of_elementsOrd),
aes(x=reorder(value, -count), y=count, fill=value)) +
geom_bar(stat = "identity") +
coord_flip() +
geom_text(aes(label=count), vjust=-1, color="black", size=3.5)+
theme(axis.text.x = element_text(angle = 90), legend.position = "none")
我想要什么我 希望每个 s# 绘制一个堆叠箱形图,其具有唯一的类型(不应该计算 0)。
非常感谢
编辑:我设法使用以下循环创建所有“聚合”元素的列表:
i=1
list_of_elementsOrd <- c()
mergedElements <- list_of_elements[1:2]
for (i in 1:length(mergetable2[-1])) {
function(row) all(row !=0 )), ]
mergetable2[mergetable2 == 0] <- NA
list_of_elements <- aggregate(data.frame(count = mergetable2$TYPE),
list(value = mergetable2$TYPE),
length)
list_of_elementsOrd[[i]]<- list_of_elements[order(list_of_elements$count, decreasing = T),]
}
但我当然无法完成剧情。当我将其转换为数据框时,我得到 column_names 为:
value.70 count.70 value.71 count.71