我有一个这样的数据集(但有数百个样本):
# Example data in long format: 12 samples x 13 observed classes = 156 rows.
# Columns: `sample` (character ID), `count` (integer), `class` (factor).
data <- local({
  sample_ids <- c(
    "C001", "C002", "C003", "C004", "C007", "C009",
    "C011", "C012", "C014", "C015", "C016", "C018"
  )
  class_labels <- c(
    "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n"
  )
  # Level 10 ("j") is declared but has no rows, so every sample carries the
  # same 13 class codes.
  class_codes <- c(1:9, 11:14)

  # Counts grouped per sample; within a sample they follow `class_codes`.
  counts <- c(
    # C001
    0L, 130L, 0L, 10L, 0L, 20L, 568L,
    23L, 6L, 77L, 616L, 230734L, 177L,
    # C002
    10L, 6396L, 0L, 5747L, 0L, 208L, 115189L,
    13130L, 1L, 38L, 200L, 2604L, 3104L,
    # C003
    0L, 95476L, 0L, 3591L, 0L, 7L, 26359L,
    83L, 5L, 1L, 1521L, 36004L, 9779L,
    # C004
    12L, 852L, 0L, 13L, 5L, 329L, 152053L,
    288L, 2L, 0L, 0L, 530L, 1023L,
    # C007
    57L, 84L, 98060L, 122L, 0L, 8552L, 668L,
    209L, 7L, 0L, 155L, 10159L, 4934L,
    # C009
    15L, 47L, 83L, 1L, 0L, 54L, 462L,
    89L, 43L, 0L, 127476L, 2614L, 3659L,
    # C011
    12L, 1L, 1L, 1061L, 0L, 84199L, 845L,
    898L, 0L, 29L, 10L, 63L, 1834L,
    # C012
    87L, 36L, 7L, 407L, 20167L, 39969L, 1429L,
    51072L, 0L, 0L, 27L, 9560L, 3643L,
    # C014
    2899L, 10L, 0L, 380L, 0L, 82L, 1543L,
    55L, 765L, 25172L, 29791L, 39805L, 922L,
    # C015
    6L, 843L, 5L, 110L, 0L, 174L, 134582L,
    575L, 15L, 65L, 37L, 19240L, 830L,
    # C016
    1L, 1L, 0L, 0L, 0L, 63L, 156446L,
    22L, 1L, 15L, 76L, 9710L, 793L,
    # C018
    128L, 4L, 1L, 2L, 0L, 1904L, 199L,
    98779L, 0L, 0L, 11436L, 91L, 1813L
  )

  # Assemble the columns and stamp the tibble classes directly, so no
  # package needs to be attached to create the object.
  structure(
    list(
      sample = rep(sample_ids, each = length(class_codes)),
      count = counts,
      class = factor(
        rep(class_codes, times = length(sample_ids)),
        levels = seq_along(class_labels),
        labels = class_labels
      )
    ),
    row.names = c(NA, -156L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})
我想为这些数据绘制堆叠条形图(显示每个样本中各类别所占的比例):
library(tidyverse)
# Proportional stacked bar chart: one bar per sample, segments filled by
# class. geom_col() takes bar heights straight from `count`, and
# position = "fill" rescales every bar to the same total height.
ggplot(data, aes(x = sample, y = count, fill = class)) +
  geom_col(colour = "black", position = "fill")
但正如您所看到的,条形图不是按顺序排列的,因此比较不同的样本并不容易。
所以我手动重新排列了样本的顺序,让图(在某种程度上)更“漂亮”:
# Fix the left-to-right bar order by hand: turning `sample` into a factor
# makes the discrete x axis follow the listed level order.
data <- mutate(
  data,
  sample = factor(
    sample,
    levels = c(
      "C001", "C014", "C009", "C018", "C012", "C004",
      "C016", "C002", "C015", "C011", "C003", "C007"
    )
  )
)

# Same proportional stacked bar chart as before, now drawn in that order.
ggplot(data, aes(x = sample, y = count, fill = class)) +
  geom_col(colour = "black", position = "fill")
这可能不是最好的顺序,但比较相似样本之间的比例更容易。
最后,我想制作这样的图(使用 facet_grid),但让我们从头开始。