我有一个包含 OTU 表和 TAX 表的 phyloseq 对象。我想创建一个条形图,例如在科(Family)级别,但属于同一门(Phylum)的科以同一种颜色显示,并通过该颜色的渐变来相互区分。
最终结果应该与此类似:
我使用 psmelt() 将我的 phyloseq 对象转换为数据框,
并尝试调整这篇文章中的代码:Stacked barplot with color gradients for each bar
但我目前无法创建正确的图表。
library(phyloseq)
library(ggplot2)
# Convert the GlobalPatterns phyloseq object to a data frame.
df <- psmelt(GlobalPatterns)

# One fill key per Phylum/Family pair. paste0() has no `sep` parameter: a
# `sep = ""` argument is simply pasted on as one more (empty) string, so it
# is left out here; the resulting strings are the same.
df$group <- paste0(df$Phylum, "-", df$Family)

# ColourPalleteMulti() is not defined in this snippet; it is taken from the
# linked "Stacked barplot with color gradients for each bar" post.
# NOTE(review): scale_fill_manual() needs one colour per distinct value of
# `group`. If the helper ignores rows whose Phylum or Family is NA, it will
# return fewer colours than there are groups -- confirm against its source.
colours <- ColourPalleteMulti(df, "Phylum", "Family")

# No y aesthetic is mapped, so geom_bar() uses its default count statistic:
# bar heights are numbers of rows per Sample, not summed Abundance.
ggplot(df, aes(Sample)) +
  geom_bar(aes(fill = group), colour = "grey") +
  scale_fill_manual("Subject", values = colours, guide = "none")
错误:手动刻度中的值不足。需要 395 个,但只提供了 334 个。
预先感谢您的任何帮助 !
编辑:这里是数据的输入
dput(head(df, 10))
structure(list(OTU = c("549656", "279599", "549656", "549656",
"360229", "331820", "94166", "331820", "329744", "189047"), Sample = c("AQC4cm",
"LMEpi24M", "AQC7cm", "AQC1cm", "M31Tong", "M11Fcsw", "M31Tong",
"M31Fcsw", "SLEpi20M", "TS29"), Abundance = c(1177685, 914209,
711043, 554198, 540850, 452219, 396201, 354695, 323914, 251215
), X.SampleID = structure(c(2L, 10L, 3L, 1L, 16L, 11L, 16L, 14L,
20L, 26L), .Label = c("AQC1cm", "AQC4cm", "AQC7cm", "CC1", "CL3",
"Even1", "Even2", "Even3", "F21Plmr", "LMEpi24M", "M11Fcsw",
"M11Plmr", "M11Tong", "M31Fcsw", "M31Plmr", "M31Tong", "NP2",
"NP3", "NP5", "SLEpi20M", "SV1", "TRRsed1", "TRRsed2", "TRRsed3",
"TS28", "TS29"), class = "factor"), Primer = structure(c(14L,
11L, 15L, 13L, 9L, 5L, 9L, 4L, 12L, 23L), .Label = c("ILBC_01",
"ILBC_02", "ILBC_03", "ILBC_04", "ILBC_05", "ILBC_07", "ILBC_08",
"ILBC_09", "ILBC_10", "ILBC_11", "ILBC_13", "ILBC_15", "ILBC_16",
"ILBC_17", "ILBC_18", "ILBC_19", "ILBC_20", "ILBC_21", "ILBC_22",
"ILBC_23", "ILBC_24", "ILBC_25", "ILBC_26", "ILBC_27", "ILBC_28",
"ILBC_29"), class = "factor"), Final_Barcode = structure(c(14L,
11L, 15L, 13L, 9L, 5L, 9L, 4L, 12L, 23L), .Label = c("AACGCA",
"AACTCG", "AACTGT", "AAGAGA", "AAGCTG", "AATCGT", "ACACAC", "ACACAT",
"ACACGA", "ACACGG", "ACACTG", "ACAGAG", "ACAGCA", "ACAGCT", "ACAGTG",
"ACAGTT", "ACATCA", "ACATGA", "ACATGT", "ACATTC", "ACCACA", "ACCAGA",
"ACCAGC", "ACCGCA", "ACCTCG", "ACCTGT"), class = "factor"), Barcode_truncated_plus_T = structure(c(6L,
10L, 8L, 25L, 19L, 9L, 19L, 20L, 14L, 16L), .Label = c("AACTGT",
"ACAGGT", "ACAGTT", "ACATGT", "ACGATT", "AGCTGT", "ATGTGT", "CACTGT",
"CAGCTT", "CAGTGT", "CCGTGT", "CGAGGT", "CGAGTT", "CTCTGT", "GAATGT",
"GCTGGT", "GTGTGT", "TCATGT", "TCGTGT", "TCTCTT", "TCTGGT", "TGATGT",
"TGCGGT", "TGCGTT", "TGCTGT", "TGTGGT"), class = "factor"), Barcode_full_length = structure(c(4L,
7L, 3L, 13L, 26L, 8L, 26L, 21L, 2L, 11L), .Label = c("AGAGAGACAGG",
"AGCCGACTCTG", "ATGAAGCACTG", "CAAGCTAGCTG", "CACGTGACATG", "CATCGACGAGT",
"CATGAACAGTG", "CGACTGCAGCT", "CGAGTCACGAT", "CTAGCGTGCGT", "CTAGTCGCTGG",
"GAACGATCATG", "GACCACTGCTG", "GATGTATGTGG", "GCATCGTCTGG", "GCCATAGTGTG",
"GCTAAGTGATG", "GTACGCACAGT", "GTAGACATGTG", "TAGACACCGTG", "TCGACATCTCT",
"TCGCGCAACTG", "TCTGATCGAGG", "TGACTCTGCGG", "TGCGCTGAATG", "TGTGGCTCGTG"
), class = "factor"), SampleType = structure(c(3L, 2L, 3L, 3L,
9L, 1L, 9L, 1L, 2L, 1L), .Label = c("Feces", "Freshwater", "Freshwater (creek)",
"Mock", "Ocean", "Sediment (estuary)", "Skin", "Soil", "Tongue"
), class = "factor"), Description = structure(c(2L, 10L, 3L,
1L, 16L, 11L, 16L, 14L, 21L, 25L), .Label = c("Allequash Creek, 0-1cm depth",
"Allequash Creek, 3-4 cm depth", "Allequash Creek, 6-7 cm depth",
"Calhoun South Carolina Pine soil, pH 4.9", "Cedar Creek Minnesota, grassland, pH 6.1",
"Even1", "Even2", "Even3", "F1, Day 1, right palm, whole body study ",
"Lake Mendota Minnesota, 24 meter epilimnion ", "M1, Day 1, fecal swab, whole body study ",
"M1, Day 1, right palm, whole body study ", "M1, Day 1, tongue, whole body study ",
"M3, Day 1, fecal swab, whole body study", "M3, Day 1, right palm, whole body study",
"M3, Day 1, tongue, whole body study ", "Newport Pier, CA surface water, Time 1",
"Newport Pier, CA surface water, Time 2", "Newport Pier, CA surface water, Time 3",
"Sevilleta new Mexico, desert scrub, pH 8.3", "Sparkling Lake Wisconsin, 20 meter eplimnion",
"Tijuana River Reserve, depth 1", "Tijuana River Reserve, depth 2",
"Twin #1", "Twin #2"), class = "factor"), Kingdom = c("Bacteria",
"Bacteria", "Bacteria", "Bacteria", "Bacteria", "Bacteria", "Bacteria",
"Bacteria", "Bacteria", "Bacteria"), Phylum = c("Cyanobacteria",
"Cyanobacteria", "Cyanobacteria", "Cyanobacteria", "Proteobacteria",
"Bacteroidetes", "Proteobacteria", "Bacteroidetes", "Actinobacteria",
"Firmicutes"), Class = c("Chloroplast", "Nostocophycideae", "Chloroplast",
"Chloroplast", "Betaproteobacteria", "Bacteroidia", "Gammaproteobacteria",
"Bacteroidia", "Actinobacteria", "Clostridia"), Order = c("Stramenopiles",
"Nostocales", "Stramenopiles", "Stramenopiles", "Neisseriales",
"Bacteroidales", "Pasteurellales", "Bacteroidales", "Actinomycetales",
"Clostridiales"), Family = c(NA, "Nostocaceae", NA, NA, "Neisseriaceae",
"Bacteroidaceae", "Pasteurellaceae", "Bacteroidaceae", "ACK-M1",
"Ruminococcaceae"), Genus = c(NA, "Dolichospermum", NA, NA, "Neisseria",
"Bacteroides", "Haemophilus", "Bacteroides", NA, NA), Species = c(NA,
NA, NA, NA, NA, NA, "Haemophilusparainfluenzae", NA, NA, NA),
group = c("Cyanobacteria-NA", "Cyanobacteria-Nostocaceae",
"Cyanobacteria-NA", "Cyanobacteria-NA", "Proteobacteria-Neisseriaceae",
"Bacteroidetes-Bacteroidaceae", "Proteobacteria-Pasteurellaceae",
"Bacteroidetes-Bacteroidaceae", "Actinobacteria-ACK-M1",
"Firmicutes-Ruminococcaceae"), group = c("Cyanobacteria-NA",
"Cyanobacteria-Nostocaceae", "Cyanobacteria-NA", "Cyanobacteria-NA",
"Proteobacteria-Neisseriaceae", "Bacteroidetes-Bacteroidaceae",
"Proteobacteria-Pasteurellaceae", "Bacteroidetes-Bacteroidaceae",
"Actinobacteria-ACK-M1", "Firmicutes-Ruminococcaceae")), row.names = c(406582L,
241435L, 406580L, 406574L, 329873L, 300794L, 494797L, 300772L,
298689L, 114279L), class = "data.frame")
编辑2:我们走在好的路上
因此,您的代码在颜色方面似乎工作得很好,但我对条形图的数值(每个科的百分比)还有一些疑问。
我使用以下代码绘制了数据的比例条形图:
# Rescale the counts with x -> 100 * x / sum(x) so they are expressed as
# percentages, then draw the bar plot with bars filled by Phylum.
GlobalPatterns_prop <- transform_sample_counts(
  GlobalPatterns,
  function(x) {
    100 * x / sum(x)
  }
)
plot_bar(GlobalPatterns_prop, fill = "Phylum")
并获得了这个:
如果我理解正确的话,按照您的方法,大部分门(也就是条形的大部分高度)都应该属于“其他”。我对自己的数据做了同样的处理,却清楚地看到各门(Phylum)的相对丰度存在差异。
我暂时不知道发生了什么……