1

我想创建一个堆叠条形图,类似于 STRUCTURE 图(用 distruct 程序绘制的那种)。如何按一个公共因子对 X 轴标签进行分组,并且该因子只显示一次?例如,下面有来自 2 个群体的 6 个个体,我希望只有 2 个标签,分别居中显示在各自群体的下方。另外,有没有办法在每个组的周围加上一个边框?

这是我所拥有的:

# Ancestry fractions for the example: six rows (one per individual) and
# seventeen columns (A1..A17, one per ancestry component).
df <- data.frame(
  A1  = c(0.000, 0.000, 0.020, 0.000, 0.000, 0.000),
  A2  = c(0.000, 0.000, 0.235, 0.195, 0.166, 0.205),
  A3  = c(0.065, 0.027, 0.000, 0.027, 0.000, 0.036),
  A4  = c(0.000, 0.000, 0.007, 0.011, 0.000, 0.000),
  A5  = c(0.000, 0.000, 0.000, 0.002, 0.028, 0.000),
  A6  = c(0.000, 0.041, 0.021, 0.068, 0.106, 0.105),
  A7  = c(0.093, 0.085, 0.001, 0.056, 0.110, 0.000),
  A8  = c(0.000, 0.000, 0.000, 0.000, 0.000, 0.029),
  A9  = c(0.000, 0.000, 0.058, 0.027, 0.096, 0.156),
  A10 = c(0.000, 0.023, 0.129, 0.012, 0.074, 0.117),
  A11 = c(0.000, 0.041, 0.000, 0.000, 0.000, 0.000),
  A12 = c(0.024, 0.000, 0.000, 0.000, 0.000, 0.000),
  A13 = c(0.817, 0.783, 0.527, 0.446, 0.258, 0.321),
  A14 = c(0.000, 0.000, 0.000, 0.006, 0.000, 0.000),
  A15 = c(0.000, 0.000, 0.000, 0.054, 0.143, 0.027),
  A16 = c(0.000, 0.000, 0.000, 0.000, 0.000, 0.003),
  A17 = c(0.000, 0.000, 0.000, 0.097, 0.019, 0.000)
)

# Stacked bar chart of the transposed table: one bar per individual, one
# coloured segment per ancestry column. The population id is repeated under
# every bar (two individuals in population "1", four in population "2").
barplot(
  height = t(df),
  col = rainbow(17),
  border = NA,
  space = 0.05,
  names.arg = rep(c("1", "2"), times = c(2, 4)),
  xlab = "Population",
  ylab = "Ancestry"
)

前

这大致是我想要的:

后

如果可能的话,最好设置space=参数并且仍然独立地为组之间的黑色边框选择宽度。

这是我引用的程序:http://pritchardlab.stanford.edu/structure.html

也许 ggplot 更适合这个?对不起,如果答案很明显,但我无法弄清楚。

注意:我知道可以手动把特定的 names.arg 值填成 "",但这对于庞大的数据集来说很费力,既不能很好地让标签居中,也不能解决边框问题。

4

1 回答 1

3

这是使用 ggplot2 的解决方案。我添加了个体 ID 和群体 ID 两列,并将数据转换(melt)成了长格式。我使用分面(facet)按群体进行分组。不幸的是,ggplot2 不能在图的底部放置分面标签。为此,我只能推荐使用 Inkscape 或 Adobe Illustrator 编辑 pdf 文件。

在此处输入图像描述

library(ggplot2)
library(reshape2)

# Add id and population label columns. Needed for melting and plotting.
# The population labels are specific to this six-individual example.
df$population <- c("p1", "p1", "p2", "p2", "p2", "p2")

# One id per row, stored as a factor whose levels follow row order. A plain
# character column is ordered alphabetically on a discrete axis, so with ten
# or more individuals "id10" would be drawn before "id2".
subject_ids <- paste0("id", seq_len(nrow(df)))
df$subject_id <- factor(subject_ids, levels = subject_ids)

# Melt (reshape data from wide format to long format): one row per
# subject/ancestry pair, with the ancestry column name stored in `Ancestry`
# and its value in `Fraction`.
mdat <- melt(df, id.vars = c("subject_id", "population"),
             variable.name = "Ancestry", value.name = "Fraction")

# Simple stacked bar plot: one bar per subject, stacked by ancestry, with one
# facet column per population. Free scales and free space are requested so
# each facet only spans its own subjects.
p <- ggplot(data = mdat,
            mapping = aes(x = subject_id, y = Fraction, fill = Ancestry))
p <- p + geom_bar(stat = "identity", position = "stack")
p <- p + facet_grid(. ~ population, scales = "free", space = "free",
                    drop = TRUE)

# Customized stacked bar plot:

# Sort ancestry order by overall 'abundance' of each ancestry.
# The ancestry columns are picked by name (every column except the two id
# columns) instead of by position, so the code does not depend on there being
# exactly 17 ancestry columns at the front of the data frame.
ancestry_cols <- setdiff(names(df), c("subject_id", "population"))
ancestry_totals <- colSums(df[, ancestry_cols, drop = FALSE])
mdat$Ancestry <- factor(mdat$Ancestry,
                        levels = names(sort(ancestry_totals,
                                            decreasing = TRUE)))

# Colors taken from:
# https://github.com/mbostock/d3/wiki/Ordinal-Scales#category20
col17 <- c("#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
           "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf",
           "#aec7e8", "#ffbb78", "#98df8a", "#ff9896", "#c5b0d5",
           "#c49c94", "#c7c7c7")

# Name the palette after the sorted levels so the manual fill scale can match
# colours to ancestries by name (most abundant ancestry gets the first
# colour). Fail early if there are more ancestries than colours, and trim the
# palette if there are fewer, so names and colours always line up.
stopifnot(nlevels(mdat$Ancestry) <= length(col17))
col17 <- col17[seq_len(nlevels(mdat$Ancestry))]
names(col17) <- levels(mdat$Ancestry)

# Customized plot: every bar is rescaled to full height (position = "fill"),
# bars touch each other (width = 1), and each population facet is framed by
# the panel border, with a small horizontal gap between facets.
#
# Changes from the original call:
#   * the `order` aesthetic is dropped: it is deprecated in ggplot2 >= 2.0.0,
#     where stacking follows the factor level order of `Ancestry`;
#   * `panel.margin.x` is replaced by its successor `panel.spacing.x`;
#   * `unit` is an exported function of grid, so `grid::unit` is used instead
#     of the internal-access operator `:::`.
# NOTE(review): newer ggplot2 releases accept `facet_grid(switch = "x")` to
# draw the population strips below the panels - not enabled here so the
# figure layout stays as originally published.
p2 <- ggplot(mdat, aes(x = subject_id, y = Fraction, fill = Ancestry)) +
  geom_bar(stat = "identity", position = "fill", width = 1,
           colour = "grey25") +
  facet_grid(. ~ population, drop = TRUE, space = "free", scales = "free") +
  theme(
    panel.grid = element_blank(),
    panel.background = element_rect(fill = NA, colour = "grey25"),
    panel.spacing.x = grid::unit(0.5, "lines"),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.background = element_blank(),
    strip.text = element_text(size = 12),
    legend.key = element_rect(colour = "grey25")
  ) +
  # Zero expansion so bars fill the panel right up to its border.
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_fill_manual(values = col17) +
  # Kept from the original; presumably meant to stop the bar outline colour
  # from being applied to the legend keys - TODO confirm it is still needed.
  guides(fill = guide_legend(override.aes = list(colour = NULL)))


library(gridExtra)
# Draw both plots side by side into a 10 x 5 inch PNG at 100 dpi, then close
# the graphics device so the file is written out.
png(filename = "bar_plots.png", width = 10, height = 5, units = "in",
    res = 100)
grid.arrange(p, p2, nrow = 1)
dev.off()
于 2015-07-23T00:04:08.863 回答