3

我一直在对我的一些价值观进行聚类,然后对它们进行分组。ggplot2然后我使用并覆盖集群绘制一些密度图。示例图像如下: 在此处输入图像描述

对于集群中的每个组,我绘制一个密度图并覆盖它们。密度图中的颜色对应于聚类中的分组。

我的问题是,我已经根据分组手动拆分数据并将它们放在自己的单独文本表中(参见下面的代码)。这是非常低效的,并且对于大型数据集可能会非常乏味。如何动态绘制密度图ggplot2而不将集群分成各自的文本表?

原始输入表在拆分之前如下所示:

scores <- read.table(textConnection("
file        max        min        avg               lowest
132         5112.0     6520.0     5728.0            5699.0
133         4720.0     6064.0     5299.0            5277.0
5           4617.0     5936.0     5185.0            5165.0
1           4384.0     5613.0     4917.0            4895.0
1010        5008.0     6291.0     5591.0            5545.0
104         4329.0     5554.0     4858.0            4838.0
105         4636.0     5905.0     5193.0            5165.0
35          4304.0     5578.0     4842.0            4831.0
36          4360.0     5580.0     4891.0            4867.0
37          4444.0     5663.0     4979.0            4952.0
31          4328.0     5559.0     4858.0            4839.0
39          4486.0     5736.0     5031.0            5006.0
32          4334.0     5558.0     4864.0            4843.0
"), header=TRUE)

我用来生成绘图的代码:请注意将基本图形与网格组合仍然无法正常工作

library(ggplot2)
library(grid)

layout(matrix(c(1,2,3,1,4,5), 2, 3, byrow = TRUE))

# define function to create multi-plot setup (nrow, ncol)
vp.setup <- function(x,y){
grid.newpage()
pushViewport(viewport(layout = grid.layout(x,y)))
}

# define function to easily access layout (row, col)
vp.layout <- function(x,y){
viewport(layout.pos.row=x, layout.pos.col=y)
}

vp.setup(2,3)

file_vals <- read.table(textConnection("
file        avg_vals
133         1.5923
132         1.6351
1010        1.6532
104         1.6824
105         1.6087
39          1.8694
32          1.9934
31          1.9919
37          1.8638
36          1.9691
35          1.9802
1           1.7283
5           1.7637
"), header=TRUE)

red <- read.table(textConnection("
file        max        min        avg               lowest
31          4328.0     5559.0     4858.0            4839.0
32          4334.0     5558.0     4864.0            4843.0
36          4360.0     5580.0     4891.0            4867.0
35          4304.0     5578.0     4842.0            4831.0
"), header=TRUE)

blue <- read.table(textConnection("
file        max        min        avg               lowest
133         4720.0     6064.0     5299.0            5277.0
105         4636.0     5905.0     5193.0            5165.0
104         4329.0     5554.0     4858.0            4838.0
132         5112.0     6520.0     5728.0            5699.0
1010        5008.0     6291.0     5591.0            5545.0
"), header=TRUE)

green <- read.table(textConnection("
file        max        min        avg               lowest
39          4486.0     5736.0     5031.0            5006.0
37          4444.0     5663.0     4979.0            4952.0
5           4617.0     5936.0     5185.0            5165.0
1           4384.0     5613.0     4917.0            4895.0
"), header=TRUE)


# Perform Cluster
d <- dist(file_vals$avg_vals, method = "euclidean")
fit <- hclust(d, method="ward")
plot(fit, labels=file_vals$file)
groups <- cutree(fit, k=3)

cols = c('red', 'blue', 'green', 'purple', 'orange', 'magenta', 'brown', 'chartreuse4','darkgray','cyan1')
rect.hclust(fit, k=3, border=cols)


# Desnity plots
dat = rbind(data.frame(Cluster='Red', max_vals = red$max), data.frame(Cluster='Blue', max_vals = blue$max), data.frame(Cluster='Green', max_vals = green$max))
max = (ggplot(dat,aes(x=max_vals)))
max = max + geom_density(aes(fill=factor(Cluster)), alpha=.3) + xlim(c(3500, 5500)) + scale_fill_manual(values=c("red",'blue',"green"))
max = max + labs(fill = 'Clusters')
print(max, vp=vp.layout(1,2))

dat = rbind(data.frame(Cluster='Red', min_vals = red$min), data.frame(Cluster='Blue', min_vals = blue$min), data.frame(Cluster='Green', min_vals = green$min))
min = (ggplot(dat,aes(x=min_vals)))
min = min + geom_density(aes(fill=factor(Cluster)), alpha=.3) + xlim(c(5000, 7000)) + scale_fill_manual(values=c("red",'blue',"green"))
min = min + labs(fill = 'Clusters')
print(min, vp=vp.layout(1,3))

dat = rbind(data.frame(Cluster='Red', avg_vals = red$avg), data.frame(Cluster='Blue', avg_vals = blue$avg), data.frame(Cluster='Green', avg_vals = green$avg))
avg = (ggplot(dat,aes(x=avg_vals)))
avg = avg + geom_density(aes(fill=factor(Cluster)), alpha=.3) + xlim(c(4000, 6000)) + scale_fill_manual(values=c("red",'blue',"green"))
avg = avg + labs(fill = 'Clusters')
print(avg, vp=vp.layout(2,2))

dat = rbind(data.frame(Cluster='Red', lowest_vals = red$lowest), data.frame(Cluster='Blue', lowest_vals = blue$lowest), data.frame(Cluster='Green', lowest_vals = green$lowest))
lowest = (ggplot(dat,aes(x=lowest_vals)))
lowest = lowest + geom_density(aes(fill=factor(Cluster)), alpha=.3) + xlim(c(4000, 6000)) + scale_fill_manual(values=c("red",'blue',"green"))
lowest = lowest + labs(fill = 'Clusters')
print(lowest, vp=vp.layout(2,3))
4

1 回答 1

1

通过这种方式,您可以使用 4 个面板自动创建所需的绘图。

一、数据:

scores <- read.table(textConnection("
file        max        min        avg               lowest
132         5112.0     6520.0     5728.0            5699.0
133         4720.0     6064.0     5299.0            5277.0
5           4617.0     5936.0     5185.0            5165.0
1           4384.0     5613.0     4917.0            4895.0
1010        5008.0     6291.0     5591.0            5545.0
104         4329.0     5554.0     4858.0            4838.0
105         4636.0     5905.0     5193.0            5165.0
35          4304.0     5578.0     4842.0            4831.0
36          4360.0     5580.0     4891.0            4867.0
37          4444.0     5663.0     4979.0            4952.0
31          4328.0     5559.0     4858.0            4839.0
39          4486.0     5736.0     5031.0            5006.0
32          4334.0     5558.0     4864.0            4843.0
"), header=TRUE)

file_vals <- read.table(textConnection("
file        avg_vals
                                   133         1.5923
                                   132         1.6351
                                   1010        1.6532
                                   104         1.6824
                                   105         1.6087
                                   39          1.8694
                                   32          1.9934
                                   31          1.9919
                                   37          1.8638
                                   36          1.9691
                                   35          1.9802
                                   1           1.7283
                                   5           1.7637
                                   "), header=TRUE)

两个数据框可以合并为一个:

dat <- merge(scores, file_vals, by = "file")

合身:

d <- dist(dat$avg_vals, method = "euclidean")
fit <- hclust(d, method="ward")
groups <- cutree(fit, k=3)
cols <- c('red', 'blue', 'green', 'purple', 'orange', 'magenta', 'brown', 'chartreuse4','darkgray','cyan1')

添加带有颜色名称的列(基于拟合):

dat$group <- cols[groups]

将数据从宽格式重塑为长格式:

dat_re <- reshape(dat, varying = c("max", "min", "avg", "lowest"), direction = "long", drop = c("file", "avg_vals"), v.names = "value", idvar = "group", times = c("max", "min", "avg", "lowest"), new.row.names = seq(nrow(scores) * 4))

阴谋:

p <- (ggplot(dat_re ,aes(x = value))) +
geom_density(aes(fill = group), alpha=.3) +
scale_fill_manual(values=cols) +
labs(fill = 'Clusters') +
facet_wrap( ~ time)

print(p)

在此处输入图像描述

于 2012-09-10T19:53:57.797 回答