8

我试图将层次聚类的结果绘制R为树状图,矩形标识聚类。

以下代码对垂直树状图进行了处理,但对于水平树状图 ( horiz=TRUE),不绘制矩​​形。有没有办法对水平树状图做同样的事情。

library("cluster")
dst <- daisy(iris, metric = c("gower"), stand = FALSE)
hca <- hclust(dst, method = "average")
plot(as.dendrogram(hca), horiz = FALSE)
rect.hclust(hca, k = 3, border = "red")

此外,我想绘制一条线以在所需的距离值处切割树。如何在 R 中绘制它。该cutree函数返回集群,但也可以绘制它。

cutree(hca, k = 3)

我正在寻找的所需输出是这样的。

树状图

如何在 R 中完成这项工作?

4

3 回答 3

6

jlhoward 和 Backlin 的答案都很好。

您还可以尝试使用dendextend专为此类事情设计的包。它有一个rect.dendrogram类似 的函数rect.hclust,但带有一个 horiz 参数(加上对矩形边缘位置的更多控制)。为了找到相关的高度,您可以使用该功能(同时使用包heights_per_k.dendrogram时要快得多)dendextendRcpp

这是一个简单的示例,说明如何获得与上述示例相同的结果(增加了彩色分支,只是为了好玩):

install.packages("dendextend")
install.packages("dendextendRcpp")

library("dendextend")
library("dendextendRcpp")

# using piping to get the dend
dend <- iris[,-5] %>% dist %>% hclust %>% as.dendrogram

# plot + color the dend's branches before, based on 3 clusters:
dend %>% color_branches(k=3) %>% plot(horiz=TRUE, main = "The dendextend package \n Gives extended functionality to R's dendrogram object")

# add horiz rect
dend %>% rect.dendrogram(k=3,horiz=TRUE)

# add horiz (well, vertical) line:
abline(v = heights_per_k.dendrogram(dend)["3"] + .6, lwd = 2, lty = 2, col = "blue")

在此处输入图像描述

于 2014-08-11T08:38:27.340 回答
5

这是使用ggplotggdendro包的解决方案。作为额外的奖励,我们可以按集群为标签着色......

library(cluster)
dst   <- daisy(iris, metric = c("gower"), stand = FALSE)
hca   <- hclust(dst, method = "average")
k     <- 3
clust <- cutree(hca,k=k)  # k clusters

library(ggplot2)
library(ggdendro)     # for dendro_data(...)
dendr    <- dendro_data(hca, type="rectangle") # convert for ggplot
clust.df <- data.frame(label=rownames(iris), cluster=factor(clust))
dendr[["labels"]]   <- merge(dendr[["labels"]],clust.df, by="label")
rect <- aggregate(x~cluster,label(dendr),range)
rect <- data.frame(rect$cluster,rect$x)
ymax <- mean(hca$height[length(hca$height)-((k-2):(k-1))])

ggplot() + 
  geom_segment(data=segment(dendr), aes(x=x, y=y, xend=xend, yend=yend)) + 
  geom_text(data=label(dendr), aes(x, y, label=label, hjust=0, color=cluster), 
            size=3) +
  geom_rect(data=rect, aes(xmin=X1-.3, xmax=X2+.3, ymin=0, ymax=ymax), 
            color="red", fill=NA)+
  geom_hline(yintercept=0.33, color="blue")+
  coord_flip() + scale_y_reverse(expand=c(0.2, 0)) + 
  theme_dendro()
于 2014-06-10T17:39:48.543 回答
4

为了完成工作(尽管以一种非常丑陋的方式),您可以手动将调用中的坐标交换到rectin rect.hclust

rhc <- function (tree, k = NULL, which = NULL, x = NULL, h = NULL, border = 2, 
    cluster = NULL) 
{
    if (length(h) > 1L | length(k) > 1L) 
        stop("'k' and 'h' must be a scalar")
    if (!is.null(h)) {
        if (!is.null(k)) 
            stop("specify exactly one of 'k' and 'h'")
        k <- min(which(rev(tree$height) < h))
        k <- max(k, 2)
    }
    else if (is.null(k)) 
        stop("specify exactly one of 'k' and 'h'")
    if (k < 2 | k > length(tree$height)) 
        stop(gettextf("k must be between 2 and %d", length(tree$height)), 
            domain = NA)
    if (is.null(cluster)) 
        cluster <- cutree(tree, k = k)
    clustab <- table(cluster)[unique(cluster[tree$order])]
    m <- c(0, cumsum(clustab))
    if (!is.null(x)) {
        if (!is.null(which)) 
            stop("specify exactly one of 'which' and 'x'")
        which <- x
        for (n in seq_along(x)) which[n] <- max(which(m < x[n]))
    }
    else if (is.null(which)) 
        which <- 1L:k
    if (any(which > k)) 
        stop(gettextf("all elements of 'which' must be between 1 and %d", 
            k), domain = NA)
    border <- rep_len(border, length(which))
    retval <- list()
    for (n in seq_along(which)) {
        rect(
             ybottom = m[which[n]] + 0.66,
             xright = par("usr")[3L],
             ytop = m[which[n] + 1] + 0.33,
             xleft = mean(rev(tree$height)[(k - 1):k]),
             border = border[n])
        retval[[n]] <- which(cluster == as.integer(names(clustab)[which[n]]))
    }
    invisible(retval)
}

rhc像你打电话一样打电话rect.hclust

rhc(hca, k = 3, border = "red")

在此处输入图像描述

于 2014-06-10T12:33:23.563 回答