Tree cut and Rectangles around clusters for a horizontal dendrogram in R

后端 未结 3 974
梦毁少年i
梦毁少年i 2020-12-29 14:25

I am trying to plot the results of a hierarchical clustering in R as a dendrogram, with rectangles identifying clusters.

The following code does the tr

相关标签:
3条回答
  • 2020-12-29 14:39

    To just get the job done (although in a quite ugly way) you could just manually swap the coordinates in the call to rect in rect.hclust:

    rhc <- function (tree, k = NULL, which = NULL, x = NULL, h = NULL, border = 2, 
        cluster = NULL) 
    {
        if (length(h) > 1L | length(k) > 1L) 
            stop("'k' and 'h' must be a scalar")
        if (!is.null(h)) {
            if (!is.null(k)) 
                stop("specify exactly one of 'k' and 'h'")
            k <- min(which(rev(tree$height) < h))
            k <- max(k, 2)
        }
        else if (is.null(k)) 
            stop("specify exactly one of 'k' and 'h'")
        if (k < 2 | k > length(tree$height)) 
            stop(gettextf("k must be between 2 and %d", length(tree$height)), 
                domain = NA)
        if (is.null(cluster)) 
            cluster <- cutree(tree, k = k)
        clustab <- table(cluster)[unique(cluster[tree$order])]
        m <- c(0, cumsum(clustab))
        if (!is.null(x)) {
            if (!is.null(which)) 
                stop("specify exactly one of 'which' and 'x'")
            which <- x
            for (n in seq_along(x)) which[n] <- max(which(m < x[n]))
        }
        else if (is.null(which)) 
            which <- 1L:k
        if (any(which > k)) 
            stop(gettextf("all elements of 'which' must be between 1 and %d", 
                k), domain = NA)
        border <- rep_len(border, length(which))
        retval <- list()
        for (n in seq_along(which)) {
            rect(
                 ybottom = m[which[n]] + 0.66,
                 xright = par("usr")[3L],
                 ytop = m[which[n] + 1] + 0.33,
                 xleft = mean(rev(tree$height)[(k - 1):k]),
                 border = border[n])
            retval[[n]] <- which(cluster == as.integer(names(clustab)[which[n]]))
        }
        invisible(retval)
    }
    

    and call rhc like you called rect.hclust:

    rhc(hca, k = 3, border = "red")
    

    enter image description here

    0 讨论(0)
  • 2020-12-29 14:49

    Both jlhoward and Backlin answers are good.

    What you could also try is using the dendextend package, designed exactly for this sort of thing. It has a rect.dendrogram function which works like rect.hclust, but with a horiz parameter (plus some more control over the location of the edge of the rect). For finding the relevant height you can use the heights_per_k.dendrogram function (which is much faster when also using the dendextendRcpp package)

    Here is a simple example for how you would get the same result as in the above examples (with an added bonus of colored branches, just for fun):

    install.packages("dendextend")
    install.packages("dendextendRcpp")
    
    library("dendextend")
    library("dendextendRcpp")
    
    # using piping to get the dend
    dend <- iris[,-5] %>% dist %>% hclust %>% as.dendrogram
    
    # plot + color the dend's branches before, based on 3 clusters:
    dend %>% color_branches(k=3) %>% plot(horiz=TRUE, main = "The dendextend package \n Gives extended functionality to R's dendrogram object")
    
    # add horiz rect
    dend %>% rect.dendrogram(k=3,horiz=TRUE)
    
    # add horiz (well, vertical) line:
    abline(v = heights_per_k.dendrogram(dend)["3"] + .6, lwd = 2, lty = 2, col = "blue")
    

    enter image description here

    0 讨论(0)
  • 2020-12-29 14:59

    Here's a solution using ggplot and the ggdendro package. As an added bonus, we can color the labels by cluster...

    library(cluster)
    dst   <- daisy(iris, metric = c("gower"), stand = FALSE)
    hca   <- hclust(dst, method = "average")
    k     <- 3
    clust <- cutree(hca,k=k)  # k clusters
    
    library(ggplot2)
    library(ggdendro)     # for dendro_data(...)
    dendr    <- dendro_data(hca, type="rectangle") # convert for ggplot
    clust.df <- data.frame(label=rownames(iris), cluster=factor(clust))
    dendr[["labels"]]   <- merge(dendr[["labels"]],clust.df, by="label")
    rect <- aggregate(x~cluster,label(dendr),range)
    rect <- data.frame(rect$cluster,rect$x)
    ymax <- mean(hca$height[length(hca$height)-((k-2):(k-1))])
    
    ggplot() + 
      geom_segment(data=segment(dendr), aes(x=x, y=y, xend=xend, yend=yend)) + 
      geom_text(data=label(dendr), aes(x, y, label=label, hjust=0, color=cluster), 
                size=3) +
      geom_rect(data=rect, aes(xmin=X1-.3, xmax=X2+.3, ymin=0, ymax=ymax), 
                color="red", fill=NA)+
      geom_hline(yintercept=0.33, color="blue")+
      coord_flip() + scale_y_reverse(expand=c(0.2, 0)) + 
      theme_dendro()
    
    0 讨论(0)
提交回复
热议问题