How to create and label dendrograms in R

Today, we will see how to create and label dendrograms in R with base R functions.

How to make dendrograms

First, we will make a dendrogram from the first ten rows of the mtcars dataset, the same data as in this question:

# Load the mtcars data set.
data(mtcars)

# Hierarchically cluster the first ten rows on their pairwise distances
# (dist() and hclust() are both called with their default settings).
hc <- hclust(dist(mtcars[1:10, ]))

# Draw the clustering as a dendrogram and label the vertical axis.
dendrogram <- as.dendrogram(hc)
plot(dendrogram, ylab = "Height")
Dendrogram produced in R

How to annotate dendrograms

Then, we will reimplement in base R the kind of dendrogram annotation for which the pvclust library is typically used. We will start by finding which group each label belongs to at each level of the dendrogram:

# Build `nodes`: one row per label, plus one column per level of the tree.
# The column named "k" holds the group each label falls into when the tree is
# cut into k groups, for k going from the number of labels down to 1.

# Cut the tree at every level in a single call; cutree() returns a matrix with
# one row per label and one column per requested group count.
group_counts <- rev(seq_along(hc$labels))
membership <- cutree(hc, k = group_counts)

# Put the labels in a column of their own and keep the numeric column names.
nodes <- data.frame(
  label = rownames(membership),
  membership,
  check.names = FALSE
)

# Sort the rows by label and number them 1..n.
nodes <- nodes[order(nodes[, "label"]), ]
rownames(nodes) <- NULL

View(nodes)

Here is the content of the nodes data.frame:

Screenshot of the content of the nodes data.frame

After that, we will compute the x and y values of each node of the dendrogram:

# Compute the (x, y) position of every node of the dendrogram.
#
# Inputs (created earlier in the script):
#   hc    - the hclust object being plotted
#   nodes - one row per label; the column named "k" holds the group each label
#           falls into when the tree is cut into k groups
# Output:
#   results - one row per node with the columns level, x, y and label, where
#             label is the ":"-joined list of the labels found under the node
#
# NOTE: labels are joined and split on ":", so a label that itself contains
# ":" would be split incorrectly.

# Sub-trees built so far that have not yet been merged into a bigger one:
# `seen` is the ":"-joined list of member labels, `x` the position of the node.
already_seen <- data.frame(x = numeric(0), seen = character(0))
results <- NULL

# Values that do not change while walking up the tree.
leaf_count <- length(hc$labels)
ordered_labels <- hc$labels[hc$order]  # labels in plotting order
merge_heights <- rev(hc$height)        # merge_heights[k]: merge leaving k groups

for (level in leaf_count:1) {

  if (level == leaf_count) {
    # Bottom level: every label is a leaf, placed at its rank in the plotting
    # order and at height 0.
    for (label in hc$labels) {
      x <- which(ordered_labels == label)
      y <- 0
      results <- rbind(results, data.frame(level, x, y, label))
    }

  } else {
    # Groups holding more than one label at this level are internal nodes.
    membership <- nodes[, as.character(level)]
    ancestors <- unique(membership[duplicated(membership)])

    for (ancestor in ancestors) {
      seen <- nodes[membership == ancestor, "label"]
      seen_key <- paste(seen, collapse = ":")

      # Only the group that was formed at this level still has to be recorded.
      if (!seen_key %in% already_seen[, "seen"]) {

        # Existing sub-trees that this new node absorbs.
        known_keys <- as.character(already_seen[, "seen"])
        known_members <- strsplit(known_keys, ":", fixed = TRUE)
        is_child <- vapply(
          known_members,
          function(members) any(seen %in% members),
          logical(1)
        )
        items <- known_keys[is_child]

        # The node sits halfway between its children: the x of each absorbed
        # sub-tree plus the position of each label not covered by one of them.
        # This single expression covers the merge of two leaves, of a leaf and
        # a sub-tree, and of two sub-trees.
        child_x <- already_seen[already_seen[, "seen"] %in% items, "x"]
        covered <- unlist(strsplit(items, ":", fixed = TRUE))
        loose_leaves <- which(ordered_labels %in% seen[!seen %in% covered])
        x <- mean(c(loose_leaves, child_x))

        y <- merge_heights[level]

        # Replace the absorbed sub-trees by the node that now contains them.
        already_seen <- already_seen[!already_seen[, "seen"] %in% items, ]
        already_seen <- rbind(already_seen, data.frame(x, seen = seen_key))
        results <- rbind(results, data.frame(level, x, y, label = seen_key))
      }
    }
  }
}

View(results)

Here is the content of the results data.frame:

Screenshot of the content of the results data.frame

The label column also shows which labels belong to each node.

1) Annotate the dendrogram with the x values

# Redraw the dendrogram, then write the x position of each node on the plot.
plot(as.dendrogram(hc), ylab = "Height")

# Visit the nodes from level 1 upwards. order() leaves ties in their original
# order, so nodes that share a level are drawn in the order they appear in
# `results`.
by_level <- results[order(results[, "level"]), ]

# adj = c(0.5, 1) centres each label horizontally and hangs it below the node;
# xpd = NA lets labels extend outside the plot region.
text(
  by_level[, "x"],
  by_level[, "y"],
  labels = round(by_level[, "x"], 2),
  col = "blue",
  adj = c(0.5, 1),
  xpd = NA
)
Dendrogram produced in R with the x value written on each node

2) Annotate the dendrogram with the y values

# Redraw the dendrogram, then write the y value (height) of each node on the
# plot.
plot(as.dendrogram(hc), ylab = "Height")

for (level in seq_along(hc$labels)) {
  # All the nodes recorded at this level of the tree.
  at_level <- results[results[, "level"] == level, ]

  for (i in seq_len(nrow(at_level))) {
    x <- at_level[i, "x"]
    y <- at_level[i, "y"]
    # Print the rounded height (y) of the node, not its x position.
    # adj = c(0.5, 1) centres the label horizontally and hangs it below the
    # node; xpd = NA lets it extend outside the plot region.
    text(x, y, round(y, 2), col = "blue", adj = c(0.5, 1), xpd = NA)
  }
}
Dendrogram produced in R with the y value written on each node

Conclusion

In conclusion, we have seen how to create and label dendrograms in R. You can add a column to the results data.frame if you want to plot something more interesting than the x or y values of each node in the dendrogram. Did this blog post help you?

Related posts

Comments

No comments yet. Why don’t you start the discussion?

Leave a Reply