Today, we will see how to create and label dendrograms in R with base R functions.
How to make dendrograms
First, we will make a dendrogram from the first ten rows of the built-in mtcars data set (the same data as in the question that motivated this post):
# Hierarchically cluster the first ten rows of mtcars and draw the tree.
data(mtcars)
first_ten_cars = mtcars[1:10, ]
hc = hclust(dist(first_ten_cars))
plot(as.dendrogram(hc), ylab = "Height")
How to annotate dendrograms
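Next, we will reimplement in base R the kind of dendrogram annotation for which one would otherwise use the pvclust library. We start by finding, for each level of the dendrogram (that is, for each number of clusters the tree can be cut into), which group every label belongs to: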
# Build a lookup table with one row per label and one column per cut level:
# column "k" holds the cluster id each label receives when the tree is cut
# into k groups. Levels are added from the finest cut down to a single group.
nodes = data.frame(label = hc$labels)
for (k in seq(length(hc$labels), 1)) {
  membership = cutree(hc, k)
  per_level = data.frame(membership, label = names(membership),
                         stringsAsFactors = FALSE)
  names(per_level)[1] = as.character(k)
  nodes = merge(nodes, per_level, by = "label")
}
View(nodes)
Here is the content of the nodes data.frame:
After that, we will compute the x and y values of each node of the dendrogram:
# Compute plotting coordinates for every node of the dendrogram.
# `results` receives one row per node: level (the cut level at which the node
# is recorded), x (horizontal position), y (height) and label (the member
# labels joined with ":").
# `already_seen` tracks the multi-member clusters that have been recorded and
# not yet absorbed into a larger cluster, together with the x assigned to each.
# NOTE(review): ":" is used as the separator, so labels that themselves contain
# ":" would be split incorrectly by the strsplit() calls below.
already_seen = data.frame(x = numeric(0), seen = character(0))
results = NULL
# Walk from the finest cut (as many clusters as labels) down to one cluster.
for (level in length(hc$labels):1){
if (level == length(hc$labels)){
# Finest level: every label is recorded as its own node at height 0.
for (label in hc$labels){
# Index of this label within the labels reordered by hc$order
# (presumably the left-to-right leaf order of the plot -- see ?hclust).
x = which(hc$labels[hc$order] == label)
y = 0
# NOTE(review): `ancestor` is assigned here but never read in this branch.
ancestor = nodes[nodes[, "label"] == label, as.character(level)]
results = rbind(results, data.frame(level, x, y, label))
}
} else {
# Cluster ids that occur more than once in this level's column, i.e. the
# clusters that contain at least two labels.
ancestors = unique(nodes[duplicated(nodes[, as.character(level)]), as.character(level)])
for (ancestor in ancestors){
# Labels that belong to this cluster at this level.
seen = nodes[nodes[, as.character(level)] == ancestor, "label"]
# Only handle clusters not recorded yet (keyed by their ":"-joined labels).
if (! paste(seen, collapse = ":") %in% already_seen[, "seen"]){
# Collect the recorded clusters that share at least one label with this one.
items = NULL
for (item in already_seen[, "seen"]){
if (any(seen %in% strsplit(item, ":")[[1]])){
items = c(items, item)
}
}
if (length(items) == 0){
# No recorded cluster overlaps: average the positions of the member labels.
x = mean(which(hc$labels[hc$order] %in% seen))
} else if (length(items) == 1){
# One recorded cluster overlaps: average the positions of the labels that are
# not part of it together with that cluster's stored x.
x = mean(c(which(hc$labels[hc$order] %in% seen[! seen %in% strsplit(items, ":")[[1]]]),
already_seen[already_seen[, "seen"] %in% items, "x"]))
} else if (length(items) == 2){
# Two recorded clusters overlap: average their stored x values.
x = mean(already_seen[already_seen[, "seen"] %in% items, "x"])
}
# NOTE(review): no branch handles more than two overlapping clusters; x would
# then keep its previous value. Presumably this cannot happen because hclust
# merges are pairwise -- confirm.
# Height taken from the reversed hc$height vector at index `level`
# (presumably the height of the merge that forms this cluster -- confirm
# against ?hclust).
y = rev(hc$height)[level]
# Drop the absorbed clusters from the tracking table, then record the new one.
already_seen = already_seen[! already_seen[, "seen"] %in% items,]
already_seen = rbind(already_seen, data.frame(x, seen = paste(seen, collapse = ":")))
results = rbind(results, data.frame(level, x, y, label = paste(seen, collapse = ":")))
}
}
}
}
# Show the resulting table in the data viewer.
View(results)
Here is the content of the results data.frame:
The label column also shows which leaf labels belong to each node (joined with ":").
1) Annotate the dendrogram with the x values
# Redraw the dendrogram, then print each node's x position just below the node.
plot(as.dendrogram(hc), ylab = "Height")
# Visit the rows of `results` in ascending level order; order() keeps the
# original row order within a level.
for (row in order(results[, "level"])) {
  x = results[row, "x"]
  y = results[row, "y"]
  text(x, y, labels = round(x, 2), col = "blue", adj = c(0.5, 1), xpd = NA)
}
2) Annotate the dendrogram with the y values
# Redraw the dendrogram, then print each node's y value (its height) just
# below the node. `results` holds one row per node with columns level, x, y
# and label.
plot(as.dendrogram(hc), ylab = "Height")
for (level in seq_along(hc$labels)) {
  labels = results[results[, "level"] == level, "label"]
  for (label in labels) {
    is_node = results[, "level"] == level & results[, "label"] == label
    x = results[is_node, "x"]
    y = results[is_node, "y"]
    # The text shown is y, not x: this section annotates heights. xpd = NA
    # lets the labels of the leaves (y = 0) be drawn outside the plot region.
    text(x, y, round(y, 2), col = "blue", adj = c(0.5, 1), xpd = NA)
  }
}
Conclusion
In conclusion, we have seen how to create and label dendrograms in R. You can add a column to the results data.frame if you want to plot something more interesting than the x or y values of each node in the dendrogram. Did this blog post help you?