In this tutorial, we will see how to make bar plots with error bars in R using the ggplot2 package. For that, we will go through two examples using the iris and ToothGrowth built-in datasets.
Load the ggplot2 package
library(ggplot2)
Example 1: no facet variable
First, we will use the iris dataset to create a bar plot showing the petal length of every flower: 50 flowers for each of three species, 150 in total. Then, we will summarise each species by its mean petal length and draw that bar plot with error bars.
a) Load the dataset
# Make the built-in iris data frame available in the workspace.
data(iris)
b) Variables assignment
We will use the Petal.Length as y variable and the species as fill variable.
# Working copy of the data plus the names of the columns used by the plots.
dataset <- iris
y_column <- "Petal.Length"  # mapped to the bar height
fill_column <- "Species"    # mapped to the bar fill colour
c) Bar plot with all samples (optional)
The x variable called Flowers will be created to identify the 50 flowers of each species that were analyzed in that study.
# Name of the helper column that numbers every flower along the x axis.
x_column <- "Flowers"
# Sort the rows by the fill variable so bars of one group sit side by side,
# then number the rows 1..n in that order.
dataset <- dataset[order(dataset[[fill_column]]), ]
dataset[[x_column]] <- seq_len(nrow(dataset))
# aes_string() is deprecated since ggplot2 3.0.0; the .data pronoun looks up
# columns whose names are stored in character variables.
ggplot(dataset, aes(x = .data[[x_column]], y = .data[[y_column]],
                    fill = .data[[fill_column]])) +
  geom_col()
d) Bar plot with error bars
To create error bars, we need to compute:
- the mean
- the mean minus the standard deviation
- the mean plus the standard deviation
# Build dataset_stats: one row per level of the fill variable, holding the
# mean of the y variable and the mean minus / plus one standard deviation.
fill_levels <- unique(dataset[[fill_column]])
# Collect the per-level rows in a list and bind them once, instead of growing
# the data frame with rbind() on every loop iteration.
stats_rows <- lapply(fill_levels, function(fill_level) {
  values <- dataset[dataset[[fill_column]] == fill_level, y_column]
  avg <- mean(values)
  spread <- sd(values)
  data.frame(fill_level,
             mean = avg,
             sd_neg = avg - spread,
             sd_pos = avg + spread)
})
dataset_stats <- do.call(rbind, stats_rows)
# Rename the generic columns to the configured column names so the plotting
# code can refer to them through y_column and fill_column.
names(dataset_stats)[names(dataset_stats) == "mean"] <- y_column
names(dataset_stats)[names(dataset_stats) == "fill_level"] <- fill_column
# One bar per group at the mean, with error bars from mean - sd to mean + sd.
# aes_string() is deprecated since ggplot2 3.0.0; use aes() with .data instead.
ggplot(dataset_stats, aes(x = .data[[fill_column]], y = .data[[y_column]],
                          fill = .data[[fill_column]])) +
  geom_col() +
  geom_errorbar(aes(ymin = sd_neg, ymax = sd_pos))
If we want to narrow the error bars from their default width (which makes them about as wide as the bars) to 0.2:
# Same plot as before, but with the error-bar width set to 0.2.
# aes_string() is deprecated since ggplot2 3.0.0; use aes() with .data instead.
ggplot(dataset_stats, aes(x = .data[[fill_column]], y = .data[[y_column]],
                          fill = .data[[fill_column]])) +
  geom_col() +
  geom_errorbar(aes(ymin = sd_neg, ymax = sd_pos), width = 0.2)
If we want to only show the upper part of the error bar:
# Upper half of the error bar only: the lower end starts at the bar height
# (the mean) instead of mean - sd. Bars get a black outline.
# aes_string() is deprecated since ggplot2 3.0.0; use aes() with .data instead.
ggplot(dataset_stats, aes(x = .data[[fill_column]], y = .data[[y_column]],
                          fill = .data[[fill_column]])) +
  geom_col(color = "black") +
  geom_errorbar(aes(ymin = .data[[y_column]], ymax = sd_pos), width = 0.2)
Example 2: one facet variable
Now, we will use the ToothGrowth dataset with the dose of vitamin C as facet variable and the supplement type as fill variable:
a) Load the dataset
# Make the built-in ToothGrowth data frame available in the workspace.
data(ToothGrowth)
b) Variables assignment
In this case, we will define the tooth length as y column, the supplement type as fill variable and the dose of vitamin C as facet variable.
# Working copy of the data plus the names of the columns used by the plots.
dataset <- ToothGrowth
y_column <- "len"        # mapped to the bar height
fill_column <- "supp"    # mapped to the bar fill colour
facet_column <- "dose"   # one panel per value of this column
c) Bar plot with all samples (optional)
The x variable, teeth, will be created to number the guinea pigs within each dose level (20 per dose). Each animal received one of the two supplement types, orange juice (OJ) or ascorbic acid (VC), so there are 30 guinea pigs per supplement type and 60 in total.
# Name of the helper column that numbers the samples inside each facet.
x_column <- "teeth"
# Sort by facet first and by fill within each facet. A single two-key order()
# replaces two successive sorts that relied on the sort being stable.
dataset <- dataset[order(dataset[[facet_column]], dataset[[fill_column]]), ]
# Number the rows 1..k within every facet level. ave() restarts the counter
# per group, so this also works when the groups have different sizes.
dataset[[x_column]] <- ave(seq_len(nrow(dataset)), dataset[[facet_column]],
                           FUN = seq_along)
# aes_string() is deprecated since ggplot2 3.0.0; the .data pronoun looks up
# columns whose names are stored in character variables.
ggplot(dataset, aes(x = .data[[x_column]], y = .data[[y_column]],
                    fill = .data[[fill_column]])) +
  geom_col() +
  facet_wrap(facet_column)
d) Bar plot with error bars
To add the error bars, we again need to compute:
- the mean
- the mean minus the standard deviation
- the mean plus the standard deviation
The difference from the previous example is that we want to compute them for each combination of supplement type and dose level of vitamin C.
# Build dataset_stats: one row per (fill level, facet level) pair, holding the
# mean of the y variable and the mean minus / plus one standard deviation.
fill_levels <- unique(dataset[[fill_column]])
facet_levels <- unique(dataset[[facet_column]])
# Collect the rows in lists and bind them once, instead of growing the data
# frame with rbind() inside a nested loop. Fill levels form the outer
# iteration and facet levels the inner one.
stats_rows <- lapply(fill_levels, function(fill_level) {
  facet_rows <- lapply(facet_levels, function(facet_level) {
    in_group <- dataset[[fill_column]] == fill_level &
      dataset[[facet_column]] == facet_level
    values <- dataset[in_group, y_column]
    avg <- mean(values)
    spread <- sd(values)
    data.frame(fill_level, facet_level,
               mean = avg,
               sd_neg = avg - spread,
               sd_pos = avg + spread)
  })
  do.call(rbind, facet_rows)
})
dataset_stats <- do.call(rbind, stats_rows)
# Rename the generic columns to the configured column names so the plotting
# code can refer to them through y_column, fill_column and facet_column.
names(dataset_stats)[names(dataset_stats) == "mean"] <- y_column
names(dataset_stats)[names(dataset_stats) == "fill_level"] <- fill_column
names(dataset_stats)[names(dataset_stats) == "facet_level"] <- facet_column
# One bar per fill level at the mean, with error bars from mean - sd to
# mean + sd (width 0.2), drawn in a separate panel per facet level.
# aes_string() is deprecated since ggplot2 3.0.0; use aes() with .data instead.
ggplot(dataset_stats, aes(x = .data[[fill_column]], y = .data[[y_column]],
                          fill = .data[[fill_column]])) +
  geom_col() +
  geom_errorbar(aes(ymin = sd_neg, ymax = sd_pos), width = 0.2) +
  facet_wrap(facet_column)
e) Inversion of fill and facet variables
# Start again from the raw data with the two roles swapped:
# the dose is now the fill variable and the supplement type the facet variable.
dataset <- ToothGrowth
y_column <- "len"
fill_column <- "dose"
facet_column <- "supp"
Conclusion
To make bar plots with error bars in R using ggplot2, we need to create a new dataset containing the mean, the mean minus the standard deviation and the mean plus the standard deviation, and then pass these columns to the geom_errorbar function.