Use ggplot2 to Visualize Increases in Human Life Expectancy and Population Since 1800s

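In this work, we’ll use ggplot2 to create a line plot combined with a scatterplot to visualize the increases in human life expectancy and population size from 1800 to 2015. Major techniques covered in this work include log-transforming the x-axis, scaling point area by population, annotating the plot with rectangles and text, highlighting selected countries, and automatically wrapping the plot title.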

This work is a ggplot2 reproduction of the demo graphic by Datawrapper by Lisa Charlotte Rost.


Packages and data cleanup

The dataset used in this work is sourced from gapminder, and can be downloaded here. It contains the population and economic data for the years 1800 and 2015.

# Load the plotting, data-wrangling, and string-handling packages.
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)

# Use the classic theme with a larger base font for every plot below.
theme_set(theme_classic(base_size = 14))

# NOTE(review): absolute, machine-specific path -- point it at your local
# copy of the dataset before running.
d <- read.csv("/Users/boyuan/Desktop/R/gallery/DATASETS/point_line_gapminder_rising life Exp.csv")

# The "population" variable is read as character because of "null" texts;
# here we force it to numeric (the "null" values become NA), and shorten
# "United States" to "US".
d <- d %>%
  as_tibble() %>%
  mutate(
    population = as.double(population),
    country = str_replace_all(country, "United States", "US")
  )

head(d, n = 4)

Output:

# A tibble: 4 × 6
country Life.expectancy.in.y…¹ GDP.per.capita population continent year
<chr> <dbl> <int> <dbl> <chr> <int>
1 Malawi 30.3 350 737000 Africa 1800
2 Equatorial G… 29.8 356 80377 Africa 1800
3 Solomon Isla… 25.1 363 56998 Australia 1800
4 Mozambique 30.3 390 1982324 Africa 1800
# ℹ abbreviated name: ¹​Life.expectancy.in.years

Visualization

Create a scatterplot, and connect the two points of the year 1800 and 2015 of the same country.

# Base plot: one point per country-year sized by population, with a thin
# line joining the observations that belong to the same country.
p1 <- d %>%
  ggplot(aes(x = GDP.per.capita, y = Life.expectancy.in.years)) +
  geom_point(aes(size = population), color = "snow4", alpha = .8) +
  geom_line(
    aes(group = country),
    color = "snow4", alpha = .8, linewidth = .05
  ) +
  # turn off axis expansion so later annotations can be placed relative to
  # the scale limits
  coord_cartesian(expand = 0)

p1

Modify the scale of the x and size aesthetics.

  • Transform the x-axis to a logarithmic scale of base 10. This spreads the points further apart horizontally and gives better readability. Such a transformation is very effective at unveiling patterns in highly skewed data distributions – also check out log-transform on y-axis, and pseudo-log transform in color scale.

  • The population variable has been mapped to the size aesthetic. Here we increase the maximum size at the upper end of the size scale, and use zero size for population of zero. This creates a sharper contrast in the point size.

# Rescale the x (log10) and size (area) aesthetics, and fix the y breaks.
p2 <- p1 +
  scale_x_log10(
    # label breaks in thousands, e.g. 1000 -> "1 K"
    labels = function(x) {
      paste(x / 1000, "K")
    },
    n.breaks = 8,
    limits = c(300, 2 * 10^5)
  ) +
  # log tick marks along the bottom axis only
  annotation_logticks(sides = "b") +
  scale_y_continuous(breaks = seq(0, 90, 10), limits = c(18, 90)) +
  # `scale_size_area()` ensures zero value is mapped to zero size
  scale_size_area(max_size = 20)

p2

Add rectangles to annotate the years. With coord_cartesian(expand = 0) (at the earlier step of p1), the plot expands to eliminate all plot margin. This allows the gap between the rectangles and the axis lines to be precisely controlled by the rectangle coordinates (xmin, xmax, ymin, ymax) and the axial scale limits (see the limits argument above).

# Shade one rectangle per year band and label each band with its year.
p3 <- p2 +
  # two rectangles sharing the same x-range: lower band and upper band
  annotate(
    geom = "rect",
    xmin = 350, xmax = 2 * 10^5,
    ymin = c(20, 48), ymax = c(46, 90),
    alpha = .08
  ) +
  # years text annotation
  annotate(
    geom = "text",
    x = 1.2 * 10^5, y = c(42, 87), label = c(1800, 2015),
    size = 7, fontface = "bold"
  )

p3

Highlight three selected countries with the largest population in 2015.

# Subset of the three countries to highlight.
d.big.countries <- d %>%
  filter(country %in% c("China", "India", "US"))

p4 <- p3 +
  # highlight the lines
  # NOTE(review): the per-row `linewidth` vector assumes `d.big.countries`
  # has exactly six rows in a fixed order -- confirm against the data.
  geom_line(
    data = d.big.countries,
    aes(color = country),
    linewidth = c(1, 1, 1, 1, .5, .5)
  ) +
  # highlight the points
  geom_point(
    data = d.big.countries,
    aes(size = population, color = country)
  ) +
  # label with country names
  # NOTE(review): `size` assumes exactly three rows for year 2015 -- confirm.
  geom_text(
    data = d.big.countries %>% filter(year == 2015),
    aes(label = country),
    color = "white", size = c(5, 5, 4), fontface = "bold"
  ) +
  # add color to the three selected countries
  scale_color_manual(values = c("red4", "steelblue4", "darkgreen"))

p4

A final touch-up. Here we use str_wrap() from the stringr package to automatically wrap the title, with ~50 characters per line.

# Final touch-up: wrapped title, in-plot axis titles, no legend.
p5 <- p4 +
  # remove axial titles
  labs(x = NULL, y = NULL) +
  # add plot title, with text wrapping after ~50 characters each line;
  # the text is assembled from two short pieces and re-flowed by str_wrap()
  ggtitle(str_wrap(
    paste(
      "There has been significant increases in human life span",
      "and population in the last two hundred years"
    ),
    width = 50
  )) +
  # mark axial titles inside the plot
  annotate(
    geom = "text",
    x = c(340, 10^4.2), y = c(85, 22),
    label = c(
      "life expectancy\nin years",
      "GDP per capita (US $)"
    ),
    size = 4.5, fontface = "bold",
    hjust = 0, color = "grey30"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(
      face = "bold", size = 15,
      margin = margin(b = 10)
    )
  )

p5
# Packages used by the complete script.
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)

# Classic theme with a 14-pt base font as the default for all plots.
theme_set(theme_classic(base_size = 14))

# NOTE(review): absolute, machine-specific path -- adjust to where the
# dataset lives on your machine.
d <- read.csv("/Users/boyuan/Desktop/R/gallery/DATASETS/point_line_gapminder_rising life Exp.csv")

# "population" variable is recognized as "character" due to "null" texts;
# here we force it to numerical (and the "null" values become NA)
d <- d %>%
  as_tibble() %>%
  mutate(
    population = as.double(population),
    country = str_replace_all(country, "United States", "US")
  )

head(d, n = 4)

# Create a scatterplot,
# and connect the two points of the year 1800 and 2015 of the same country.
p1 <- d %>%
  ggplot(aes(x = GDP.per.capita, y = Life.expectancy.in.years)) +
  geom_point(aes(size = population), color = "snow4", alpha = .8) +
  geom_line(
    aes(group = country),
    color = "snow4", alpha = .8, linewidth = .05
  ) +
  # no axis expansion: panel edges follow the scale limits
  coord_cartesian(expand = 0)

p1

# Modify the scale of the `x` and `size` aesthetics.
p2 <- p1 +
  scale_x_log10(
    # show break values in thousands followed by " K"
    labels = function(x) {
      paste(x / 1000, "K")
    },
    n.breaks = 8,
    limits = c(300, 2 * 10^5)
  ) +
  annotation_logticks(sides = "b") +
  scale_y_continuous(breaks = seq(0, 90, 10), limits = c(18, 90)) +
  # `scale_size_area()` ensures zero value is mapped to zero size
  scale_size_area(max_size = 20)

p2

# Add rectangles to annotate the years.
p3 <- p2 +
  annotate(
    geom = "rect",
    xmin = 350, xmax = 2 * 10^5,
    ymin = c(20, 48), ymax = c(46, 90),
    alpha = .08
  ) +
  # years text annotation
  annotate(
    geom = "text",
    x = 1.2 * 10^5, y = c(42, 87), label = c(1800, 2015),
    size = 7, fontface = "bold"
  )

p3

# Highlight three selected countries with the largest population in 2015.
d.big.countries <- d %>%
  filter(country %in% c("China", "India", "US"))

p4 <- p3 +
  # highlight the lines
  # NOTE(review): `linewidth` is given per row and presumes six rows in a
  # fixed order in `d.big.countries` -- verify against the data.
  geom_line(
    data = d.big.countries,
    aes(color = country),
    linewidth = c(1, 1, 1, 1, .5, .5)
  ) +
  # highlight the points
  geom_point(
    data = d.big.countries,
    aes(size = population, color = country)
  ) +
  # label with country names
  # NOTE(review): `size` presumes three rows remain after the year filter.
  geom_text(
    data = d.big.countries %>% filter(year == 2015),
    aes(label = country),
    color = "white", size = c(5, 5, 4), fontface = "bold"
  ) +
  # add color to the three selected countries
  scale_color_manual(values = c("red4", "steelblue4", "darkgreen"))

p4

# A final touch-up.
p5 <- p4 +
  # remove axial titles
  labs(x = NULL, y = NULL) +
  # add plot title, with text wrapping after ~50 characters each line
  ggtitle(str_wrap(
    paste(
      "There has been significant increases in human life span",
      "and population in the last two hundred years"
    ),
    width = 50
  )) +
  # mark axial title inside the plot
  annotate(
    geom = "text",
    x = c(340, 10^4.2), y = c(85, 22),
    label = c(
      "life expectancy\nin years",
      "GDP per capita (US $)"
    ),
    size = 4.5, fontface = "bold",
    hjust = 0, color = "grey30"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(
      face = "bold", size = 14.5,
      margin = margin(b = 10)
    )
  )

p5




Continue Exploring — 🚀 one level up!


Check out the following line plot that visualizes the rise and fall of the smoking popularity worldwide, in particular in the United States, Germany, and France.



Check out the following line plot that visualizes the social mobility: how does a father’s occupation impact his son’s career path, based on a survey in the 1970s in the United States.