# Packages used throughout this script. Each library() call sits on its own
# line: fused calls do not parse, and a trailing "#" would otherwise comment
# out the calls that follow it.
library(ggplot2) # plotting
library(dplyr)   # %>%, filter(), mutate()
library(tidyr)   # tidy up data structure
library(stringr) # string manipulation

theme_set(theme_minimal(base_size = 15)) # global theme
Create Annotated Line Plot in ggplot2 to Visualize the Changing Popularity of Smoking
In this work, we will create a line plot to visualize the rise and fall of smoking's popularity worldwide, with the United States, Germany, and France highlighted and annotated. The plot is a ggplot2 reproduction of the demo graphic by Datawrapper.
Packages and data cleanup
The data is sourced from Our World in Data, and can be downloaded here.
# Read the raw table: one `year` column plus one column per country.
# NOTE(review): the path is machine-specific; point it at your local copy.
d <- read.csv("/Users/boyuan/Desktop/R/gallery/DATASETS/cigarettes.csv")

# Reshape to long format: one row per (year, country) pair, with the
# consumption value coerced to numeric.
d.tidy <- d %>%
  pivot_longer(-year, names_to = "country", values_to = "cigarettes") %>%
  mutate(cigarettes = as.numeric(cigarettes))

head(d.tidy, n = 3)
Output:
# A tibble: 3 × 3
year country cigarettes
<int> <chr> <dbl>
1 1875 Austria NA
2 1875 Canada NA
3 1875 France 0.1
Visualization
Generate a simple line plot to visualize cigarette sales for each country. The lines, presented in grey, serve as a backdrop against which we will later highlight three specific countries.
# Base layer: one grey line per country, used as the backdrop for the
# highlighted lines added later.
p1 <- d.tidy %>%
  ggplot(aes(x = year, y = cigarettes, group = country)) +
  geom_line(color = "snow3")
p1
Highlight the cigarette sales in three selected countries.
# Countries to highlight; names follow the column names of the input table
# (hence "United.States" with a dot).
countries.selected <- c("United.States", "France", "Germany")
# Overlay thicker, colored lines for the selected countries only.
# The manual colors are listed in the alphabetical order of the countries
# (France, Germany, United.States), matching the annotation colors used later.
p2 <- p1 +
  geom_line(
    data = d.tidy %>% filter(country %in% countries.selected),
    aes(color = country),
    linewidth = 1
  ) +
  scale_color_manual(values = c("skyblue3", "steelblue4", "firebrick"))
p2
Add text annotations to the three highlighted lines in place of the default legend, and use a text annotation to replace the default y-axis title. The default legend and y-axis title will be removed at a later step.
# Label the three highlighted lines directly and add an in-plot y-axis
# caption. The vectors are parallel: element i of x, y, hjust, label, color,
# and size together describe the i-th text label.
p3 <- p2 +
  annotate(
    geom = "text",
    x = c(1933, 1948, 1955, 1880),
    y = c(6, 1, 3, 10),
    hjust = c(1, 0, 0, 0), # 1, right justify; 0, left justify
    label = c(
      "United States", "France", "Germany",
      "Cigarettes sold\nper day per adult"
    ),
    color = c("firebrick", "skyblue3", "steelblue4", "snow4"),
    fontface = "bold",
    size = c(5, 5, 5, 4)
  )
p3
Adjust the axis breaks and scales, remove axis titles, and add plot titles. For the long plot subtitle, we use the `str_wrap()` function from the popular `stringr` package to automatically wrap long strings into nicely formatted paragraphs.
# Set axis breaks, remove the padding around the data range, drop the axis
# titles, and add the plot title and a wrapped subtitle.
p4 <- p3 +
  # adjust axis breaks
  scale_x_continuous(breaks = seq(1880, 2000, 20)) +
  scale_y_continuous(breaks = seq(0, 10, 2)) +
  # expand to fill up the entire plotting range
  # (`expand` is a logical flag, so use FALSE rather than 0)
  coord_cartesian(expand = FALSE) +
  # update the axial and plot titles
  labs(
    # remove the axial titles
    y = NULL, x = NULL,
    # add plot title and subtitle
    title = "Cigarette consumption in developed countries",
    subtitle = str_wrap(
      "Smoking became increasingly popular since 1920s, peaked around 1960s ~ 1980s, and significant decreased in 1990s.",
      width = 60 # number of characters per line
    )
  )
p4
Final polish-up of the theme.
# Final theme tweaks: hide the legend (replaced by the text annotations),
# keep only dashed horizontal grid lines, draw the x axis line and ticks,
# and style the title and subtitle.
p5 <- p4 +
  theme(
    legend.position = "none",
    panel.grid = element_blank(),
    panel.grid.major.y = element_line(linewidth = .1, linetype = "dashed"),
    axis.line.x = element_line(color = "snow4"),
    axis.ticks.x = element_line(color = "snow4"),
    plot.title = element_text(face = "bold", size = 15),
    plot.subtitle = element_text(
      size = 12, color = "orange4", margin = margin(b = 15)
    )
  )
p5
# Packages used by the complete script below; one library() call per line so
# that the file parses.
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)

theme_set(theme_minimal(base_size = 15)) # global theme
# Read the raw table: one `year` column plus one column per country.
# NOTE(review): the path is machine-specific; point it at your local copy.
d <- read.csv("/Users/boyuan/Desktop/R/gallery/DATASETS/cigarettes.csv")

# Reshape to long format (one row per year-country pair) and coerce the
# consumption values to numeric.
d.tidy <- d %>%
  pivot_longer(-year, names_to = "country", values_to = "cigarettes") %>%
  mutate(cigarettes = as.numeric(cigarettes))
# Create a basic line plot: one grey line per country as the backdrop.
p1 <- d.tidy %>%
  ggplot(aes(x = year, y = cigarettes, group = country)) +
  geom_line(color = "snow3")
p1
# Highlight the cigarette sales in three selected countries.
# Names follow the column names of the input table (hence "United.States").
countries.selected <- c("United.States", "France", "Germany")
# Overlay thicker, colored lines for the selected countries only.
# The manual colors are listed in the alphabetical order of the countries
# (France, Germany, United.States), matching the annotation colors used later.
p2 <- p1 +
  geom_line(
    data = d.tidy %>% filter(country %in% countries.selected),
    aes(color = country),
    linewidth = 1
  ) +
  scale_color_manual(values = c("skyblue3", "steelblue4", "firebrick"))
p2
# Add text annotations to the three highlighted lines in place of the default
# legend, and use a text annotation to replace the default y-axis title.
# The vectors are parallel: element i of each describes the i-th label.
p3 <- p2 +
  annotate(
    geom = "text",
    x = c(1933, 1948, 1955, 1880),
    y = c(6, 1, 3, 10),
    hjust = c(1, 0, 0, 0), # 1, right justify; 0, left justify
    label = c(
      "United States", "France", "Germany",
      "Cigarettes sold\nper day per adult"
    ),
    color = c("firebrick", "skyblue3", "steelblue4", "snow4"),
    fontface = "bold",
    size = c(5, 5, 5, 4)
  )
p3
# Adjust the axis breaks and scales, remove axis titles, and add plot titles.
p4 <- p3 +
  # adjust axis breaks
  scale_x_continuous(breaks = seq(1880, 2000, 20)) +
  scale_y_continuous(breaks = seq(0, 10, 2)) +
  # expand to fill up the entire plotting range
  # (`expand` is a logical flag, so use FALSE rather than 0)
  coord_cartesian(expand = FALSE) +
  # update the axial and plot titles
  labs(
    # remove the axial titles
    y = NULL, x = NULL,
    # add plot title and subtitle
    title = "Cigarette consumption in developed countries",
    subtitle = str_wrap(
      "Smoking became increasingly popular since 1920s, peaked around 1960s ~ 1980s, and significant decreased in 1990s.",
      width = 60 # number of characters per line
    )
  )
p4
# Final polish-up of the theme: hide the legend (replaced by the text
# annotations), keep only dashed horizontal grid lines, draw the x axis line
# and ticks, and style the title and subtitle.
p5 <- p4 +
  theme(
    legend.position = "none",
    panel.grid = element_blank(),
    panel.grid.major.y = element_line(linewidth = .1, linetype = "dashed"),
    axis.line.x = element_line(color = "snow4"),
    axis.ticks.x = element_line(color = "snow4"),
    plot.title = element_text(face = "bold", size = 15),
    plot.subtitle = element_text(
      size = 12, color = "orange4", margin = margin(b = 15)
    )
  )
p5
Continue Exploring — 🚀 one level up!
In the following jittered line plot, we'll visualize social mobility: how a son's career path was affected by his father's occupation in the United States during the 1970s. Each line represents a distinct father-son pair.
To depict chronological changes, line plots are a powerful tool as illustrated above and in this example. Besides, ribbons are an attractive alternative with engaging visual appeal. Check out this awesome stacked ribbon / alluvium plot, which shows dynamic shifts in the migrant population to the United States from 1820 to 2009.