library(ggplot2)library(dplyr)library(stringr)library(RColorBrewer)
<- read.csv("/Users/boyuan/Desktop/R/gallery/DATASETS/US_disease.csv") data <- data %>% as_tibble() data
<- data %>% data.summary # extract years mutate(year = str_sub(epi_week, start = 1, end = 4) %>% as.integer()) %>% select(-epi_week) %>% # sum up cases across all weeks for each disease in each year and each state # incidences presented as per 100 K population group_by(state, year, disease) %>% summarise(n.per.100K = sum(incidence_per_100000))
Create Heatmaps in ggplot2 to Visualize Vaccines’ Impact on Diseases Control

In this earlier tutorial, we created a heatmap showing the historical incidences of the polio disease in the U.S. history, with highlight of the power of vaccination in its control. This article extends that earlier work by updating the script into a function to automate the generation of heatmaps for eight different infectious diseases throughout the history of the United States.
packages and data cleanup
To access the authentic dataset (as used in this demonstration), go to Project TYCHO, scroll down to “ACCESS DATA” - “DATASETS Pre-Compiled”, and click “START NOW”. Then scroll up to “PREVIOUS VERSIONS” - “Level 1 Data peer-reviewed research 8 diseases”, and click download ( create an account to initiate download).
For practice purpose, you can download this simulated data here with a single click. This simulated dataset has the same data structure as the original one.
Put the colors vector in the global environment. Alternatively, it can be put inside the function.
<- colorRampPalette( myColors c("grey95", "#d1e6f7", "#a6d3f1", "#57b7e7","#1f9ad4", "#3aab70", "#e0d141", "#fdd53e", "#f4cd3c", "#feb73e", "orange", "#eb6c10", "firebrick2", "firebrick4"), bias = 3)(50)
Create the visualizing function
In the function, values or parameters are extracted from the input dataset to automate the customization of certain plot attributes, such as scale breaks and annotation positions. Additionally, the output from the function is further fine-tuned, if necessary, separately for each disease.
# Create function<- function(which.disease = "HEPATITIS A", func.plot year.vaccine.introduced = NA) { # data subset containing the selected disease <- data.summary %>% data.summary.i filter(disease == which.disease) # create heatmap <- data.summary.i %>% myheatmap ggplot(aes(x = year, y = state, fill = n.per.100K)) + # heatmap base geom_tile(color = "white", linewidth = .4) + # x axis breaks: label with years ended with 0 and 5 scale_x_continuous( breaks = seq( from = (min(data.summary.i$year) / 5) %>% ceiling() * 5 , to = (max(data.summary.i$year) / 5) %>% floor() * 5, by = 5)) + # color scale scale_fill_gradientn( colours = myColors, breaks = seq(0, max(data.summary.i$n.per.100K), 50)) + # color bar customization guides( fill = guide_colorbar(barwidth = unit(250, "pt"), barheight = unit(10, "pt"), title = NULL)) + # axes labels labs( x = NULL, y = NULL, fill = "yearly cases \nper 100K", title = data.summary.i$disease %>% unique(), subtitle = "yearly cases per 100K population" %>% paste( "\n")) + # theme theme_classic() + theme( axis.line = element_blank(), plot.title = element_text(face = "bold", size = 20), plot.subtitle = element_text(size = 12, color = "grey60"), legend.position = "bottom", plot.margin = unit(c(.5, 0.5, 0.5, 0.5), "cm")) + # expand to fill up entire plot area, # and clip off to display the text annotation coord_cartesian(expand = 0, clip = "off") # if data is available, annotate with the year of vaccine introduction if (! is.na(year.vaccine.introduced)) { <- myheatmap + myheatmap # annotate with line: the year of vaccine introduction geom_vline( xintercept = year.vaccine.introduced, linewidth = 1) + # annotate with text: the year of vaccine introduction annotate( geom = "text", x = year.vaccine.introduced, y = data.summary.i$state %>% n_distinct(), label = "Vaccine introduced", vjust = -1.2, # shift the label upward hjust = 0, # left justify fontface = "bold", size = 4) } return(myheatmap) }
Create a heatmap plot for each disease
Plot the incidences and vaccination for polio.
<- func.plot(which.disease = "POLIO", p1 year.vaccine.introduced = 1954.5) p1
Plot the incidences and vaccination for mumps.
<- func.plot(which.disease = "MUMPS", p2 year.vaccine.introduced = 1967.5) p2
Plot the incidences and vaccination for rubella.
<- func.plot(which.disease = "RUBELLA", p3 year.vaccine.introduced = 1969.5) p3
Plot the incidences and vaccination for hepatitis A.
<- func.plot(which.disease = "HEPATITIS A", p4 year.vaccine.introduced = 1995.5) p4
Plot the incidences and vaccination for diphtheria.
<- func.plot(which.disease = "DIPHTHERIA", p5 year.vaccine.introduced = 1926.5) + # add manual update scale_fill_gradientn( colours = myColors, breaks = seq(0, 2000, 500)) p5
Plot the incidences and vaccination for measles.
<- func.plot(which.disease = "MEASLES", p6 year.vaccine.introduced = 1963.5) + # update the color scale to accommodate higher case number scale_fill_gradientn( colours = myColors, breaks = seq(0, 4000, 500) ) p6
Plot the incidences and vaccination for smallpox.
<- func.plot(which.disease = "SMALLPOX") + p7 # add caption labs(caption = "The vaccine for smallpox was introduced in 1800s.") + theme(plot.caption = element_text(hjust = 0)) p7
Plot the incidences and vaccination for pertusis.
<- func.plot(which.disease = "PERTUSSIS") + p8 # update the color scale scale_fill_gradientn(colours = myColors, breaks = seq(0, 800, 100)) + # annotate the striking missing data across nearly 20 years: annotate(geom = "rect", xmin = 1955.5, xmax = 1973.5, ymin = .5, ymax = 51.5, alpha = 0, color = "black", linewidth = .3) + annotate(geom = "text", x = 1965, y = 30, label = "lost history")
p8
Here we combine all plots together using the patchwork
package.
library(patchwork)| p2 | p3 | p4) / (p1 | p6 | p7 | p8 ) + (p5 # and title to the combined plot plot_annotation( title = "Impact of Vaccination Against Infectious Diseases in US", theme = theme(plot.title = element_text(size = 30, hjust = .5, face = "bold")))
Save the combined plots.
ggsave(filename = "heatmap_vaccine_all_8_diseases.pdf",path = "graphics", # a relative path width = 20, height = 14)
library(ggplot2)library(dplyr)library(stringr)library(RColorBrewer)
<- read.csv("/Users/boyuan/Desktop/R/gallery/DATASETS/US_disease.csv") data <- data %>% as_tibble() data
<- data %>% data.summary # extract years mutate(year = str_sub(epi_week, start = 1, end = 4) %>% as.integer()) %>% select(-epi_week) %>% # sum up cases across all weeks for each disease in each year and each state # incidences presented as per 100 K population group_by(state, year, disease) %>% summarise(n.per.100K = sum(incidence_per_100000))
# Put the colors vector in the global environment. Alternatively, it can be put inside the function.<- colorRampPalette( myColors c("grey95", "#d1e6f7", "#a6d3f1", "#57b7e7","#1f9ad4", "#3aab70", "#e0d141", "#fdd53e", "#f4cd3c", "#feb73e", "orange", "#eb6c10", "firebrick2", "firebrick4"), bias = 3)(50)
# Create the visualizing function<- function(which.disease = "HEPATITIS A", func.plot year.vaccine.introduced = NA) { # data subset containing the selected disease <- data.summary %>% data.summary.i filter(disease == which.disease) # create heatmap <- data.summary.i %>% myheatmap ggplot(aes(x = year, y = state, fill = n.per.100K)) + # heatmap base geom_tile(color = "white", linewidth = .4) + # x axis breaks: label with years ended with 0 and 5 scale_x_continuous( breaks = seq( from = (min(data.summary.i$year) / 5) %>% ceiling() * 5 , to = (max(data.summary.i$year) / 5) %>% floor() * 5, by = 5)) + # color scale scale_fill_gradientn( colours = myColors, breaks = seq(0, max(data.summary.i$n.per.100K), 50)) + # color bar customization guides( fill = guide_colorbar(barwidth = unit(250, "pt"), barheight = unit(10, "pt"), title = NULL)) + # axes labels labs( x = NULL, y = NULL, fill = "yearly cases \nper 100K", title = data.summary.i$disease %>% unique(), subtitle = "yearly cases per 100K population" %>% paste( "\n")) + # theme theme_classic() + theme( axis.line = element_blank(), plot.title = element_text(face = "bold", size = 20), plot.subtitle = element_text(size = 12, color = "grey60"), legend.position = "bottom", plot.margin = unit(c(.5, 0.5, 0.5, 0.5), "cm")) + # expand to fill up entire plot area, # and clip off to display the text annotation coord_cartesian(expand = 0, clip = "off") # if data is available, annotate with the year of vaccine introduction if (! is.na(year.vaccine.introduced)) { <- myheatmap + myheatmap # annotate with line: the year of vaccine introduction geom_vline( xintercept = year.vaccine.introduced, linewidth = 1) + # annotate with text: the year of vaccine introduction annotate( geom = "text", x = year.vaccine.introduced, y = data.summary.i$state %>% n_distinct(), label = "Vaccine introduced", vjust = -1.2, # shift the label upward hjust = 0, # left justify fontface = "bold", size = 4) } return(myheatmap) }
# Create a heatmap plot for each disease:
# POLIO<- func.plot(which.disease = "POLIO", p1 year.vaccine.introduced = 1954.5) p1
# MUMPS<- func.plot(which.disease = "MUMPS", p2 year.vaccine.introduced = 1967.5) p2
# RUBELLA<- func.plot(which.disease = "RUBELLA", p3 year.vaccine.introduced = 1969.5) p3
# HEPATITIS A<- func.plot(which.disease = "HEPATITIS A", p4 year.vaccine.introduced = 1995.5) p4
# DIPHTHERIA<- func.plot(which.disease = "DIPHTHERIA", p5 year.vaccine.introduced = 1926.5) + # add manual update scale_fill_gradientn( colours = myColors, breaks = seq(0, 2000, 500)) p5
# MEASLES<- func.plot(which.disease = "MEASLES", p6 year.vaccine.introduced = 1963.5) + # update the color scale to accommodate higher case number scale_fill_gradientn( colours = myColors, breaks = seq(0, 4000, 500) ) p6
# SMALLPOX<- func.plot(which.disease = "SMALLPOX") + p7 # add caption labs(caption = "The vaccine for smallpox was introduced in 1800s.") + theme(plot.caption = element_text(hjust = 0)) p7
# PERTUSSIS<- func.plot(which.disease = "PERTUSSIS") + p8 # update the color scale scale_fill_gradientn(colours = myColors, breaks = seq(0, 800, 100)) + # annotate the striking missing data across nearly 20 years: annotate(geom = "rect", xmin = 1955.5, xmax = 1973.5, ymin = .5, ymax = 51.5, alpha = 0, color = "black", linewidth = .3) + annotate(geom = "text", x = 1965, y = 30, label = "lost history") p8
# combine all plots togetherlibrary(patchwork)
| p2 | p3 | p4) / (p1 | p6 | p7 | p8 ) + (p5 # and title to the combined plot plot_annotation( title = "Impact of Vaccination Against Infectious Diseases in US", theme = theme(plot.title = element_text(size = 30, hjust = .5, face = "bold")))
ggsave(filename = "heatmap_vaccine_all_8_diseases.png",path = "graphics", # a relative path width = 20, height = 14, dpi = 300)
Continue Exploring — 🚀 one level up!
Check out the following heatmap with synchornized lineplot that visualize the sunspot activities in the past two hundred years.
And check out this amazing heatmap visualizing the population distribution in Africa. In particular, it leverages magical mathematical transformations in the color scale (with a simple line of code) to unveil the underlying highly skewed data pattern.
The above heatmaps are all created using the generic function geom_tile()
and geom_raster()
. Alternatively, a heatmap can be created using geom_bin2d()
in the form of a 2D histogram. Check out the following 2D histogram with a map overlap that visualizes the hurricane activities in North Atlantic Ocean.