# Load Google fonts and use them as the default plot fonts.
library(showtext)
showtext_auto() # turn on showtext rendering for graphics devices
font_add_google(name = "Lobster", family = "Lobster") # for most text
font_add_google(name = "lemon", family = "lemon") # for species notation

# Set the default theme (use "Lobster" as the default font).
theme_set(theme_bw(base_family = "Lobster", base_size = 16))
Create a basic scatterplot.
# Basic scatterplot: body mass against flipper length, colored by species.
p1 <- penguins %>%
  ggplot(aes(
    x = flipper_length_mm, y = body_mass_g,
    color = species, fill = species
  )) +
  # jitter with a fixed seed so later layers can reproduce the same offsets
  geom_point(
    position = position_jitter(width = .2, height = .2, seed = 1)
  )
p1
Create a simple linear regression for each species, and plot the associated formula using the ggpubr package.
library(ggpubr)

# Add a fitted regression line and its equation on top of the scatterplot.
p2 <- p1 +
  # create a simple linear model
  geom_smooth(method = "lm") +
  # draw the equations; label.y gives one vertical position per label
  stat_regline_equation(
    show.legend = FALSE, size = 5,
    label.x = 170, label.y = c(5400, 5700, 6000)
  )
p2
Use the ggalt package to highlight selected species by encircling the associated data points. Pass the same jitter settings (including the same seed) to the points and to the encircling layers, so that the outlines line up with the jittered points.
library(ggalt)

# Encircle groups of points. Both layers reuse the jitter settings of the
# point layer (same width, height and seed) so the outlines match the points.
p3 <- p2 +
  # highlight the "Gentoo" species as a shaded area
  geom_encircle(
    data = penguins %>% filter(species == "Gentoo"),
    position = position_jitter(width = .2, height = .2, seed = 1),
    alpha = .2, s_shape = 1, expand = 0
  ) +
  # highlight the other two species with a dashed outline
  geom_encircle(
    data = penguins %>% filter(species != "Gentoo"),
    aes(group = 1), # one group: encircle all of the filtered rows together
    position = position_jitter(width = .2, height = .2, seed = 1),
    color = "black", fill = NA,
    size = 2, linetype = "dashed",
    s_shape = 1, expand = 0
  )
p3
Create a color-enriched plot title, with the penguin species annotated in color. This is a more concise alternative to the color legend. It involves two steps. First step: use HTML to mark up the title string.
Put the long string inside c(). You can make a simple line break in the script (pressing the Enter or return key) without it being displayed in the rendered graphic (in the context of ggtext discussed later).
Use the HTML tag <br> to make an indicated line break to be displayed in the graphic. (or use automatic text wrapping; see below)
Use HTML to specify the text color.
# Plot title marked up with HTML: <br> requests a line break, and each
# species name sits in a <span> that sets its text color and font family.
mytitle <- c(
  "Among the adult foraging penguins near <br> Palmer Station in Antarctica, <span style = 'color:DodgerBlue; font-family: lemon;'>Gentoo</span> is much <br> bigger than <span style = 'color:LimeGreen;font-family: lemon;'>Chinstrap</span> and <span style = 'color:red; font-family: lemon;'>Adelie</span>"
)
Second step: use element_markdown() from the ggtext package to render the plot title. The syntax of element_markdown() is the same as that of element_text(). (Alternatively, you can use element_textbox_simple() to wrap the text automatically, as in this example.)
library(ggtext)

# Render the HTML-marked-up title and drop the now redundant legend.
p4 <- p3 +
  labs(title = mytitle) +
  theme(
    plot.title = element_markdown(size = 15, color = "snow4"),
    legend.position = "none" # remove the legend
  )
p4
Polish up a few more details.
# Final polish: axis breaks and titles, margins, grid and background colors.
p5 <- p4 +
  # axes: finer breaks, and rename the titles
  scale_x_continuous(
    breaks = seq(160, 240, 10), name = "Flipper length (mm)"
  ) +
  scale_y_continuous(
    breaks = seq(3000, 7000, 1000), name = "Body weight (g)"
  ) +
  theme(
    # increase margin on the right side of y-axis title, and on top of
    # x-axis title; suffix .x and .y are required for margin update
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    # remove panel grid
    panel.grid = element_blank(),
    # background color
    panel.background = element_rect(fill = "lightyellow"),
    plot.background = element_rect(fill = "lightyellow"),
    # add more margins around the plot; margin() takes one value per side
    # (t, r, b, l), so spell out all four instead of passing a vector as `t`
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20, unit = "pt")
  )
p5
Save the plot. Here we save the plot to the folder “graphics”, which is located in the same directory as the source code.
# Save the final plot as a PDF inside the "graphics" folder.
# NOTE(review): "penguis.pdf" looks like a typo for "penguins.pdf" -- confirm
# before renaming, since other material may refer to the existing file name.
dir.create("graphics", showWarnings = FALSE) # make sure the folder exists
ggsave(
  filename = "penguis.pdf",
  plot = p5, # name the plot explicitly instead of relying on the last one shown
  path = "graphics", # a relative path
  width = 6, height = 5
)
# Complete script: annotated scatterplot of the Palmer penguins data.

library(ggplot2)
library(dplyr)
library(palmerpenguins) # data package

head(penguins, n = 3)

# Load more fonts from Google Font Repository. ----
library(showtext)
showtext_auto()
font_add_google(name = "Lobster", family = "Lobster") # for most text
font_add_google(name = "lemon", family = "lemon") # for species notation

# Set as default theme (with "Lobster" being the default font).
theme_set(theme_bw(base_family = "Lobster", base_size = 16))

# Create a basic scatterplot. ----
p1 <- penguins %>%
  ggplot(aes(
    x = flipper_length_mm, y = body_mass_g,
    color = species, fill = species
  )) +
  # jitter with a fixed seed so later layers can reproduce the same offsets
  geom_point(
    position = position_jitter(width = .2, height = .2, seed = 1)
  )
p1

# Create a simple linear regression for each species, and plot the ----
# associated formula.
library(ggpubr)

p2 <- p1 +
  # create a simple linear model
  geom_smooth(method = "lm") +
  # draw the equations; label.y gives one vertical position per label
  stat_regline_equation(
    show.legend = FALSE, size = 5,
    label.x = 170, label.y = c(5400, 5700, 6000)
  )
p2

# Highlight selected species by encircling the associated data points. ----
library(ggalt)

p3 <- p2 +
  # highlight the "Gentoo" species as a shaded area
  geom_encircle(
    data = penguins %>% filter(species == "Gentoo"),
    position = position_jitter(width = .2, height = .2, seed = 1),
    alpha = .2, s_shape = 1, expand = 0
  ) +
  # highlight the other two species with a dashed outline
  geom_encircle(
    data = penguins %>% filter(species != "Gentoo"),
    aes(group = 1), # one group: encircle all of the filtered rows together
    position = position_jitter(width = .2, height = .2, seed = 1),
    color = "black", fill = NA,
    size = 2, linetype = "dashed",
    s_shape = 1, expand = 0
  )
p3

# Create a plot title, with penguin species highlighted in color. ----
library(ggtext)

mytitle <- c(
  "Among the adult foraging penguins near <br> Palmer Station in Antarctica, <span style = 'color:DodgerBlue; font-family: lemon;'>Gentoo</span> is much <br> bigger than <span style = 'color:LimeGreen;font-family: lemon;'>Chinstrap</span> and <span style = 'color:red; font-family: lemon;'>Adelie</span>"
)

p4 <- p3 +
  labs(title = mytitle) +
  theme(
    plot.title = element_markdown(size = 15, color = "snow4"),
    legend.position = "none" # remove the legend
  )
p4

# Polish up a few more details. ----
p5 <- p4 +
  # axes: finer breaks, and rename the titles
  scale_x_continuous(
    breaks = seq(160, 240, 10), name = "Flipper length (mm)"
  ) +
  scale_y_continuous(
    breaks = seq(3000, 7000, 1000), name = "Body weight (g)"
  ) +
  theme(
    # increase margin on the right of y-axis title, and on top of x-axis
    # title; suffix .x and .y are required for margin update
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    # remove panel grid
    panel.grid = element_blank(),
    # background color
    panel.background = element_rect(fill = "lightyellow"),
    plot.background = element_rect(fill = "lightyellow"),
    # add more margins around the plot; margin() takes one value per side
    # (t, r, b, l), so spell out all four instead of passing a vector as `t`
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20, unit = "pt")
  )
p5

# Save the plot. ----
# NOTE(review): "penguis.pdf" looks like a typo for "penguins.pdf" -- confirm
# before renaming, since other material may refer to the existing file name.
dir.create("graphics", showWarnings = FALSE) # make sure the folder exists
ggsave(
  filename = "penguis.pdf",
  plot = p5, # name the plot explicitly instead of relying on the last one shown
  path = "graphics",
  width = 6, height = 5
)
Continue Exploring — 🚀 one level up!
A scatterplot can be further enhanced by visualizing the marginal (univariate) distribution of the x and y variables, and the bivariate distribution pattern with confidence ellipses. Check out the following scatterplot with confidence ellipses and marginal visualization.
A scatterplot is often drawn on a semi-logarithmic or double-logarithmic scale when the data are strongly skewed along one or both axes. Check out the following scatterplot on a semi-logarithmic scale, which unveils the linear relationship between the percentage of urbanization and log(GDP per capita).