Visualize Big City Distributions with World Map and Scatterplot in ggplot2

In this article, we’ll create a world map and a scatterplot on top of it to visualize the distribution of cities with varied population sizes.


Packages and data cleanup

We’ll use the map_data() function from ggplot2 to create a world map, and use the world.cities dataset from the maps package to create a scatterplot of cities.

# Packages: plotting, data wrangling, non-overlapping text labels, map data.
library(ggplot2)
library(dplyr)
library(ggrepel)
library(maps) # install.packages("maps")

# Use the minimal theme with a 13 pt base font for all subsequent plots.
theme_set(theme_minimal(base_size = 13)) # default theme

Load the “Lobster” font from Google font repository.

# Register the "Lobster" Google font under the family name "Lobster" and
# turn on showtext so the font can be used in plot text.
library(showtext)
font_add_google(name = "Lobster", family = "Lobster")
showtext_auto()

Visualization

Create a world map.

# World map outline data: long / lat coordinates plus group, order, region.
m <- map_data("world")

# Preview the first three rows.
head(m, 3) %>% as_tibble()

Output:

# A tibble: 3 × 6
long lat group order region subregion
<dbl> <dbl> <dbl> <int> <chr> <chr>
1 -69.9 12.5 1 1 Aruba <NA>
2 -69.9 12.4 1 2 Aruba <NA>
3 -69.9 12.4 1 3 Aruba <NA>
# Base layer: world map drawn as polygons with thin white borders.
p1 <- m %>%
  filter(lat > -55) %>% # remove Antarctic
  ggplot(aes(x = long, y = lat)) +
  geom_polygon(
    aes(group = group),
    color = "white", linewidth = .2
  ) +
  # fixed y/x aspect ratio; clipping is turned off for the panel
  coord_fixed(1.3, clip = "off")

p1

Use the world.cities dataset from the maps package.

# Peek at the first three rows of the cities dataset from the maps package.
world.cities %>%
  head(3) %>%
  as_tibble()

Output:

# A tibble: 3 × 6
name country.etc pop lat long capital
<chr> <chr> <int> <dbl> <dbl> <int>
1 'Abasan al-Jadidah Palestine 5629 31.3 34.3 0
2 'Abasan al-Kabirah Palestine 18999 31.3 34.4 0
3 'Abdul Hakim Pakistan 47788 30.6 72.1 0

Here we’ll visualize cities and towns with a population larger than 1000 people. As the population spans several orders of magnitude, we apply a logarithmic transformation to the population before mapping it to the alpha and color aesthetics. (Also check the use of a pseudo-logarithmic transformation in the color scale to visualize skewed data patterns.)

# Scatter all cities / towns with more than 1000 inhabitants on the map.
# Both transparency and color are mapped to the log10 of the population.
p2 <- p1 +
  geom_point(
    data = world.cities %>% filter(pop > 10^3),
    aes(alpha = log10(pop), color = log10(pop)),
    size = .1
  ) +
  scale_color_distiller(palette = "Spectral")

p2

Highlight big cities with a population size over a million people as red points.

# Overlay cities with more than one million inhabitants as small red points.
p3 <- p2 +
  geom_point(
    data = filter(world.cities, pop > 10^6),
    size = .2,
    color = "tomato3"
  )

p3

Highlight mega cities with over 5 million population as enlarged dark red points.

# Mega cities: over 5 million inhabitants. Filtered once and reused for both
# the point layer and the label layer.
mega_cities <- world.cities %>% filter(pop > 5 * 10^6)

p4 <- p3 +
  # mega cities as enlarged dark red points with a white outline
  geom_point(
    data = mega_cities,
    size = 3, shape = 21, fill = "red4", color = "white"
  ) +
  # add city names
  geom_text_repel(
    data = mega_cities,
    aes(label = name),
    max.overlaps = Inf, box.padding = unit(0, "pt"),
    size = 2, color = "cyan3", fontface = "bold",
    min.segment.length = 0
  )

p4

Apply the blank canvas theme, and position the colorbar at the bottom left corner. Note that theme settings specified in guides() take precedence over those specified in theme().

# Blank canvas theme, colorbar repositioned, alpha legend removed.
p5 <- p4 +
  theme_void() +
  theme(
    # NOTE(review): newer ggplot2 releases prefer legend.position = "inside"
    # plus legend.position.inside for numeric positions; confirm against the
    # installed version before changing.
    legend.position = c(.37, 0.02),
    plot.background = element_rect(fill = "floralwhite", color = NA),
    # increase margin at top and bottom of the plot
    plot.margin = margin(t = 10, b = 20, unit = "pt")
  ) +
  guides(
    # remove legend associated with the alpha aesthetic
    # ("none" replaces the deprecated `alpha = FALSE`)
    alpha = "none",
    # adjust the color bar
    color = guide_colorbar(
      direction = "horizontal",
      barwidth = unit(150, "pt"),
      barheight = unit(5, "pt"),
      # theme settings given here take precedence over those in theme()
      title.theme = element_text(vjust = 1, family = "Lobster")
    )
  )

p5

Add legend of mega cities at the plot bottom right corner, and add plot title.

# Hand-made legend entry for mega cities (point + text) and the plot title.
p6 <- p5 +
  # create a point
  annotate(
    geom = "point", x = 90, y = -60,
    color = "red3", size = 3.5
  ) +
  # add text label, using the loaded Google font
  annotate(
    geom = "text", x = 100, y = -60,
    label = "mega cities\npop. > 5 million",
    color = "black", size = 4, hjust = 0,
    family = "Lobster"
  ) +
  # add plot title
  labs(title = "Populous Cities in the World") +
  theme(
    plot.title = element_text(hjust = .5, family = "Lobster", face = "italic")
  )

p6

Save the plot.

# Make sure the output folder exists before saving; a no-op if it already does.
dir.create("graphics", showWarnings = FALSE)

ggsave(
  "World populous cities.pdf",
  plot = p6, # save the final plot explicitly, not whichever was shown last
  path = "graphics", # a relative path to the "graphics" folder
  width = 6, height = 4
)
# Full script ----

library(ggplot2)
library(dplyr)
library(ggrepel)

theme_set(theme_minimal())

# World map outline data; preview the first three rows.
m <- map_data("world")
head(m, 3) %>% as_tibble()

# Load the "Lobster" font from Google font repository
library(showtext)
font_add_google(name = "Lobster", family = "Lobster")
showtext_auto()

# Create a world map.
# NOTE(review): the walk-through earlier in this article uses an aspect ratio
# of 1.3 here; 1.5 is kept as written -- confirm which one is intended.
p1 <- m %>%
  filter(lat > -55) %>% # remove Antarctic
  ggplot(aes(x = long, y = lat)) +
  geom_polygon(
    aes(group = group),
    color = "white", linewidth = .2
  ) +
  coord_fixed(1.5, clip = "off")

# Using the `world.cities` dataset from the `maps` package. Here we visualize
# cities / towns with population larger than 1000.
# install.packages("maps")
library(maps)
head(world.cities, 3) %>% as_tibble()

# Transparency and color are both mapped to the log10 of the population.
p2 <- p1 +
  geom_point(
    data = world.cities %>% filter(pop > 10^3),
    aes(alpha = log10(pop), color = log10(pop)),
    size = .1
  ) +
  scale_color_distiller(palette = "Spectral")

p2

# Highlight big cities with over a million population as red points.
p3 <- p2 +
  geom_point(
    data = world.cities %>% filter(pop > 10^6),
    size = .2, color = "tomato3"
  )

p3

# Highlight mega cities with over 5 million population as enlarged dark red
# points. The subset is filtered once and reused for points and labels.
mega_cities <- world.cities %>% filter(pop > 5 * 10^6)

p4 <- p3 +
  # mega cities: over 5 million
  geom_point(
    data = mega_cities,
    size = 3, shape = 21, fill = "red4", color = "white"
  ) +
  # add city names
  geom_text_repel(
    data = mega_cities,
    aes(label = name),
    max.overlaps = Inf, box.padding = unit(0, "pt"),
    size = 2, color = "cyan3", fontface = "bold",
    min.segment.length = 0
  )

p4

# adjust theme and legend
p5 <- p4 +
  theme_void() +
  theme(
    legend.position = c(.37, 0.02),
    plot.background = element_rect(fill = "floralwhite", color = NA),
    # increase margin at top and bottom of the plot
    plot.margin = margin(t = 10, b = 20, unit = "pt")
  ) +
  guides(
    # remove legend associated with the alpha aesthetic
    # ("none" replaces the deprecated `alpha = FALSE`)
    alpha = "none",
    # adjust the color bar
    color = guide_colorbar(
      direction = "horizontal",
      barwidth = unit(150, "pt"),
      barheight = unit(5, "pt"),
      # theme settings given here take precedence over those in theme()
      title.theme = element_text(vjust = 1, family = "Lobster")
    )
  )

p5

# Add legend of mega cities, and add plot title.
p6 <- p5 +
  # create a point
  annotate(
    geom = "point", x = 90, y = -60,
    color = "red3", size = 3.5
  ) +
  # add text label, using the loaded Google font
  annotate(
    geom = "text", x = 100, y = -60,
    label = "mega cities\npop. > 5 million",
    color = "black", size = 4, hjust = 0,
    family = "Lobster"
  ) +
  # add plot title
  labs(title = "Populous Cities in the World") +
  theme(
    plot.title = element_text(hjust = .5, family = "Lobster", face = "italic")
  )

p6


Continue Exploring — 🚀 one level up!


Check out the following article that visualizes the global flights and airports illustrated below. And also check how to modify the script to turn static flight lines into animation.



Check out the following awesome 2D histogram with a world map overlay that visualizes hurricane activity in the North Atlantic Ocean.