20180507-A · Global Coffee Chains · ggplot2 usmap geom_map geom_point 地图热图美国地图

本文链接：https://blog.csdn.net/Albert_XN/article/details/127388691

所有作品合集传送门： Tidy Tuesday

2018 年合集传送门： 2018

Global Coffee Chains

欢迎来到ggplot2的世界！

ggplot2是一个用来绘制统计图形的 R 软件包。它可以绘制出很多精美的图形，同时能避免诸多的繁琐细节，例如添加图例等。

用 ggplot2 绘制图形时，图形的每个部分可以依次进行构建，之后还可以进行编辑。ggplot2 精心挑选了一系列的预设图形，因此在大部分情形下可以快速地绘制出许多高质量的图形。如果在格式上还有额外的需求，也可以利用 ggplot2 中的主题系统来进行定制, 无需花费太多时间来调整图形的外观，而可以更加转注地用图形来展现你的数据。

在这里插入图片描述

1. 一些环境设置

# 设置为国内镜像, 方便快速安装模块
options("repos" = c(CRAN = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))

2. 设置工作路径

wkdir <- '/home/user/R_workdir/TidyTuesday/2018/2018-05-07_Global_Coffee_Chains/src-a'
setwd(wkdir)

3. 加载 R 包

library(usmap)
library(Hmisc)
library(usdata)
library(tidyverse)
library(albersusa)

# 导入字体设置包
library(showtext)
# font_add_google() showtext 中从谷歌字体下载并导入字体的函数
# name 中的是字体名称, 用于检索, 必须严格对应想要字体的名字
# family 后面的是代码后面引用时的名称, 自己随便起
# 需要能访问 Google, 也可以注释掉下面这行, 影响不大
# font_families_google() 列出所有支持的字体, 支持的汉字不多
# http://www.googlefonts.net/
font_add_google(name = "Karantina", family =  "Karantina")
font_add_google(name = "ZCOOL XiaoWei", family = "zxw")

# 后面字体均可以使用导入的字体
showtext_auto()

4. 加载数据

# 加载数据对象
# week6_coffee_chains.xlsx 有三个 Sheet: starbucks, timhorton, dunkin
# starbucks, dunkin 均有经纬信息, 但 timhorton 没有, 因此通过爬虫从其他地方获取相应数据, 并未使用提供的数据集
df_star <- readxl::read_excel("../data/week6_coffee_chains.xlsx", 'starbucks')
df_timh <- read.table("../data/TimHortons_US.csv", sep = ',', header = TRUE)
df_dunk <- readxl::read_excel("../data/week6_coffee_chains.xlsx", 'dunkin')

# 简要查看数据内容
glimpse(df_timh)

## Rows: 739
## Columns: 7
## $ Country   <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA…
## $ State     <chr> "Indiana", "Indiana", "Indiana", "Indiana", "Indiana", "Indi…
## $ ST        <chr> "IN", "IN", "IN", "IN", "IN", "IN", "IN", "IN", "IN", "IN", …
## $ City      <chr> "Fort Wayne", "Fort Wayne", "Fort Albert", "Fort Wayne", "For…
## $ Longitude <dbl> -85.13329, -85.16657, -85.14786, -85.12427, -85.09961, -85.2…
## $ Latitude  <dbl> 41.19946, 41.14502, 41.11528, 41.11421, 41.11906, 41.06067, …
## $ Store     <chr> "Tim Hortons", "Tim Hortons", "Tim Hortons", "Tim Hortons", …

# 检查数据的列名
colnames(df_timh)

## [1] "Country"   "State"     "ST"        "City"      "Longitude" "Latitude" 
## [7] "Store"

5. 数据预处理

# 从 usmap 包中加载 2015年全美各州的人口估算值
data(statepop)

# 获得星巴克在美国各州分布的数据集
df_starus <- df_star %>% 
  # 将 State/Province 列重命名为 ST
  rename(ST = `State/Province`) %>%
  # 筛选 Country == US 的观测
  filter(Country == "US") %>% 
  # mutate() 主要用于在数据框中添加新的变量, 这些变量是通过对现有的变量进行操作而形成的
  # 这一步是根据 usdata::abbr2state() 函数将美国州名缩写匹配全名
  dplyr::mutate(State = abbr2state(ST)) %>% 
  # 通过 select() 函数选择后续需要的列
  select(Country, State, ST, City, Longitude, Latitude) %>% 
  # 新增一列 Store, 将这个数据集标记为 星巴克 Starbucks
  dplyr::mutate(Store = "星巴克")

# 获取唐恩都乐在美国各州分布的数据集
df_duckus <- df_dunk %>% 
  # 通过 rename() 函数将原数据集中一些复杂列名进行简化
  rename(Country = e_country, ST = e_state, City = e_city,
         Longitude = loc_LONG_poly, Latitude = loc_LAT_poly) %>% 
  # 这一步是根据 usdata::abbr2state() 函数将美国州名缩写匹配全名
  dplyr::mutate(State = abbr2state(ST)) %>% 
  # 通过 select() 函数选择后续需要的列
  select(Country, State, ST, City, Longitude, Latitude) %>% 
  # 筛选 Country == US 的观测
  filter(Country == "USA") %>% 
  # 新增一列 Store, 将这个数据集标记为 唐恩都乐 Dunkin' Donuts
  mutate(Store = "唐恩都乐")
  
# 获取蒂姆·霍顿斯在美国各州分布的数据集
df_timhus <- df_timh %>% 
  mutate(Store = '蒂姆·霍顿斯')

# 合并这三家餐饮店的数据集
df_merge <- rbind(df_starus, df_duckus, df_timhus)
df_plot <- df_merge %>% 
  # 根据州和店名进行分组
  group_by(State, Store) %>% 
  # 计数和取州缩写名
  summarise(count = n(), ST = unique(ST)) %>% 
  # 合并2015年全美各州的人口数据集
  left_join(statepop, by = c("ST" = "abbr")) %>% 
  # 新增一列, 根据人口计算人均访问店铺值
  mutate(Avg = count/pop_2015 * 1e5)

6. 利用 ggplot2 绘图

6.1 绘制星巴克在美国各州分布热图

# 获得美国各州的复合地图
us <- usa_composite()
us_map <- broom::tidy(us, region = "name")

# PS: 方便讲解, 我这里进行了拆解, 具体使用时可以组合在一起
# 获得星巴克在美国各州分布人均饮用的数据集
df_avgstarbuckus <- df_plot %>% filter(Store == '星巴克')
gg <- ggplot()
# geom_map() 绘制地图
gg <- gg + geom_map(data = df_avgstarbuckus, aes(fill = Avg, map_id = State), color = "white", size = 0.01, map = us_map)
# expand_limits() 控制坐标轴的范围
gg <- gg + expand_limits(x = us_map$long, y = us_map$lat)
# scale_fill_gradientn() 将颜色比例转换为概率转换颜色分布, 同时可以根据 limits, breaks, labels 设定连续型刻度的值
gg <- gg + scale_fill_gradientn(colours = c("#98FB98", "#FF4500", "#191970"), limits=c(0, 15),
                                breaks = c(0, 2.5, 5, 7.5, 10, 12.5, 15), 
                                labels = c(0, 2.5, 5, 7.5, 10, 12.5, 15))
# guides() 设置图例信息
gg <- gg + guides(fill = guide_legend(title = '星巴克/10万人',
                                      frame.color = "white",
                                      ticks.colour = "red",
                                      title.position = "top",
                                      label.position = "bottom",
                                      nrow = 1, byrow = TRUE))
# theme_minimal() 去坐标轴边框的最小化主题
gg <- gg + theme_minimal()
# labs() 对图形添加注释和标签（包含标题 title、子标题 subtitle、坐标轴 x & y 和引用 caption 等注释)
gg <- gg + labs(x = NULL, 
                y = NULL, 
                title = '星巴克在美国各州的分布情况 (/10万人)',
                caption = '数据来源: Global Coffee Chains · 2015| GRAPH: 数绘小站')
# theme() 实现对非数据元素的调整, 对结果进行进一步渲染, 使之更加美观
gg <- gg + theme(
  # panel.grid.major 主网格线
  panel.grid.major = element_line(colour = "#333333", size = 0.25),
  # plot.background 图片背景
  plot.background = element_rect(color = '#DCDCDC', fill = '#DCDCDC', size = 2),
  # plot.margin 调整图像边距, 上-右-下-左
  plot.margin = unit(c(.15, .1, .15, 0.1), "cm"),
  # plot.title 主标题
  plot.title = element_text(color = "black", size = 24, face = "bold", vjust = -0.5, family = 'zxw'),
  # axis.text 坐标轴刻度文本
  axis.text = element_text(size = 16, face = "bold", family = 'Karantina'),
  # legend.position 设置图例位置, 这里设置图例的绝对位置
  legend.position  = c(0.88 , .165),
  # legend.direction 设置图例的方向 horizontal 表示水平摆放
  legend.direction = "horizontal",
  # legend.background 设置图例的背景
  legend.background = element_blank())

gg

在这里插入图片描述

6.2 绘制三种连锁店在美国各州分布情况

# 获得美国各州的复合地图
# 剔除 夏威夷岛 和 阿拉斯加州 的数据, 因为太远了, 懒得处理了
df_drop   <- filter(df_merge, ST != "HI" & ST != "AK")
us <- usa_composite()
us_map <- broom::tidy(us, region = "name")
us_map <- filter(us_map, id !="Hawaii" & id != "Alaska") 

# PS: 方便讲解, 我这里进行了拆解, 具体使用时可以组合在一起
hh <- ggplot()
# geom_map() 绘制地图
hh <- hh + geom_map(data = us_map, aes(map_id = id), color="#123445", size = 0.1, fill = NA, map = us_map)
# geom_point() 绘制散点图
hh <- hh + geom_point(data = df_drop, aes(x = Longitude, y = Latitude, color = Store), size = 0.5, alpha = 0.8)
# expand_limits() 控制坐标轴的范围
hh <- hh + expand_limits(x = us_map$long, y = us_map$lat)
# coord_map() 坐标系转换成地理坐标系
hh <- hh + coord_map()
# guides() 设置图例信息
hh <- hh + guides(colour = guide_legend(title = NULL, override.aes = list(size = 3)))
# scale_color_manual() 采取的是手动赋值的方法, 也就是直接把颜色序列赋值给它的参数 value
hh <- hh + scale_color_manual(values = c('#FF4500', '#008000', '#0000CD'))
# theme_minimal() 去坐标轴边框的最小化主题
hh <- hh + theme_minimal()
# labs() 对图形添加注释和标签（包含标题 title、子标题 subtitle、坐标轴 x & y 和引用 caption 等注释)
hh <- hh + labs(x = NULL, 
                y = NULL, 
                title = '唐恩都乐、星巴克、蒂姆霍顿斯在美国各州分布情况 (/10万人)',
                caption = '数据来源: Global Coffee Chains · 2015| GRAPH: 数绘小站')
# theme() 实现对非数据元素的调整, 对结果进行进一步渲染, 使之更加美观
hh <- hh + theme(
  # panel.grid.major 主网格线
  panel.grid.major = element_blank(),
  # plot.background 图片背景
  plot.background = element_rect(color = '#FFDEAD', fill = '#FFDEAD', size = 2),
  # plot.margin 调整图像边距, 上-右-下-左
  plot.margin = unit(c(.15, .1, .15, 0.1), "cm"),
  # plot.title 主标题
  plot.title = element_text(color = "black", size = 22, face = "bold", vjust = -0.5, family = 'zxw'),
  # axis.text 坐标轴刻度文本
  axis.text = element_text(size = 16, face = "bold", family = 'Karantina'),
  # legend.position 设置图例位置, 这里设置图例的绝对位置
  legend.position  = c(0.182 , .125),
  # legend.key.size 调整图例符号大小
  legend.key.size = unit(0.1, "inches"),
  # legend.direction 设置图例的方向 horizontal 表示水平摆放
  legend.direction = "horizontal",
  # legend.text 设置图例文本格式
  legend.text = element_text(size = 12, face = "bold"),
  # legend.background 设置图例的背景
  legend.background = element_blank())

hh

在这里插入图片描述

6.3 绘制某个州三种连锁店在美国各州分布情况

# 获得美国各州的复合地图
# 挑选 纽约 的数据
df_newyork <- filter(df_merge, ST  == "NY")
us <- usa_composite()
us_map <- broom::tidy(us, region = "name") %>% filter(id == "New York") 

# PS: 方便讲解, 我这里进行了拆解, 具体使用时可以组合在一起
ii <- ggplot()
# geom_map() 绘制地图
ii <- ii + geom_map(data = us_map, aes(map_id = id), color="#123456", size = 0.1, fill = NA, map = us_map) 
ii <- ii + geom_point(data = df_newyork, aes(x = Longitude, y = Latitude, color = Store), size = 1.2, alpha = 0.85)
# expand_limits() 控制坐标轴的范围
ii <- ii + expand_limits(x = us_map$long, y = us_map$lat)
# coord_map() 坐标系转换成地理坐标系
ii <- ii + coord_map()
# guides() 设置图例信息
ii <- ii + guides(colour = guide_legend(title = NULL, override.aes = list(size = 3)))
# scale_color_manual() 采取的是手动赋值的方法, 也就是直接把颜色序列赋值给它的参数 value
ii <- ii + scale_color_manual(values = c('#FF4500', '#008000', '#0000CD'))
# theme_minimal() 去坐标轴边框的最小化主题
ii <- ii + theme_minimal()
# labs() 对图形添加注释和标签（包含标题 title、子标题 subtitle、坐标轴 x & y 和引用 caption 等注释)
ii <- ii + labs(x = NULL, 
                y = NULL, 
                title = '唐恩都乐、星巴克、蒂姆霍顿斯\n三种连锁店在纽约的分布情况 (/10万人)',
                caption = '数据来源: Global Coffee Chains · 2015| GRAPH: 数绘小站')
# theme() 实现对非数据元素的调整, 对结果进行进一步渲染, 使之更加美观
ii <- ii + theme(
  # panel.grid.major 主网格线
  panel.grid.major = element_line(colour = "#333333", size = 0.25),
  # plot.background 图片背景
  plot.background = element_rect(color = '#FFB6C1', fill = '#FFB6C1', size = 2),
  # plot.margin 调整图像边距, 上-右-下-左
  plot.margin = unit(c(.15, .1, .15, 0.1), "cm"),
  # plot.title 主标题
  plot.title = element_text(color = "black", size = 24, face = "bold", vjust = -0.5, family = 'zxw'),
  # axis.text 坐标轴刻度文本
  axis.text = element_text(size = 16, face = "bold", family = 'Karantina'),
  # legend.position 设置图例位置, 这里设置图例的绝对位置
  legend.position  = c(0.182 , .155),
  # legend.key.size 调整图例符号大小
  legend.key.size = unit(0.25, "inches"),
  # legend.direction 设置图例的方向 vertical 表示垂直摆放
  legend.direction = "vertical",
  # legend.text 设置图例文本格式
  legend.text = element_text(size = 12, face = "bold"),
  # legend.background 设置图例的背景
  legend.background = element_rect(color = '#DDA0DD', fill = '#DDA0DD', size = 2))

ii

在这里插入图片描述

7. 保存图片到 PDF 和 PNG

filename = '20180507-A-01'
ggsave(gg, filename = paste0(filename, ".pdf"), width = 10.2, height = 6.3, device = cairo_pdf)
ggsave(gg, filename = paste0(filename, ".png"), width = 10.2, height = 6.3, dpi = 100, device = "png", bg = 'white')

filename = '20180507-A-02'
ggsave(hh, filename = paste0(filename, ".pdf"), width = 10.2, height = 6.3, device = cairo_pdf)
ggsave(hh, filename = paste0(filename, ".png"), width = 10.2, height = 6.3, dpi = 100, device = "png")

filename = '20180507-A-03'
ggsave(ii, filename = paste0(filename, ".pdf"), width = 6.3, height = 6.3, device = cairo_pdf)
ggsave(ii, filename = paste0(filename, ".png"), width = 6.3, height = 6.3, dpi = 100, device = "png")

8. session-info

sessionInfo()

## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.5 LTS
## 
## Matrix products: albert
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] showtext_0.9-5  showtextdb_3.0  sysfonts_0.8.8  albersusa_0.4.1
##  [5] forcats_0.5.2   stringr_1.4.1   dplyr_1.0.10    purrr_0.3.4    
##  [9] readr_2.1.2     tidyr_1.2.1     tibble_3.1.8    tidyverse_1.3.2
## [13] usdata_0.2.0    Hmisc_4.7-1     ggplot2_3.3.6   Formula_1.2-4  
## [17] survival_3.4-0  lattice_0.20-45 usmap_0.6.0    
## 
## loaded via a namespace (and not attached):
##  [1] fs_1.5.2               sf_1.0-8               lubridate_1.8.0       
##  [4] RColorBrewer_1.1-3     httr_1.4.4             tools_4.2.1           
##  [7] backports_1.4.1        bslib_0.4.0            rgdal_1.5-32          
## [10] utf8_1.2.2             R6_2.5.1               KernSmooth_2.23-20    
## [13] rpart_4.1.16           rgeos_0.5-9            DBI_1.1.3             
## [16] colorspace_2.0-3       nnet_7.3-17            withr_2.5.0           
## [19] sp_1.5-0               tidyselect_1.1.2       gridExtra_2.3         
## [22] curl_4.3.2             compiler_4.2.1         textshaping_0.3.6     
## [25] cli_3.4.1              rvest_1.0.3            htmlTable_2.4.1       
## [28] xml2_1.3.3             labeling_0.4.2         sass_0.4.2            
## [31] scales_1.2.1           checkmate_2.1.0        classInt_0.4-8        
## [34] proxy_0.4-27           systemfonts_1.0.4      digest_0.6.29         
## [37] foreign_0.8-82         rmarkdown_2.16         base64enc_0.1-3       
## [40] jpeg_0.1-9             pkgconfig_2.0.3        htmltools_0.5.3       
## [43] maps_3.4.0             highr_0.9              dbplyr_2.2.1          
## [46] fastmap_1.1.0          htmlwidgets_1.5.4.9000 rlang_1.0.6           
## [49] readxl_1.4.1           rstudioapi_0.14        farver_2.1.1          
## [52] jquerylib_0.1.4        generics_0.1.3         jsonlite_1.8.2        
## [55] googlesheets4_1.0.1    magrittr_2.0.3         interp_1.1-3          
## [58] Matrix_1.5-1           Rcpp_1.0.9             munsell_0.5.0         
## [61] fansi_1.0.3            lifecycle_1.0.3        stringi_1.7.8         
## [64] yaml_2.3.5             grid_4.2.1             maptools_1.1-4        
## [67] crayon_1.5.1           deldir_1.0-6           haven_2.5.1           
## [70] splines_4.2.1          mapproj_1.2.8          hms_1.1.2             
## [73] knitr_1.40             pillar_1.8.1           reprex_2.0.2          
## [76] glue_1.6.2             evaluate_0.16          latticeExtra_0.6-30   
## [79] data.table_1.14.2      modelr_0.1.9           png_0.1-7             
## [82] vctrs_0.4.2            tzdb_0.3.0             cellranger_1.1.0      
## [85] gtable_0.3.1           assertthat_0.2.1       cachem_1.0.6          
## [88] xfun_0.32              broom_1.0.1            e1071_1.7-11          
## [91] ragg_1.2.3             class_7.3-20           googledrive_2.0.0     
## [94] gargle_1.2.1           units_0.8-0            cluster_2.1.4         
## [97] ellipsis_0.3.2