来自公众号【小明的数据分析笔记本】
在原图的基础上自己大刀阔斧地改了一下
把图改成了我的好女儿阿尼亚,配色使用的是宫崎骏动画配色的ghibli包,
散点图参考第二周大佬原图的思路,把下面不重要的数据点全都用灰色处理,感觉好看了很多
这次的问题:
1. x轴标题和坐标轴的距离太近了(y轴标题的距离已经成功调整)
2. legend的颜色不知道怎么指定
3. 还可以再加个大标题(懒
代码在下面
这周新学了一下r4ds tidyverse入门,感觉画图起来更熟练了~
library(readr)
library(tidyverse)
library(ggrepel)
library(patchwork)
library(ghibli)
library(jpeg)
# Load the TidyTuesday anime data and keep only Studio Ghibli movies.
# filter() keeps the rows that match the conditions; select() keeps only the
# columns used by the plots.
df_ghibli <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-04-23/tidy_anime.csv"
) %>%
  filter(studio == "Studio Ghibli", type == "Movie") %>%
  dplyr::select(
    animeID, title_english, title_japanese, genre, score, scored_by, members
  ) %>%
  # tidy_anime.csv repeats each anime once per genre/producer/studio
  # combination. Once the producer column is dropped those repeats are exact
  # duplicates that would inflate any per-genre count, so remove them.
  distinct()
# Read the picture with the jpeg package (as a native raster) and wrap it in a
# grid raster grob so it can be drawn as a graphical object.
img_a <- readJPEG("ania1.jpeg", native = TRUE)
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
a <- grid::rasterGrob(img_a, interpolate = TRUE)
# Rows per genre, restricted to genres that occur more than five times.
# genre is turned into a factor ordered by its count so that the bars are
# drawn in ascending order.
ghibli_genres <- df_ghibli %>%
  count(genre) %>%
  filter(n > 5) %>%
  mutate(genre = fct_reorder(genre, n))
# Horizontal bar chart of the most common genres with a picture inset.
p_col <- ghibli_genres %>%
  ggplot(aes(genre, n)) + # aesthetics shared by every layer
  geom_col(aes(fill = genre)) + # aesthetic local to this layer
  coord_flip() + # swap the two axes without changing the data
  # Range of the y scale; after the flip it is drawn on the horizontal axis.
  scale_y_continuous(limits = c(0, 20), expand = c(0.01, 0)) +
  scale_fill_ghibli_d("PonyoMedium") + # discrete palette from the ghibli package
  guides(fill = "none") + # no legend for the fill colours
  labs(x = "Most common genres", y = "Count") +
  # Inset the image grob; the four bounds position it in data coordinates.
  annotation_custom(a, xmin = 0.8, xmax = 4, ymin = 10.5, ymax = 23) +
  theme_test() +
  theme(
    # margin() takes top, right, bottom, left. The gap between the bottom
    # axis and its title is the title's *top* margin (the original set the
    # bottom margin, which left the title stuck to the axis).
    axis.title.x = element_text(margin = margin(t = 0.5, unit = "cm")),
    # The gap between the left axis and its title is the title's right margin.
    axis.title.y = element_text(margin = margin(r = 0.5, unit = "cm"))
  )
p_col
# One row per film: group by animeID and keep the first value of every other
# column. across(everything(), first) replaces the superseded
# summarize_all(first).
df_ghibli_unique <- df_ghibli %>%
  group_by(animeID) %>%
  summarise(across(everything(), first)) %>%
  # New label column: Japanese title, then the English title in parentheses
  # on a second line.
  mutate(title = glue::glue("{title_japanese}\n({title_english})"))
# Scatter plot of average score against number of ratings. Films with more
# than 140,000 ratings are coloured and labelled; all others are drawn as
# semi-transparent black points.
# The two subsets are complementary (`>` and `<=`) so that a film with exactly
# 140,000 ratings is not silently dropped from the plot.
ghibli_top <- filter(df_ghibli_unique, scored_by > 140000)
ghibli_rest <- filter(df_ghibli_unique, scored_by <= 140000)

p_point <- df_ghibli_unique %>%
  ggplot(aes(score, scored_by)) +
  geom_point(
    data = ghibli_top,
    aes(color = title, size = members),
    alpha = 0.8
  ) +
  # The default point shape has no fill, so only `color` is set here.
  geom_point(
    data = ghibli_rest,
    aes(size = members),
    color = "black",
    alpha = 0.5
  ) +
  geom_text_repel(
    data = ghibli_top,
    aes(label = title_english),
    size = 3,
    color = "grey40",
    segment.size = 0.3, # thickness of the connecting line segments
    xlim = c(9.25, 10), # horizontal range the labels may occupy
    box.padding = 0.5, # padding around each label box
    force = 5 # repulsion strength between overlapping labels
  ) +
  # Seven colours, one per highlighted film the author expects above the
  # threshold; the manual scale needs at least as many values as titles.
  scale_color_manual(
    values = c(
      "#CD4F38FF", "#E7A79BFF", "#67B9E9FF", "#742C14FF",
      "#6FB382FF", "#E48C2AFF", "#3D4F7DFF"
    )
  ) +
  scale_x_continuous(limits = c(6, 11)) + # x-axis range
  # y-axis range; scales::comma adds thousands separators to the labels.
  scale_y_continuous(labels = scales::comma, limits = c(0, 600000)) +
  scale_size_continuous(
    name = "Times listed by MAL users:",
    breaks = c(1000, 10000, 100000, 250000, 500000),
    labels = c(" 1,000", " 10,000", "100,000", "250,000", "500,000")
  ) +
  labs(x = "Average MAL user score", y = "Number of ratings") +
  guides(color = "none") + # the labels replace the colour legend
  theme_test() +
  theme(
    # NOTE(review): a numeric legend.position is deprecated from ggplot2 3.5.0
    # in favour of legend.position.inside; kept so older versions still work.
    legend.position = c(0.28, 0.5),
    legend.background = element_rect(fill = "transparent"),
    legend.title = element_text(size = 9),
    legend.text = element_text(size = 8),
    # margin() takes top, right, bottom, left. The gap between the bottom
    # axis and its title is the title's *top* margin (the original set the
    # bottom margin, which left the title stuck to the axis).
    axis.title.x = element_text(margin = margin(t = 0.5, unit = "cm")),
    axis.title.y = element_text(margin = margin(r = 0.5, unit = "cm"))
  )
# Place the bar chart and the scatter plot side by side with equal widths.
wrap_plots(p_col, p_point, widths = c(1, 1))