dot pairs 点_GGally与pairs相关关系图_史上最全(1)

原标题:GGally与pairs相关关系图_史上最全(1)

作者:李誉辉

四川大学在读研究生

简介

对于多个变量之间的相关关系,常常使用相关关系图来可视化,R自带有pairs()函数,

可以画相关关系图,但是比较复杂,我们先介绍基于ggplot2的GGally包。

等介绍完,再介绍pairs()函数。

1.ggmatrix()

ggmatrix()可以将多个ggplot2绘图对象,按照矩阵进行排列。

1.1

矩阵第1列

1library(ggplot2)

2data(tips, package = "reshape")

3

4head(tips)

5

6g1

7geom_density(show.legend= FALSE)

8

9g2

10geom_histogram(position= position_stack(),show.legend= FALSE)+

11facet_grid(rows= vars(time))#以time变量行分面

12

13g3

14geom_point(show.legend= FALSE)

15

1.2

矩阵第2列

1library(ggplot2)

2

3g4

4geom_boxplot(show.legend= FALSE)

5

6g5

7geom_bar(position= position_stack(),show.legend= FALSE)

8

9g6

10geom_histogram(position= position_stack(),show.legend= FALSE)+

11coord_flip() +

12facet_grid(cols= vars(time))

13

1.3

矩阵第3列

1library(ggplot2)

2library(dplyr)

3library(tibble)

4

5# 第一个图

6text_1

7tips_female <- tips %>% filter(sex == "Female") %>% as.data.frame()

8tips_male <- tips %>% filter(sex == "Male") %>% as.data.frame()

9text_2

10text_3

11mytext

12mytext

13mytext

14x= 5,

15y= c(6,4, 2),

16stringsAsFactors= FALSE)

17

18g7

19geom_text(show.legend= F)+

20geom_text(data= mytext[1,],aes(x= x,y= y,label= text),

21color= "black")

22

23rm(text_1, tips_female, tips_male, text_2, text_3, mytext)

24

25# 第2个图

26g8

27geom_boxplot(show.legend= FALSE)+

28coord_flip()

29

30# 第3个图

31g9

32geom_density(show.legend= FALSE)

33

1.4

customLayout合并图形

1library(customLayout)

2# 创建画布

3mylay <- lay_new(

4mat = matrix(1:9, ncol = 3))

5

6plot_list <- list(g1, g2, g3, g4, g5, g6, g7, g8, g9)

7

8lay_grid(plot_list, mylay) # ggplot2绘图列表传参,传递到画布mylay

9

10rm(g1, g2, g3, g4, g5, g6, g7, g8, g9, mylay)

1.5

ggmatrix合并图形

1library(GGally)

2

3gg_m <- ggmatrix(

4plots = plot_list, # 绘图对象列表

5nrow = 3, ncol = 3, # 行数和列数

6xAxisLabels = c("Total Bill", "Time of Day", "Tip"),

7yAxisLabels = c("Total Bill", "Time of Day", "Tip"),

8byrow = FALSE, # 按列排

9title = "ggmatrix合并图形"

10)

11

12# 添加主题

13gg_m + theme_bw()

14

15# 提取子集,只能提取其中一个

16gg_m[1,2]

17

18rm(plot_list, gg_m)

2.ggpairs()

GGally通过添加几个函数来扩展ggplot2,以降低geom与转换数据组合的复杂性。

其中一些功能包括配对图矩阵,散点图矩阵,平行坐标图,生存图,以及绘制网络的几个函数。

2.1

语法及关键参数

语法:

1ggpairs(data, mapping = NULL, columns = 1:ncol(data), title = NULL,

2upper = list(continuous = "cor", combo = "box_no_facet", discrete =

3"facetbar", na = "na"), lower = list(continuous = "points", combo =

4"facethist", discrete = "facetbar", na = "na"), diag = list(continuous =

5"densityDiag", discrete = "barDiag", na = "naDiag"), params = NULL, ...,

6xlab = NULL, ylab = NULL, axisLabels = c("show", "internal", "none"),

7columnLabels = colnames(data[columns]), labeller = "label_value",

8switch= NULL, showStrips = NULL, legend = NULL,

9cardinality_threshold = 15, progress = NULL,

10legends = stop("deprecated"))

关键参数:

mapping, 表示要叠加到x,y上的aes()映射变量,这里是全局映射。

columns, 表示选择要绘图的列,可以用变量索引值指定,也可以用变量名指定。

columnLabels, 指定矩阵列名称。

title, xlab, ylab, 表示指定标题和坐标轴名称。

lower,upper,表示指定下三角和上三角的plot类型,列表传参。

diag,表示指定对角线的plot类型,列表传参。

axisLabels, 指定变量名称的显示位置,默认显示在外侧, "internal"则显示在内侧,"none"则不显示。

labeller, 表示指定分面标签,

switch, 表示指定分面标签位置,与ggplot2::facet_grid中一致,默认在顶部和右侧, 若switch = "x",则显示在底部和右侧,若switch = "y"则显示在顶部和左侧, 若switch = "both"则显示在底部和左侧。

showStrips, 布尔运算决定是否显示plots的条带,默认NULL只显示顶部和右侧的条带。 TRUE则显示所有的条带,FALSE则不显示所有的条带。

legend, 默认NULL不显示,可以通过theme(legend.position = "bottom")调整图例的位置。 有3种指定图例类型的方式:

长度为2的数字向量,表示给矩阵所在的行和列增加图例。如c(2,3)表示第2行第3列增加图例。

长度为1的数字向量,表示根据矩阵的顺序,给相应的panel添加图例, 如legend=3表示给第1行第3列增加图例。

预先使用grab_legend()提取ggplot2对象的图例,然后指定给legend。

cardinality_threshold, 表示允许因子变量的最大因子水平数量,默认最多15个因子水平。NULL则不检查因子变量的水平数量。

progress, 表示是否显示进度条,默认NULL当超过15个plots时显示进度条, 对绘图结果没有任何影响,不需要关注。 TRUE则显示进度条,FALSE则不显示进度条, 也可用ggmatrix_progress()生成进度条,然后指定。

plot类型:

通过5个参数控制plot类型:continuous,combo,discrete,na,mapping

continuous, 表示如果变量x,y都是连续的,应该是什么plot。

对于lower和upper参数: 可以是 "points", "smooth","smooth_loess", "density", "cor", "blank"。

对于diag参数: 可以是 "densityDiag", "barDiag", "blankDiag"

combo, 表示如果变量一个连续,一个离散,应该是什么plot。 只能用于lower和upper不能用于diag。 离散变量只能计数,不能映射坐标,所以可能存在坐标翻转。

可以是 "box", "box_no_facet", "dot", "dot_no_facet", "facethist", "facetdensity", "denstrip", "blank"

discrete, 表示2个变量都是离散的,应该是什么plot。

对于upper和lower参数: 可以是: "facetbar", "ratio", "blank"。

对于diag参数: 可以是"barDiag", "blankDiag"。

na, 表示指定变量为na的情况,

对于lower和upper,可以是:"na", "blank"。

对于diag,可以是 "naDiag","blankDiag"。

mapping, 表示aes()映射。若指定mapping参数,则叠加到x,y上去。

默认 lower = list(continuous = "points", combo = "facethist", discrete = "facetbar")

默认 upper = list(continuous = "cor", combo = "box_no_facet", discrete = "facetbar")

默认 diag = list(continuous = "densityDiag", discrete = "barDiag")

2.2

columns及columnLabels

1library(GGally)

2library(ggplot2)

3

4ggpairs(tips, mapping = aes(color = sex),

5columns = c("total_bill", "time", "tip"),

6columnLabels = c("Total_Bill(连续变量)", "Time(离散变量)", "Tip(连续变量)"),

7title = "变量名指定column")

8

9ggpairs(tips, mapping = aes(color = sex),

10columns = c(1, 6, 2),

11columnLabels = c("Total_Bill(连续变量)", "Time(离散变量)", "Tip(连续变量)"),

12title = "索引值指定column")

2.3

lower,upper, diag

2.3.1自定义lower

一个离散变量,lower的discrete参数无效。

1ggpairs(tips, mapping = aes(color = day),

2columns = c("total_bill", "time", "tip"),

3columnLabels = c("Total_Bill(连续变量)", "Time(离散变量)", "Tip(连续变量)"),

4lower = list(

5continuous = "cor",

6combo = "dot_no_facet"# 没有2个离散变量,不需要discrete参数

7),

8upper = list(

9continuous = "blank",

10combo = "blank"

11),

12diag = list(

13continuous = "blankDiag",

14discrete = "blankDiag"

15),

16title = "自定义lower\n(lower$continuous = \"cor\", lower$combo = \"dot_no_facet\")"

17)

两个离散变量,lower的continuous参数无效。

1ggpairs(tips, mapping = aes(color = day),

2columns = c("total_bill", "time", "sex"),

3columnLabels = c("Total_Bill(连续变量)", "Time(离散变量)", "Sex(离散变量)"),

4lower = list(

5combo = "dot_no_facet", #

6discrete = "blank"

7),

8upper = list(

9combo = "blank",

10discrete = "blank"

11),

12diag = list(

13continuous = "blankDiag",

14discrete = "blankDiag"

15),

16title = "自定义lower\n(lower$combo = \"dot_no_facet\",lower$discrete = \"blank\" )"

17)

2.3.2自定义upper

一个离散变量,upper的discrete参数无效。

1ggpairs(tips, mapping = aes(color = day),

2columns = c("total_bill", "time", "tip"),

3columnLabels = c("Total_Bill(连续变量)", "Time(离散变量)", "Tip(连续变量)"),

4upper = list(

5continuous = "density",

6combo = "dot_no_facet"# 没有2个离散变量,不需要discrete参数

7),

8lower = list(

9continuous = "blank",

10combo = "blank"

11),

12diag = list(

13continuous = "blankDiag",

14discrete = "blankDiag"

15),

16title = "自定义upper\n(upper$continuous = \"density\", upper$combo = \"dot_no_facet\")"

17)

两个离散变量,upper的continuous参数无效。

1ggpairs(tips, mapping = aes(color = day),

2columns = c("total_bill", "time", "sex"),

3columnLabels = c("Total_Bill(连续变量)", "Time(离散变量)", "Sex(离散变量)"),

4upper = list(

5combo = "dot_no_facet", #

6discrete = "ratio"

7),

8lower = list(

9combo = "blank",

10discrete = "blank"

11),

12diag = list(

13continuous = "blankDiag",

14discrete = "blankDiag"

15),

16title = "自定义upper\n(upper$combo = \"dot_no_facet\",upper$discrete = \"ratio\" )"

17)

2.3.3自定义diag

diag没有combo参数。

1ggpairs(tips, mapping = aes(color = day),

2columns = c("total_bill", "time", "tip"),

3columnLabels = c("Total_Bill(连续变量)", "Time(离散变量)", "Tip(连续变量)"),

4diag = list(

5continuous = "barDiag",

6discrete = "blankDiag"#

7),

8lower = list(

9continuous = "blank",

10combo = "blank"

11),

12upper = list(

13continuous = "blank",

14combo = "blank"

15),

16title = "自定义diag\n(diag$continuous = \"barDiag\", diag$discrete = \"blankDiag\")"

17)

1ggpairs(tips, mapping = aes(color = day),

2columns = c("total_bill", "time", "sex"),

3columnLabels = c("Total_Bill(连续变量)", "Time(离散变量)", "Sex(离散变量)"),

4diag = list(

5continuous = "barDiag", #

6discrete = "barDiag"

7),

8lower = list(

9discrete = "blank",

10combo = "blank"

11),

12upper = list(

13discrete = "blank",

14combo = "blank"

15),

16title = "自定义diag\n(diag$continuous = \"barDiag\",diag$discrete = \"barDiag\" )"

17)

2.3.4mapping参数

1library(ggplot2)

2library(GGally)

3data(tips, package = "reshape")

4

5ggpairs(tips,

6columns = c("total_bill", "time", "tip"),

7columnLabels = c("Total_Bill(连续变量)", "Time(离散变量)", "Tip(连续变量)"),

8title = "无mapping"

9)

1ggpairs(tips,

2columns = c("total_bill", "time", "tip"),

3columnLabels = c("Total_Bill(连续变量)", "Time(离散变量)", "Tip(连续变量)"),

4lower = list(mapping = aes(color = time)),

5title = "自定义lower(lower$mapping = \"time\")"# 局部映射

6)

1ggpairs(tips,

2columns = c("total_bill", "tip", "size"),

3columnLabels = c("Total_Bill(连续变量)", "Tip(连续变量)", "Size(连续变量)"),

4lower = list(

5continuous = "cor",

6mapping = aes(color = sex)

7),

8upper = list(

9continuous = "cor",

10mapping = aes(color = smoker)

11),

12diag = list(

13continuous = "barDiag",

14mapping = aes(color = time)

15),

16title = "自定义lower,upper,diag\n(下三角颜色为sex,上三角颜色为smoker,对角颜色为time)"

17)

2.3.5同时指定lower,upper,diag

2个连续变量,1个离散变量。

1ggpairs(tips, mapping = aes(color = day),

2columns = c("total_bill", "tip", "time"),

3columnLabels = c("Total_Bill(连续变量)", "Tip(连续变量)", "Time(离散变量)"),

4lower = list(

5continuous = "cor",

6combo = "dot_no_facet"# 没有2个离散变量,不需要discrete参数

7),

8upper = list(

9continuous = "density",

10combo = "dot_no_facet"# 没有2个离散变量,不需要discrete参数

11),

12diag = list(

13continuous = "barDiag",

14discrete = "blankDiag"#

15),

16title = "自定义lower,upper,diag(两个连续变量,一个离散变量)"

17)

1个连续变量,2个离散变量。

1ggpairs(tips, mapping = aes(color = day),

2columns = c("total_bill", "time", "sex"),

3columnLabels = c("Total_Bill(连续变量)", "Time(离散变量)", "Sex(离散变量)"),

4lower = list(

5combo = "dot_no_facet", #

6discrete = "blank"

7),

8upper = list(

9combo = "dot_no_facet", #

10discrete = "ratio"

11),

12diag = list(

13continuous = "barDiag", #

14discrete = "barDiag"

15),

16title = "自定义lower,upper,diag(一个连续变量,两个离散变量)"

17)

责任编辑:

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值