箱线图
1.槽口
在geom_boxplot()的括号中加入notch=TRUE即可在箱子上生成槽口,例如:
# A smaller notchwidth makes the notch cut deeper into the box.
geom_boxplot(
  notch = TRUE,
  notchwidth = 0.8
)
2.均值标记
箱线中的均值标记常以钻石来表示,所以命令为
# Mark each group's mean with a white-filled diamond (point shape 23).
# Uses plain ASCII quotes (typographic quotes are a syntax error in R) and
# `fun`, which replaces the deprecated `fun.y` argument.
stat_summary(fun = "mean", geom = "point", shape = 23, size = 3, fill = "white")
3.加入最大最小值线
想要使ggplot2所绘制的箱线图带有最大最小值线,可用stat_boxplot命令,完整如下:
# Add end caps to the whiskers, coloured by the grouping column.
# `用于分类的列` is a placeholder for the name of the grouping column.
stat_boxplot(geom = "errorbar", width = 0.15, aes(color = 用于分类的列))
要注意的是,因为ggplot2的规则是图层叠加,所以如果是先作箱线图,即先输入geom_boxplot(),再输入stat_boxplot(),会导致箱线图中出现十字。
所以输入命令时,必须先输入stat_boxplot(),再输入geom_boxplot()。
# Whisker caps are added first so that the boxes are drawn on top of them.
ggplot(changdu, aes(factor, long)) +
  stat_boxplot(geom = "errorbar", width = 0.15) +
  geom_boxplot(aes(fill = factor))
4.离群点(异常值)
关于离群点的参数有outlier开头的多个,如:
outlier.colour:离群点的颜色参数
outlier.fill:离群点的填充色参数
outlier.shape:离群点的形状参数
outlier.size:离群点的大小参数
outlier.alpha:离群点的透明度参数
使用时放在geom_boxplot中,如:
# Draw outliers as large red markers using point shape 7.
geom_boxplot(
  outlier.colour = "red",
  outlier.shape = 7,
  outlier.size = 4
)
5.将箱线图转置
添加
# Swap the x and y axes so that the boxes are drawn horizontally.
coord_flip()
6.可变宽度
箱线图只能反映数据的整体态势,而不能反映数据的密度,但是如果数据密度越大,箱子的宽度也随之增大,则可在一定程度上了解数据的密度。
# Let each box's width grow with the number of observations in its group.
geom_boxplot(varwidth = TRUE)
7.箱线图中列出散点
在箱线图中列出散点可以更直观地看出数据在箱线中的分布。有四种方法。
1)geom_point()将数据的散点图重叠在箱线图之上
第一种方法是使用geom_point()将数据的散点图重叠在箱线图之上,但缺点是画出的散点只能排列在同一x坐标上,会导致重叠,观察不出数据的分布密度。
2)geom_dotplot
第二种方法是使用geom_dotplot。相比于第一种方法,geom_dotplot画出来的图较为分散美观且可以加入抖动参数。
# Overlay the individual observations on the boxes as a jittered dot plot.
ggplot(chang, aes(x = days, y = length, color = days)) +
  geom_boxplot() +
  geom_dotplot(
    binaxis = "y",        # bin the dots along the y axis
    stackdir = "center",  # stack dots symmetrically around the centre line
    position = "jitter",
    dotsize = 0.4         # no trailing comma: it would pass an empty argument
  )
dotplot的具体参数信息可以参考下面这个网址:
https://www.cnblogs.com/ljhdo/p/4886067.html
3)ggpubr包
第三种方法,是使用ggpubr包进行作图。
命令如下:
# Box plot of length by days with jittered points added, coloured per group
# using the "jama" palette.
ggboxplot(
  chang,
  x = "days", y = "length",
  color = "days", palette = "jama",
  add = "jitter"
)
4)使用geom_jitter体现出抖动的点,例如
# Box plot with jittered raw points drawn on top.
# NOTE(review): scale_fill_viridis() and theme_ipsum_rc() are not part of
# ggplot2; presumably they come from the viridis and hrbrthemes packages,
# which must be attached first. Confirm against the caller's setup.
ggplot(chang, aes(x = days, y = length, fill = days)) +
  geom_boxplot() +
  # TRUE is spelled out because T is an ordinary variable that can be reassigned.
  scale_fill_viridis(discrete = TRUE, alpha = 0.6) +
  geom_jitter(color = "black", size = 0.89, alpha = 0.9) +
  theme_ipsum_rc()
将散点在箱线中体现(2)
可以使用ggExtra包中的ggMarginal()功能将箱线放在散点图的周围
# Scatter plot of mpg against wt, then box plots added in the margins.
p <- ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point()
p1 <- ggMarginal(p, type = "boxplot", fill = "slateblue", size = 6)
该方法不仅限于散点图和箱线图,也可用于直方图、箱线图、密度图与其他图的结合。
总结:
下面是并列型箱线图包含上述内容的总命令
# Template: side-by-side box plots combining the options covered above.
# The Chinese identifiers are placeholders to replace before running:
# 表名 = data frame, 表中列 = a column, 表中用于分类的列 = grouping column.
ggplot(表名, aes(x = 表中列, y = 表中列)) +
  # Whisker caps go first so that the boxes are drawn on top of them.
  stat_boxplot(geom = "errorbar", width = 0.15) +
  geom_boxplot(
    aes(fill = 表中用于分类的列),
    notch = TRUE,
    outlier.colour = "red", outlier.shape = 7, outlier.size = 4
  ) +
  scale_x_discrete(limits = c("A", "B", "C", "D", "E", "F", "G")) +
  scale_fill_brewer(palette = "Set1") +
  # Group means as white-filled diamonds; `fun` replaces the deprecated `fun.y`.
  stat_summary(fun = "mean", geom = "point", shape = 23, size = 3, fill = "white") +
  theme_bw() +
  # Blank background, keeping black axis lines.
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )
下面是分组型箱线图包含上述内容的总命令
# Template: faceted (grouped) box plots combining the options covered above.
# The Chinese identifiers are placeholders to replace before running:
# 表名 = data frame, 表中列 = a column, 表中用于分类的列 = grouping column,
# 分面所根据的变量 = faceting variable.
ggplot(表名, aes(x = 表中列, y = 表中列)) +
  # Whisker caps go first so that the boxes are drawn on top of them.
  stat_boxplot(geom = "errorbar", width = 0.15) +
  geom_boxplot(
    aes(fill = 表中用于分类的列),
    notch = TRUE,
    outlier.colour = "red", outlier.shape = 7, outlier.size = 4
  ) +
  # One panel per level of the faceting variable, each with its own axis ranges.
  facet_wrap(~分面所根据的变量, scales = "free") +
  scale_fill_brewer(palette = "Set1") +
  # Group means as white-filled diamonds; `fun` replaces the deprecated `fun.y`.
  stat_summary(fun = "mean", geom = "point", shape = 23, size = 3, fill = "white") +
  theme_bw() +
  # Blank background, keeping black axis lines.
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )
如果想要尝试,可以从https://pan.baidu.com/s/1PQWiOGjSzdyVMTsa1sGwWA
中下载文件。
参考简书作者:dowaves
链接:https://www.jianshu.com/p/9ae6e06d4947
小提琴图
# Plain violin plot of sepal width per species.
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Width)) +
  geom_violin()
# Violin plot with a narrow box plot overlaid. Outliers are hidden
# (outlier.colour = NA) and the median is marked with an orange diamond.
# `fun` replaces the deprecated `fun.y` argument of stat_summary().
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Width)) +
  geom_violin() +
  geom_boxplot(width = 0.3, outlier.colour = NA, fill = "blue") +
  stat_summary(fun = "median", geom = "point", shape = 18, colour = "orange")
绘制二维密度函数
# Two-dimensional density plots ----
# Install modeldata only when it is missing, instead of on every run.
# "Rtools" is a Windows build toolchain, not an R package, so it cannot be
# installed with install.packages(). The churn data formerly shipped with C50
# (churnTrain / churnTest) is provided by modeldata as `mlc_churn`, so C50 is
# not needed here.
if (!requireNamespace("modeldata", quietly = TRUE)) {
  install.packages("modeldata")
}
library(ggplot2)
library(modeldata)
data(mlc_churn)

# Scatter plot with density contour lines.
ggplot(data = mlc_churn, mapping = aes(x = total_day_minutes, y = total_eve_calls)) +
  geom_point() +
  stat_density2d()

# Map the height of the density surface (level) to the contour colour.
# after_stat() replaces the deprecated ..level.. / ..density.. notation.
ggplot(data = mlc_churn, mapping = aes(x = total_day_minutes, y = total_eve_calls)) +
  stat_density2d(aes(colour = after_stat(level))) +
  scale_color_gradient(low = "lightblue", high = "darkred")

# Map the density estimate to the fill colour of tiles.
ggplot(data = mlc_churn, mapping = aes(x = total_day_minutes, y = total_eve_calls)) +
  stat_density2d(aes(fill = after_stat(density)), geom = "tile", contour = FALSE)

# Map the density estimate to transparency.
ggplot(data = mlc_churn, mapping = aes(x = total_day_minutes, y = total_eve_calls)) +
  stat_density2d(aes(alpha = after_stat(density)), geom = "tile", contour = FALSE)

# Same idea on iris, with explicit bandwidths (h) for the x and y directions.
ggplot(data = iris, mapping = aes(x = Petal.Length, y = Petal.Width)) +
  stat_density2d(
    aes(alpha = after_stat(density)),
    geom = "tile", contour = FALSE, h = c(0.1, 0.2)
  )
其他
library(ggplot2)
# Histograms ----
set.seed(1234)
x <- rnorm(1000, mean = 2, sd = 3)

# Histogram with the default binning.
ggplot(NULL, aes(x = x)) +
  geom_histogram()

# Split the data into 50 bins; steel-blue fill with a black outline.
ggplot(NULL, aes(x = x)) +
  geom_histogram(bins = 50, fill = "steelblue", colour = "black")

# Set the bin width to one twentieth of the data range.
group_diff <- diff(range(x)) / 20
ggplot(NULL, aes(x = x)) +
  geom_histogram(binwidth = group_diff, fill = "steelblue", colour = "black")
# Map a grouping variable to the fill colour ----
set.seed(1234)
x <- c(
  rnorm(500, mean = 1, sd = 2),
  rt(500, df = 10)
)
y <- rep(c(0, 1), times = c(500, 500))
df <- data.frame(x = x, y = y)

# Turn the numeric group indicator into a factor so it maps to discrete fills.
df$y <- factor(df$y)

# Both groups drawn from the baseline (position = "identity").
ggplot(df, aes(x = x, fill = y)) +
  geom_histogram(position = "identity", bins = 50, colour = "black")

# Same histogram with the default position.
ggplot(df, aes(x = x, fill = y)) +
  geom_histogram(bins = 50, colour = "black")

# One panel per group using facets.
ggplot(df, aes(x = x)) +
  geom_histogram(bins = 50, fill = "steelblue", colour = "black") +
  facet_grid(. ~ y)
# Kernel density curves ----
state <- as.data.frame(state.x77)

# Density curve of Income drawn with geom_density().
ggplot(state, aes(x = Income)) +
  geom_density()

# The same curve drawn with geom_line() and the density stat.
ggplot(state, aes(x = Income)) +
  geom_line(stat = "density")

# Compare bandwidth adjustments (0.5, 1, 2) by overlaying the curves.
ggplot(state, aes(x = Income)) +
  geom_line(stat = "density", adjust = 0.5, colour = "red", size = 2) +
  geom_line(stat = "density", adjust = 1, colour = "black", size = 2) +
  geom_line(stat = "density", adjust = 2, colour = "steelblue", size = 2)

# The same comparison as semi-transparent filled densities.
ggplot(state, aes(x = Income)) +
  geom_density(adjust = 0.5, fill = "red", alpha = .2) +
  geom_density(adjust = 1, fill = "black", alpha = .5) +
  geom_density(adjust = 2, fill = "steelblue", alpha = .4)
# Map a grouping variable to the line colour ----
set.seed(1234)
x <- c(rnorm(500), rnorm(500, 2, 3), rnorm(500, 0, 5))
y <- rep(c("A", "B", "C"), each = 500)
df <- data.frame(x = x, y = y)

# One density curve per group, distinguished by colour.
ggplot(data = df, mapping = aes(x = x, colour = y)) +
  geom_line(stat = "density", size = 2)

# One panel per group using facets.
ggplot(data = df, mapping = aes(x = x, colour = y)) +
  geom_density(size = 2) +
  facet_grid(. ~ y)

# Histogram in counts with a density curve overlaid. The two layers are on
# different y scales (count vs. density).
ggplot(data = df, mapping = aes(x = x)) +
  geom_histogram(bins = 50, fill = "blue", colour = "black") +
  geom_density(adjust = 0.5, colour = "black", size = 2) +
  facet_grid(. ~ y)

# Histogram rescaled to density so that both layers share the same y scale.
# after_stat(density) replaces the deprecated ..density.. notation.
ggplot(data = df, mapping = aes(x = x)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 50, fill = "blue", colour = "black"
  ) +
  geom_density(adjust = 0.5, colour = "red") +
  facet_grid(. ~ y)
# Box plots ----
# Basic box plot of sepal width per species.
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Width)) +
  geom_boxplot(fill = "steelblue")

# Red square outliers (shape 15) and a wider box.
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Width)) +
  geom_boxplot(
    fill = "steelblue", outlier.colour = "red", outlier.shape = 15, width = 1.2
  )

# Same plot with notches.
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Width)) +
  geom_boxplot(
    notch = TRUE, fill = "steelblue",
    outlier.colour = "red", outlier.shape = 15, width = 1.2
  )

# A single box for all observations: x is mapped to the constant "Test".
ggplot(data = iris, mapping = aes(x = "Test", y = Sepal.Width)) +
  geom_boxplot(
    fill = "steelblue", outlier.colour = "red", outlier.shape = 15, width = 1.2
  )

# Remove the x-axis title, tick marks and labels.
ggplot(data = iris, mapping = aes(x = "Test", y = Sepal.Width)) +
  geom_boxplot(
    fill = "steelblue", outlier.colour = "red", outlier.shape = 15, width = 1.2
  ) +
  theme(axis.title.x = element_blank()) +
  scale_x_discrete(breaks = NULL)

# Additionally mark the mean with a large orange diamond.
# `fun` replaces the deprecated `fun.y` argument of stat_summary().
ggplot(data = iris, mapping = aes(x = "Test", y = Sepal.Width)) +
  geom_boxplot(
    fill = "steelblue", outlier.colour = "red", outlier.shape = 15, width = 1.2
  ) +
  theme(axis.title.x = element_blank()) +
  scale_x_discrete(breaks = NULL) +
  stat_summary(fun = "mean", geom = "point", shape = 18, colour = "orange", size = 5)