【简说基因】蜂群图事实上也是一种散点图,不过比传统散点图和抖动散点图更加优雅,也比箱线图和小提琴图能够展示更多细节。
蜂群图(也称为柱形散点图或小提琴散点图)是一种绘制数据点的方式:在普通散点图中,取值相近的点通常会重叠在一起,蜂群图则将它们相邻排列。除了减少重叠,它还有助于展示各取值处的数据密度(类似于小提琴图),同时仍然显示每个数据点的具体数值。
画蜂群图的 R 包主要有:beeswarm 和 ggbeeswarm,本文介绍后者,它为画更好的散点图提供两个几何对象:
geom_quasirandom:准随机散点图几何对象。
geom_beeswarm:蜂群图几何对象。
安装
# The examples below also call library(patchwork) to combine plots with `/`,
# so install it together with ggbeeswarm.
install.packages(c("ggbeeswarm", "patchwork"))
示例
使用 iris 数据集,先比较一下抖动散点图和准随机散点图:
# Fix the RNG seed so the randomly placed points are reproducible.
set.seed(12345)
library(ggplot2)
library(ggbeeswarm)
library(patchwork)
# Compare plain jitter with the quasirandom layout on the same iris columns.
p1 <- ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_jitter() +
  ggtitle("jitter")
p2 <- ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_quasirandom() +
  ggtitle("quasirandom")
# Combine both plots with patchwork's `/` operator (p1 above p2).
p1 / p2
![3077cdc48258c038acedcebf2e640c2e.png](https://i-blog.csdnimg.cn/blog_migrate/854d2408dbd1d2b05d5723a12613deaf.png)
geom_quasirandom()
# Default geom_quasirandom layout: hwy by class from the mpg data.
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_quasirandom()
![b3f43ef35818169c256da2dcc2d62e1a.png](https://i-blog.csdnimg.cn/blog_migrate/ea85fca9c36698635046b610e5703a59.png)
# With categorical y-axis: the grouping axis is inferred from the aesthetics,
# so the deprecated `groupOnX = FALSE` argument is not passed.
ggplot(mpg, aes(x = hwy, y = class)) +
  geom_quasirandom()
![eda5212aa3c3b0392ced88a6ced850c3.png](https://i-blog.csdnimg.cn/blog_migrate/d81a0c84b54c1c54b24d0a51f4b06973.png)
# Some groups contain only a few points; `varwidth = TRUE` adjusts the width
# of each group dynamically.
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_quasirandom(varwidth = TRUE)
![8e787c1af077b6cacfc370e58c523255.png](https://i-blog.csdnimg.cn/blog_migrate/abd14c7d5bff6a0cc337fcbae812522f.png)
# Automatic dodging: keep three classes of mpg, colour by factor(cyl), and
# pass `dodge.width` so the colour groups are placed side by side.
sub_mpg <- mpg[mpg$class %in% c("midsize", "pickup", "suv"), ]
ggplot(
  data = sub_mpg,
  mapping = aes(x = class, y = displ, color = factor(cyl))
) +
  geom_quasirandom(dodge.width = 1)
![b6dc580b58c35d6035e8e6782f219a30.png](https://i-blog.csdnimg.cn/blog_migrate/b0b9cc268479fb5374ecbcac27a17a11.png)
改变方法
除默认方法外,geom_quasirandom 还提供了多种其他的点分布方法,可通过 method 参数指定,例如:
# Point layout chosen with method = "tukey".
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_quasirandom(method = "tukey") +
  ggtitle("Tukey texture")
![096b590f4446131b6e7c68f4fced2837.png](https://i-blog.csdnimg.cn/blog_migrate/3f27ece9bc2d25cb6f9bfabb84e6682c.png)
# Point layout chosen with method = "tukeyDense".
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_quasirandom(method = "tukeyDense") +
  ggtitle("Tukey + density")
![2376652ad18bdd818d3a7a258de89334.png](https://i-blog.csdnimg.cn/blog_migrate/3d46e969723cb6fd92158dffcd9ec5b6.png)
# Point layout chosen with method = "frowney".
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_quasirandom(method = "frowney") +
  ggtitle("Banded frowns")
![53e9482b75a6d68af622d491bb01be84.png](https://i-blog.csdnimg.cn/blog_migrate/1a27884ca4dadc72646905690803d48b.png)
# Point layout chosen with method = "smiley".
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_quasirandom(method = "smiley") +
  ggtitle("Banded smiles")
![92fac5f8155ba2c789df9a8d68859172.png](https://i-blog.csdnimg.cn/blog_migrate/4e94e92213245e99f99c4ebb1b409961.png)
# Point layout chosen with method = "pseudorandom".
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_quasirandom(method = "pseudorandom") +
  ggtitle("Jittered density")
![fb93b132d3d0a960fa58b51f3fc83588.png](https://i-blog.csdnimg.cn/blog_migrate/696944b0f0058e57d109dfab775ac580.png)
geom_beeswarm()
# geom_beeswarm with all default settings on the iris data.
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_beeswarm() +
  ggtitle("Beeswarm")
![3616f6f27bfb0b42a302a7b7610a3b81.png](https://i-blog.csdnimg.cn/blog_migrate/7cd14c96aafc2853cda3a0ebdd5df9b0.png)
# Same data with `side = 1L` (see ?geom_beeswarm for the meaning of `side`).
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_beeswarm(side = 1L)
![bb05c1822f4f7dae9de4fb0594366ad5.png](https://i-blog.csdnimg.cn/blog_migrate/fd4a14d695a7215f20f9ddccdbbbbd1d.png)
# Beeswarm of hwy by class drawn with small points (size 0.5).
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_beeswarm(size = 0.5)
![e2dc47bef28f55a2234b15381a2f5711.png](https://i-blog.csdnimg.cn/blog_migrate/eb46a0dbfc7c8931a94a04a4ad20940f.png)
# With categorical y-axis: class is mapped to y and hwy to x.
ggplot(data = mpg, mapping = aes(x = hwy, y = class)) +
  geom_beeswarm(size = 0.5)
![f3a832fbbec14ae07d36688bd9fd9af0.png](https://i-blog.csdnimg.cn/blog_migrate/4b8fe612bb0dc9ed3203129d12164baf.png)
# Points can escape from the plot with geom_beeswarm; here the discrete
# y scale gets extra additive expansion (0.5 below, 1 above).
ggplot(data = mpg, mapping = aes(x = hwy, y = class)) +
  geom_beeswarm(size = 0.5) +
  scale_y_discrete(expand = expansion(add = c(0.5, 1)))
![a0bf498d885ec48643941326af37c106.png](https://i-blog.csdnimg.cn/blog_migrate/15e47b7801b22c77eacd225c0d3d0543.png)
# Beeswarm of hwy by class drawn with slightly larger points (size 1.1).
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_beeswarm(size = 1.1)
![0e506182bebcdc7e1088a1caeab326bf.png](https://i-blog.csdnimg.cn/blog_migrate/9b1b97896ac4412389dd99c69d436848.png)
# With automatic dodging: colour groups from factor(cyl) are separated
# using dodge.width = 0.5.
ggplot(
  data = sub_mpg,
  mapping = aes(x = class, y = displ, color = factor(cyl))
) +
  geom_beeswarm(dodge.width = 0.5)
![804024ba275366f34ac01d24b9484924.png](https://i-blog.csdnimg.cn/blog_migrate/85a7f6ef2ae27d11ce177bd46641ba13.png)
改变方法
# Single-group toy data: 200 integer draws from 1..100, with replacement.
df <- data.frame(x = "A", y = sample(seq_len(100), size = 200, replace = TRUE))
# Beeswarm layout with method = "swarm".
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_beeswarm(method = "swarm", cex = 2.5) +
  ggtitle('method = "swarm" (default)')
![777e151b813697ccf3521742c7c37dfd.png](https://i-blog.csdnimg.cn/blog_migrate/c38bfcd70452ce276a3c87744d35d5b5.png)
# Beeswarm layout with method = "compactswarm".
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_beeswarm(method = "compactswarm", cex = 2.5) +
  ggtitle('method = "compactswarm"')
![ba392e1dda799e4834c04d8f92bbb073.png](https://i-blog.csdnimg.cn/blog_migrate/7eac8d38ab0f63a546c8c95e8b1182fc.png)
# Beeswarm layout with method = "compactswarm".
# NOTE(review): this call is identical to the previous example — possibly an
# unintended duplicate; confirm which method was meant here.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_beeswarm(method = "compactswarm", cex = 2.5) +
  ggtitle('method = "compactswarm"')
![c13185651e539a3fa794a96e0ac49321.png](https://i-blog.csdnimg.cn/blog_migrate/6fc2fa02e0de7ed627d8af84cf18b3f9.png)
# Beeswarm layout with method = "hex".
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_beeswarm(method = "hex", cex = 2.5) +
  ggtitle('method = "hex"')
![e3522eeb46256cde164c6487bc42924d.png](https://i-blog.csdnimg.cn/blog_migrate/246e75dbf15e3b93d48fe2f69401dd67.png)
# Beeswarm layout with method = "square".
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_beeswarm(method = "square", cex = 2.5) +
  ggtitle('method = "square"')
![791e87062019d18e65d5a9de5d8457b0.png](https://i-blog.csdnimg.cn/blog_migrate/4364a0060dac02c84fb4948c886c2831.png)
# Beeswarm layout with method = "center".
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_beeswarm(method = "center", cex = 2.5) +
  ggtitle('method = "center"')
![44b782a9bfbfd5d1e36e0afd3a883772.png](https://i-blog.csdnimg.cn/blog_migrate/2bf9c801306bfbb4aedebfd674129c7a.png)
点分布的优先级
# Data for the priority examples: three numeric groups (x = 1, 2, 3) of
# 20, 40 and 80 rows; y is drawn from a normal distribution centred on x.
dat <- data.frame(x = rep(1:3, times = c(20, 40, 80)))
dat[["y"]] <- rnorm(n = nrow(dat), mean = dat[["x"]])
# Default point priority; the x scale gets 0.5 additive expansion per side.
ggplot(data = dat, mapping = aes(x = x, y = y)) +
  geom_beeswarm(cex = 2) +
  ggtitle('Default (ascending)') +
  scale_x_continuous(expand = expansion(add = c(0.5, 0.5)))
![921725439860bf0a1c0a4b3d07da4eb3.png](https://i-blog.csdnimg.cn/blog_migrate/a92e5144a72b12f40e96e3250a7394a6.png)
# Same plot with priority = 'descending'.
ggplot(data = dat, mapping = aes(x = x, y = y)) +
  geom_beeswarm(priority = 'descending', cex = 2) +
  ggtitle('Descending') +
  scale_x_continuous(expand = expansion(add = c(0.5, 0.5)))
![ebf0cce7407ebbda4a28d9122fd3c21e.png](https://i-blog.csdnimg.cn/blog_migrate/335c75ba448d91a8b9c464cffc0379d3.png)
# Same plot with priority = 'density'.
ggplot(data = dat, mapping = aes(x = x, y = y)) +
  geom_beeswarm(priority = 'density', cex = 2) +
  ggtitle('Density') +
  scale_x_continuous(expand = expansion(add = c(0.5, 0.5)))
![d2d5b04bca87bad7fc9bdafc50cc43c9.png](https://i-blog.csdnimg.cn/blog_migrate/3e62e96e38b3de1bd876f6a635de3d45.png)
# Same plot with priority = 'random'.
ggplot(data = dat, mapping = aes(x = x, y = y)) +
  geom_beeswarm(priority = 'random', cex = 2) +
  ggtitle('Random') +
  scale_x_continuous(expand = expansion(add = c(0.5, 0.5)))
![66181390b7c0be166568f68158b15a01.png](https://i-blog.csdnimg.cn/blog_migrate/97e3dd53e76fd8cf9f5845bd5fdd1f10.png)
围捕逃逸点
# Reseed, then simulate 1000 standard-normal values, each with a group label
# sampled from G1/G2/G3. `y` is drawn before `id`, so the RNG stream is fixed.
set.seed(1995)
df2 <- data.frame(
  y = rnorm(n = 1000),
  id = sample(x = c("G1", "G2", "G3"), size = 1000, replace = TRUE)
)
# Base plot reused by the corral examples: y by id, coloured by id.
p <- ggplot(data = df2, mapping = aes(x = id, y = y, colour = id))
# use corral.width to control corral width
p +
  geom_beeswarm(corral = "none", corral.width = 0.9, cex = 2.5) +
  ggtitle('corral = "none" (default)')
![9fed7f9cd5cac998773b674d68a787cf.png](https://i-blog.csdnimg.cn/blog_migrate/b229b041641e10fe13ac4101a7a399cd.png)
# Same base plot with corral = "gutter".
p +
  geom_beeswarm(corral = "gutter", corral.width = 0.9, cex = 2.5) +
  ggtitle('corral = "gutter"')
![089828924bd2bbaba16111877448d54e.png](https://i-blog.csdnimg.cn/blog_migrate/9f26bce55376f7f7f2ec482e942cd678.png)
# Same base plot with corral = "wrap".
p +
  geom_beeswarm(corral = "wrap", corral.width = 0.9, cex = 2.5) +
  ggtitle('corral = "wrap"')
![7d79c416d6f12c33f514b8baf4dc4112.png](https://i-blog.csdnimg.cn/blog_migrate/a43edbeed410dece85ad50ba0cb1d24e.png)
# Same base plot with corral = "random".
p +
  geom_beeswarm(corral = "random", corral.width = 0.9, cex = 2.5) +
  ggtitle('corral = "random"')
![5b5d6d7c36c66abca4ef8af153786836.png](https://i-blog.csdnimg.cn/blog_migrate/a6e9bbc3c4908d6824f59730cb72e205.png)
# Same base plot with corral = "omit".
p +
  geom_beeswarm(corral = "omit", corral.width = 0.9, cex = 2.5) +
  ggtitle('corral = "omit"')
![088f593ecfbbf33b130d533a88d88102.png](https://i-blog.csdnimg.cn/blog_migrate/3155018ee6836c0a979c676e19339182.png)
总结
蜂群图可以更好地展示数据点之间的关系,避免数据点的重叠,同时也可以进行分组和着色,方便进行数据的比较和分析。但是需要注意的是,为了避免重叠,数据点会沿分类轴(通常是 x 轴)散开,因此它们在该轴上的位置并不代表真实取值,只有数值轴上的位置是准确的,需要根据具体情况进行分析和解释。
参考文献:
https://github.com/eclarke/ggbeeswarm