本文以《Scatterplots (ggplot2)》教程为基础, 并加入了笔者自己的理解与注释。
数据
# Fix the random seed so the same pseudo-random data is produced on every run.
set.seed(955)

# Build 20 noisy, roughly increasing observations: rows 1-10 belong to
# condition "A" and rows 11-20 to condition "B".
# xvar is drawn before yvar; the order of the two rnorm() calls must stay
# as is to reproduce the same numbers for a given seed.
dat <- data.frame(
  cond = rep(c("A", "B"), each = 10),
  xvar = seq_len(20) + rnorm(n = 20, sd = 3),
  yvar = seq_len(20) + rnorm(n = 20, sd = 3)
)

# Print the first rows of the data frame.
head(dat)
## cond xvar yvar
## 1 A -4.252354 3.473157275
## 2 A 1.702318 0.005939612
## 3 A 4.323054 -0.094252427
## 4 A 1.780628 2.072808278
## 5 A 11.537348 1.215440358
## 6 A 6.672130 3.608111411
library(ggplot2)
带有回归线的散点图
# Basic scatterplot: xvar on the horizontal axis, yvar on the vertical axis.
# shape = 1 draws every point as a hollow circle.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar)) +
  geom_point(shape = 1)

# Same scatterplot with a linear regression line added (method = lm).
# se is not overridden here, so the grey shaded band around the line is
# drawn; it represents the 95% confidence region.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
# Hollow-circle scatterplot with a linear regression line; se = FALSE
# suppresses the shaded confidence region.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar)) +
  geom_point(shape = 1) +
  geom_smooth(
    method = lm,
    se = FALSE
  )
# Hollow-circle scatterplot with geom_smooth() left at its defaults, so
# ggplot2 picks the smoothing method itself (the console message recorded
# below reports that loess was chosen for this data).
ggplot(data = dat, mapping = aes(x = xvar, y = yvar)) +
  geom_point(shape = 1) +
  geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
设置颜色
# Map the point colour to cond so each condition gets its own colour.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar, color = cond)) +
  geom_point(shape = 1)
# Colour by cond again, but with a different palette and one regression
# line per condition.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar, color = cond)) +
  geom_point(shape = 1) +
  scale_colour_hue(l = 50) + # l is the lightness of the hue palette
  geom_smooth(
    method = lm, # linear regression line
    se = FALSE
  )
# Colour by cond with per-condition regression lines that are extended
# beyond the range of each group's own data.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar, color = cond)) +
  geom_point(shape = 1) +
  scale_colour_hue(l = 50) + # set the palette lightness
  geom_smooth(
    method = lm, # linear regression line
    se = FALSE, # no shaded confidence region
    fullrange = TRUE # extend the regression lines
  )
# Map the point shape to cond, using the default shapes.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar, shape = cond)) +
  geom_point()

# Same mapping, but choose the shapes by hand: shape code 1 (hollow circle)
# and shape code 2 are assigned to the conditions.
ggplot(data = dat, mapping = aes(x = xvar, y = yvar, shape = cond)) +
  geom_point() +
  scale_shape_manual(values = c(1, 2))
重叠数据的处理
# Snap xvar and yvar to the nearest multiple of 5 and store the results in
# new columns, so that several points share identical coordinates.
dat$xrnd <- 5 * round(dat$xvar / 5)
dat$yrnd <- 5 * round(dat$yvar / 5)
# Handle overplotting with transparency: each dot is drawn at 1/4 opacity,
# so the more points overlap at a location, the darker it appears.
# For heavy overplotting, try a smaller alpha value.
ggplot(data = dat, mapping = aes(x = xrnd, y = yrnd)) +
  geom_point(
    shape = 19, # solid circles
    alpha = 1 / 4 # 1/4 opacity
  )
# Handle overplotting with jitter: overlapping points are randomly nudged
# around their shared position so they become individually visible.
# position_jitter() is given width = 1 for the x-axis and height = 0.5 for
# the y-axis.
ggplot(data = dat, mapping = aes(x = xrnd, y = yrnd)) +
  geom_point(
    shape = 1, # hollow circles
    position = position_jitter(width = 1, height = 0.5)
  )