R语言基础图形绘制--条形图
简介
- 条形图也许是最常用的数据可视化方法,通常用来展示不同的分类下(在 x 轴上)某个数值型变量的取值(在 y 轴上)。
1. 基础函数barplot()绘制条形图
(1) R包安装
# The first example plots the results of a study on a new treatment for
# rheumatoid arthritis; the `Arthritis` data set ships with the vcd package.
# Install vcd only when it is missing, then always attach it. Checking with
# requireNamespace() and loading with library() guarantees the package is
# attached even on the run that had to install it (a require()-only check
# leaves it unattached in that case).
if (!requireNamespace("vcd", quietly = TRUE)) {
  install.packages("vcd")
}
suppressPackageStartupMessages(library(vcd))
(2) Barplot
Barplot 1
# Count how many patients fall into each improvement category.
improved <- table(Arthritis[["Improved"]])
improved
#   None   Some Marked
#     42     14     28
# Draw the counts as a simple vertical bar plot.
barplot(
  improved,
  main = "simple barplot",
  xlab = "improvement",
  ylab = "frequency"
)
Barplot 2
# Horizontal version of the simple bar plot. With horiz = TRUE the bars run
# along the x axis, so the counts are labelled on xlab and the categories on
# ylab.
barplot(
  improved,
  main = "simple barplot",
  xlab = "frequency",
  ylab = "improvement",
  horiz = TRUE
)
Barplot 3
# Sort the bars by height.
# Turn the frequency table into a data frame (columns Var1 and Freq).
data <- as.data.frame(improved)
# Reorder the rows by ascending frequency.
data <- data[order(data[["Freq"]]), ]
# Freeze the sorted order into the factor levels so the plot keeps it.
data[["Var1"]] <- factor(data[["Var1"]], levels = data[["Var1"]])
barplot(
  data$Freq ~ data$Var1,
  main = "sorted barplot",
  xlab = "improvement",
  ylab = "frequency",
  space = 0.4  # space controls the gap between bars
)
Barplot 4
# Same sorted bar plot, now with a custom fill colour and border colour.
barplot(
  data$Freq ~ data$Var1,
  main = "sorted barplot",
  xlab = "improvement",
  ylab = "frequency",
  space = 0.4,
  col = "steelblue",
  border = "red"
)
Barplot 5(堆砌条形图)
注意:barplot() 的输入数据不一定必须是table类型,matrix也可以。下面先用一个matrix绘制分组条形图(beside = TRUE),举一个简单的小例子:
# barplot() also accepts a plain numeric matrix, not only a table.
a <- c(26.74, 17.55, 37.03, 18.68)
# Fill a 2-row matrix column by column; each column becomes one group.
# (Fixes the `matric` typo, which stopped with "could not find function".)
data <- matrix(a, nrow = 2)
# beside = TRUE draws the values of each column side by side (grouped bars).
barplot(data, beside = TRUE, names.arg = c("group1", "group2"))
# Stacked bar plot.
# Cross-tabulate improvement level (rows) against treatment (columns).
improved <- table(Arthritis$Improved, Arthritis$Treatment)
improved
#          Placebo Treated
#   None        29      13
#   Some         7       7
#   Marked       7      21
# One colour per row of the table, shared by the bars and the legend.
stack_cols <- c("red", "green", "steelblue")
# xlim is set wider than the two bars -- presumably to leave room for the
# legend in the top-right corner.
barplot(
  improved,
  main = "Stacked Bar Plot",
  ylab = "Frequency",
  col = stack_cols,
  xlim = c(0, 3)
)
legend("topright", legend = rownames(improved), cex = 0.8, fill = stack_cols)
Barplot 6(分组条形图)
# Grouped bar plot: beside = TRUE places the improvement levels next to each
# other within every treatment instead of stacking them.
level_cols <- c("red", "green", "steelblue")
barplot(
  improved,
  main = "Grouped Bar Plot",
  xlab = "Treatment",
  ylab = "Frequency",
  col = level_cols,
  beside = TRUE
)
legend("top", legend = rownames(improved), cex = 0.8, fill = level_cols)
# For the remaining barplot() arguments, see its official help page:
# help(barplot)
# ?barplot
2. ggplot函数绘制条形图
(1) R包安装并加载
# Install the two packages only when they are missing, then attach them.
# requireNamespace() is used for the availability check so that the check
# itself does not attach anything; library() below does the loading and
# stops with an error if a package is still unavailable.
if (!requireNamespace("gcookbook", quietly = TRUE)) {
  install.packages("gcookbook")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(gcookbook)  # data package used by the examples
library(ggplot2)
(2) ggplot_bar
ggplot_bar 1
# To sort the bars, reorder the factor levels as in the base-graphics example.
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(
    stat = "identity",
    width = 0.5,
    colour = "red",
    lwd = 1,
    fill = "steelblue"
  ) +
  theme_classic() +  # change the axis style
  theme(
    axis.line.x = element_line(size = 1.5),
    axis.line.y = element_line(size = 1.5)
  )  # adjust the theme: axis lines drawn at size 1.5
ggplot_bar 2
# Same plot with one fill colour per bar and the axis padding removed.
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(
    stat = "identity",
    width = 0.5,
    colour = "red",
    lwd = 1,  # lwd changes the width of the bar border
    fill = c("gray", "green", "steelblue")
  ) +
  theme_classic() +
  # For a discrete x axis, expand controls where the first bar starts.
  scale_x_discrete(expand = c(0, 0)) +
  # Adjust the y axis: no padding, range fixed to 0-6.
  scale_y_continuous(expand = c(0, 0), limits = c(0, 6)) +
  theme(
    axis.line.x = element_line(size = 1.5),
    axis.line.y = element_line(size = 1.5)
  )  # adjust the theme
ggplot_bar 3(分组条形图)
# Grouped bar plot.
cabbage_exp  # inspect the data
# The printed output is kept as comments; the header row is commented out as
# well, because a bare `Cultivar Date Weight ...` line is a syntax error.
#   Cultivar Date Weight        sd  n         se
#        c39  d16   3.18 0.9566144 10 0.30250803
#        c39  d20   2.80 0.2788867 10 0.08819171
#        c39  d21   2.74 0.9834181 10 0.31098410
#        c52  d16   2.26 0.4452215 10 0.14079141
#        c52  d20   3.11 0.7908505 10 0.25008887
#        c52  d21   1.47 0.2110819 10 0.06674995
# position_dodge() places the cultivars side by side within each date.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_dodge(0.7), width = 0.6) +
  theme_bw()
ggplot_bar 4(堆砌条形图)
# Stacked bar plot: no dodge position is given, so the cultivars are stacked
# within each date; every segment gets a black border.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(
    stat = "identity",
    width = 0.6,
    lwd = 1,
    colour = "black"
  ) +
  theme_bw()
补充: 对正负取值的条形图分别着色。
library(gcookbook)  # data package (the `climate` data is used below)
# Keep the Berkeley series from 1900 onwards. subset() is called directly:
# no package attached at this point of the script provides the magrittr pipe
# (%>%), so the piped form stops with "could not find function".
csub <- subset(climate, Source == "Berkeley" & Year >= 1900)
# Flag the non-negative anomalies so the two signs get different fills.
csub$pos <- csub$Anomaly10y >= 0
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(
    stat = "identity",
    position = "identity",
    colour = "black",
    size = 0.5
  ) +
  # FALSE (negative) -> blue, TRUE (non-negative) -> red. guide = "none"
  # hides the legend; it replaces the deprecated `guide = F`.
  scale_fill_manual(values = c("blue", "red"), guide = "none") +
  theme_void()
3. 学以致用
- 读入基因表达数据,计算标准差(SD),并对数据进行处理。
- 并且在此部分中,对柱子添加了误差线。
# Read the gene expression table. The code below uses the columns genename,
# fC.1, fC.2, fC.kd1 and fC.kd2 -- presumably two control and two knock-down
# replicates of FPKM values; confirm against bar.csv.
bar <- read.csv("bar.csv", header = TRUE)
# Mean of the two fC.* columns and of the two fC.kd* columns.
bar$ctl <- (bar$fC.1 + bar$fC.2) / 2
bar$exp <- (bar$fC.kd1 + bar$fC.kd2) / 2
# Move the gene names into the row names, then drop the first column
# (assumes genename is the first column of the file -- TODO confirm).
rownames(bar) <- bar$genename
bar <- bar[, -1]
bar <- log2(bar + 1)
# Express every value relative to a control. Order matters: the control
# columns are normalised last because the earlier divisions still need
# their un-normalised values.
bar$exp <- bar$exp / bar$ctl
bar$ctl <- bar$ctl / bar$ctl
# NOTE(review): kd1 is divided by fC.2 and kd2 by fC.1 -- confirm that this
# cross-pairing is intended.
bar$fC.kd1 <- bar$fC.kd1 / bar$fC.2
bar$fC.kd2 <- bar$fC.kd2 / bar$fC.1
bar$fC.1 <- bar$fC.1 / bar$fC.1
bar$fC.2 <- bar$fC.2 / bar$fC.2
# Long format: all control values first, then all experiment values.
n_genes <- nrow(bar)
fpkm <- c(bar$ctl, bar$exp)
gene <- rep(rownames(bar), 2)
# Row-wise standard deviation over columns 1:2 and over columns 3:4
# (assumed to be the fC.* and fC.kd* pairs after dropping genename).
sd_control <- apply(bar[, 1:2], 1, sd)
sd_experiment <- apply(bar[, 3:4], 1, sd)
# Stored under its own name so that the stats::sd function is not shadowed;
# the data frame column is still called `sd`.
sd_values <- c(sd_control, sd_experiment)
# One label per row, sized from the data instead of a hard-coded 10.
group <- rep(c("control", "experiment"), each = n_genes)
fig <- data.frame(fpkm, gene, sd = sd_values, group)
fig
# Fix the order in which the genes appear on the x axis.
fig$gene <- factor(
  fig$gene,
  levels = c(
    "Pou5f1", "Eif4g1", "Tead4", "Yap1",
    "Yy1", "Gata4", "Ctnnb1", "Klf2",
    "Pnn", "Zfp553"
  )
)
1. 绘制基因表达量FPKM的相对值
# ggplot2 is expected to be attached already. ggpubr is loaded for the
# optional stat_compare_means() layer that is commented out below.
library(ggpubr)
# windowsFonts() only exists on Windows builds of R. Register the Arial
# alias there and fall back to the generic "sans" family elsewhere, so the
# script does not stop with "could not find function" on other platforms.
if (.Platform$OS.type == "windows") {
  windowsFonts(myFont = windowsFont("Arial"))
  axis_font <- "myFont"
} else {
  axis_font <- "sans"
}
ggplot(fig, aes(x = gene, y = fpkm, fill = group)) +
  geom_bar(
    stat = "identity",
    position = position_dodge(0.9),
    width = 0.6,
    color = "black",
    size = 1
  ) +
  # One-sided error bars: from the bar top (fpkm) up to fpkm + sd. They use
  # the same dodge width as the bars so they line up with them.
  geom_errorbar(
    aes(ymin = fpkm, ymax = fpkm + sd),
    width = 0.2,
    size = 1,
    position = position_dodge(0.9)
  ) +
  # stat_compare_means(aes(group = gene), label.y = 1.10) +
  scale_fill_brewer(palette = "Paired") +
  xlab(label = "") +
  ylab(label = "") +
  scale_y_continuous(
    limits = c(0, 1.15),
    breaks = seq(0, 1.15, 0.25),
    expand = c(0, 0)
  ) +
  theme_classic(base_line_size = 1, base_rect_size = 1) +
  theme(
    # Gene names are drawn in italics and rotated by 45 degrees.
    axis.text.x = element_text(
      angle = 45,
      size = 11,
      family = axis_font,
      face = "italic",
      hjust = 0.5,
      vjust = 0.5
    ),
    axis.text.y = element_text(family = axis_font, size = 11)
  )
2. 绘制挑选出的基因(Yap1、Tead4)的表达量FPKM相对值
# Keep only the two genes of interest. %in% never yields NA, so rows whose
# gene is NA are dropped instead of being carried along as all-NA rows (which
# is what the `==` / `|` filter does).
fig2 <- fig[fig$gene %in% c("Yap1", "Tead4"), ]
# Restrict the factor to the two selected genes and fix their plotting order.
fig2$gene <- factor(fig2$gene, levels = c("Yap1", "Tead4"))
# "myFont" is a font alias that can only be registered on Windows (via
# windowsFonts()); use the generic "sans" family on other platforms.
axis_font <- if (.Platform$OS.type == "windows") "myFont" else "sans"
ggplot(fig2, aes(x = gene, y = fpkm, fill = group)) +
  geom_bar(
    stat = "identity",
    position = position_dodge(0.9),
    width = 0.6,
    color = "black",
    size = 1
  ) +
  # One-sided error bars: from the bar top (fpkm) up to fpkm + sd.
  geom_errorbar(
    aes(ymin = fpkm, ymax = fpkm + sd),
    width = 0.2,
    size = 1,
    position = position_dodge(0.9)
  ) +
  # stat_compare_means(aes(group = gene), label.y = 1.10) +
  scale_fill_brewer(palette = "Paired") +
  xlab(label = "") +
  ylab(label = "") +
  scale_y_continuous(
    limits = c(0, 1.15),
    breaks = seq(0, 1.15, 0.25),
    expand = c(0, 0)
  ) +
  theme_classic(base_line_size = 1, base_rect_size = 1) +
  theme(
    # Gene names are drawn in italics and rotated by 45 degrees.
    axis.text.x = element_text(
      angle = 45,
      size = 11,
      family = axis_font,
      face = "italic",
      hjust = 0.5,
      vjust = 0.5
    ),
    axis.text.y = element_text(family = axis_font, size = 11)
  )
- ##侵权请联系作者删除!
参考书籍
[1] 《R数据可视化手册》
[2] 《R语言实战》(第二版)