箱线图汇总ggplot2(自备)

目录

①简单箱线图

②配对箱线图

③多组箱线图

数据格式查看

进行数据重塑

进行P值计算

进行绘图

④批量多组间箱线图

⑤多组抖动散点图


Immunogenomic Landscape of Hematological Malignancies - PubMed (nih.gov)有多个图的数据及可视化代码:focuslyj/ImmunogenomicLandscape-BloodCancers (gitee.com)

①简单箱线图

数据使用鸢尾花数据模拟

多种技巧可查看:ggplot2 Based Publication Ready Plots • ggpubr (datanovia.com)

箱线图合并散点图拼图_箱型图加散点-CSDN博客

小提琴图差异分析_差异分析小提琴图-CSDN博客

#普通箱线图##
rm(list = ls()) 
library(ggplot2)
library(ggpubr)
library(cowplot)
data <- iris##鸢尾花数据集

data1 <- data[,c(1,5)]##取两个种类的 萼片长度数据
data2 <- data1[data1$Species %in% c('setosa','versicolor'),]
range(data2$Sepal.Length)#[1] 4.3 7.0 范围设置
#差异检验
wilcox.test(Sepal.Length~Species,data =data2)#W = 168.5, p-value = 8.346e-14

p<- ggplot(data=data2)+ 
  geom_boxplot(mapping=aes(x=Species,        #X轴设置
                           y=Sepal.Length,   #Y轴设置
                           colour = Species ), #箱线图
               alpha = 0.5,
               size=1.5,
               width = 0.6)+ 
  #geom_jitter(mapping=aes(x=Species,y=Sepal.Length,colour = Species), #散点
  #            alpha = 0.3,size=3)+
  scale_color_manual(limits=c("setosa","versicolor"), 
                     values=c("#5F80B4","#922927"))+ 
  #"#85B22E","#5F80B4","#E29827","#922927" 颜色配置
  geom_signif(mapping=aes(x=Species,y=Sepal.Length), # 不同组别的显著性
              comparisons = list(c("setosa", "versicolor")),
              #map_signif_level=T, # T显示显著性,F显示p value
              map_signif_level=F, # T显示显著性,F显示p value
              tip_length=c(0,0), # 修改显著性线两端的长短
              y_position = c(7.5), # 设置显著性线的位置高度(Y 轴的高度)
              size=1, # 修改线的粗细
              textsize = 4, # 修改显著性标记的大小
              test = "wilcox.test")+ # 检验选择 "t.test"
  theme_classic(  # 主题设置,这个是无线条主题
    base_line_size = 1 # 坐标轴的粗细
  )+
  labs(title="鸢尾花",##标题
       x="",y="Sepal.Length")+ # 添加标题,x轴,y轴内容
  theme(plot.title = element_text(size = 15,
                                  colour = "black",
                                  hjust = 0.5),
        axis.title.y = element_text(size = 15, 
                                    # family = "myFont", 
                                    color = "black",
                                    face = "bold", 
                                    vjust = 1.9, 
                                    hjust = 0.5, 
                                    angle = 90),
        legend.title = element_text(color="black", # 修改图例的标题
                                    size=15, 
                                    face="bold"),
        legend.text = element_text(color="black", # 设置图例标签文字
                                   size = 10, 
                                   face = "bold"),
        axis.text.x = element_text(size = 13,  # 修改X轴上字体大小,
                                   color = "black", # 颜色
                                   face = "bold", #  face取值:plain普通,bold加粗,italic斜体,bold.italic斜体加粗
                                   vjust = 0.5, # 位置
                                   hjust = 0.5, 
                                   angle = 0), #角度
        axis.text.y = element_text(size = 13,  
                                   color = "black",
                                   face = "bold", 
                                   vjust = 0.5, 
                                   hjust = 0.5, 
                                   angle = 0) 
  )
p
dev.off()

#修改:增加散点
geom_jitter(mapping=aes(x=Species,y=Sepal.Length,colour = Species), #散点
              alpha = 0.3,size=3)+
#显示显著性标志
map_signif_level=T, # T显示显著性,F显示p value

②配对箱线图

需要rep函数对数据进行配对编号

#rep replicates the values in x. It is a generic function, and the (internal) default #method is described here.
rep(1:4, 2)
rep(1:4, each = 2)       # not the same.
rep(1:4, c(2,2,2,2))     # same as second.
##配对箱线图需要设置配对编号##
?rep
data2$id <- rep(1:50, 2)#使用rep 函数进行编号配对

 head(data2)
  Sepal.Length Species id
1          5.1  setosa  1
2          4.9  setosa  2
3          4.7  setosa  3
4          4.6  setosa  4
5          5.0  setosa  5
6          5.4  setosa  6

主要是添加geom_line参数R语言绘图基础篇-线图(geom_line) - 知乎 (zhihu.com)

复现SCI文章:配对连线、散点箱线图 - 知乎 (zhihu.com)

注意细节抖动点与连线的位置参数需要一致才能一一对应,这里geom_jitter为colour = Species,而 geom_line的设置为group = id,所以将其都设置为中线对应。

geom_jitter(aes(x=Species,y=Sepal.Length,colour = Species), #散点
              alpha = 0.3,size=3,
              position = position_dodge(0))+
  geom_line(aes(x=Species,y=Sepal.Length,group = id),##添加连线
            color = 'grey40', 
            lwd = 0.5,
            position = position_dodge(0))+ #添加连线#"identity" 表示在中线的连线
p<- ggplot(data=data2)+ 
  geom_boxplot(mapping=aes(x=Species,y=Sepal.Length,colour = Species ), #箱线图
               alpha = 0.5,size=1.5,width = 0.6)+ 
  geom_jitter(aes(x=Species,y=Sepal.Length,colour = Species), #散点
              alpha = 0.3,size=3,
              position = position_dodge(0))+
  geom_line(aes(x=Species,y=Sepal.Length,group = id),##添加连线
            color = 'grey40', 
            lwd = 0.5,
            position = position_dodge(0))+ #添加连线#"identity" 表示在中线的连线
  scale_color_manual(limits=c("setosa","versicolor"), 
                     values=c("#5F80B4","#922927"))+ 
  #"#85B22E","#5F80B4","#E29827","#922927" 颜色配置
  geom_signif(mapping=aes(x=Species,y=Sepal.Length), # 不同组别的显著性
              comparisons = list(c("setosa", "versicolor")),
              map_signif_level=T, # T显示显著性,F显示p value
              #map_signif_level=F, # T显示显著性,F显示p value
              tip_length=c(0,0), # 修改显著性线两端的长短
              y_position = c(7.5), # 设置显著性线的位置高度(Y 轴的高度)
              size=1, # 修改线的粗细
              textsize = 4, # 修改显著性标记的大小
              test = "wilcox.test")+ # 检验选择 "t.test"
  theme_classic(  # 主题设置,这个是无线条主题
    base_line_size = 1 # 坐标轴的粗细
  )+
  labs(title="鸢尾花",##标题
       x="",y="Sepal.Length")+ # 添加标题,x轴,y轴内容
  theme(plot.title = element_text(size = 15, colour = "black",hjust = 0.5),
        axis.title.y = element_text(size = 15,color = "black",face = "bold", #  face取值:plain普通,bold加粗,italic斜体,bold.italic斜体加粗
                                    vjust = 1.9,hjust = 0.5,angle = 90),
        legend.title = element_text(color="black", size=15,face="bold"),
        legend.text = element_text(color="black", size = 10,face = "bold"),
        axis.text.x = element_text(size = 13, color = "black", face = "bold", 
                                   vjust = 0.5,hjust = 0.5,angle = 0), #角度
        axis.text.y = element_text(size = 13,color = "black",face = "bold",vjust = 0.5,hjust = 0.5,angle = 0) 
  )
p
dev.off()

还可以使用跟着Nat Commun学作图 | 4.配对箱线图+差异分析-CSDN博客


③多组箱线图
数据格式查看
##其实就是多组箱线图进行合并处理而已##
rm(list = ls())
library(ggpubr)
library(ggplot2)
library(RColorBrewer)
library(gridExtra)
library(dplyr)
library(cowplot)##进行拼图
library(tibble)#加载了tibble包的缘故:需要手动检测行名是否添加成功
library(reshape2)

data <- iris##鸢尾花数据集
data1 <- data[c(1:100),]#提取两组数据做多组间箱线图
data1$id <- paste0("iris",1:nrow(data1))##数据重塑需要一列行名
 head(data1)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species    id
1          5.1         3.5          1.4         0.2  setosa iris1
2          4.9         3.0          1.4         0.2  setosa iris2
3          4.7         3.2          1.3         0.2  setosa iris3
4          4.6         3.1          1.5         0.2  setosa iris4
5          5.0         3.6          1.4         0.2  setosa iris5
6          5.4         3.9          1.7         0.4  setosa iris6
进行数据重塑
##数据重塑 "Species"      "id"    
mydata1<-melt(data1,
  id.vars=c("id",#行名
            "Species"),##分类变量
  variable.name="xlabel",##其余的行名,也就是结果的X轴变量
  value.name="number"##数值
)

ylabname <- paste("number")#绘图Y轴名称
colnames(mydata1)#[1] "id"      "Species" "xlabel"  "number" 
head(mydata1)
  Species       xlabel number
1  setosa Sepal.Length    5.1
2  setosa Sepal.Length    4.9
3  setosa Sepal.Length    4.7
4  setosa Sepal.Length    4.6
5  setosa Sepal.Length    5.0
6  setosa Sepal.Length    5.4
进行P值计算
#### 计算p value####
pvalues <- sapply(mydata1$xlabel, function(x) {
  res <- wilcox.test(as.numeric(number) ~ Species, 
                     data = subset(mydata1, xlabel == x)) #两组,wilcox.test或t.test;多组,kruskal.test或aov(one-way ANOVA test)
  res$p.value
})
pv <- data.frame(xlabelP = mydata1$xlabel, pvalue = pvalues)
pv$sigcode <- cut(pv$pvalue, c(0,0.0001, 0.001, 0.01, 0.05, 1), 
                  labels=c('****','***', '**', '*', 'ns'))
pv$sigcode1 <- signif(pv$pvalue, 4)##保留有效数值
mydata1<-mydata1[,-1]##可以去除样本名称
head(pv)
       xlabelP       pvalue sigcode  sigcode1
1 Sepal.Length 8.345827e-14    **** 8.346e-14
2 Sepal.Length 8.345827e-14    **** 8.346e-14
3 Sepal.Length 8.345827e-14    **** 8.346e-14
4 Sepal.Length 8.345827e-14    **** 8.346e-14
5 Sepal.Length 8.345827e-14    **** 8.346e-14
6 Sepal.Length 8.345827e-14    **** 8.346e-14
进行绘图
# 画box plot
#pdf("多组箱线图.pdf",width = 9,height = 6)##一定添加大小
p.box <- ggplot(mydata1, 
                aes(x=xlabel, y=number, color=Species, fill=Species)) +
  geom_boxplot(alpha = .5) + #半透明
  theme_classic() + #或theme_bw()
  scale_fill_brewer(palette = "Set1") + #按类填充颜色
  scale_color_brewer(palette = "Set1") + #按类给边框着色
  theme(axis.text.x = element_text(colour="black", size = 11,
                                   angle = 30, hjust = .5, vjust = .5)) +
  geom_text(aes(x=xlabelP, y=max(mydata1$number) * 1,##可以调整Y轴坐标,用于展示P或显著性标记
                label = pv$sigcode),data=pv, inherit.aes=F) +#pv$sigcode1 展示P值数值
  labs(x=NULL,title = "iris") + ##标题
  ylab("iris")##y 轴名称
p.box

dev.off()

多图合并

grid.arrange(p1, p.box, ncol = 2)

补充文献学习及代码获取:focuslyj/ImmunogenomicLandscape-BloodCancers (gitee.com)

数据获取:https://www.synapse.org/#!Synapse:syn21991014/files/

R语言设置数值输出(保留至小数点后位数和保留有效数字)_options(digits=3)-CSDN博客


④批量多组间箱线图

代码:COVID19/Boxplot_mRNA.R at master · DongshengChen-TY/COVID19 (github.com)

数据在图2C进行整理:https://static-content.springer.com/esm/art%3A10.1038%2Fs41467-021-24482-1/MediaObjects/41467_2021_24482_MOESM13_ESM.xlsx


rm(list = ls()) 
mRNA<-read.csv("All_mRNA_FPKM.csv",header=T,row.names=1)
gc<-c("CD28","CD3D","CD8A","LCK","ZAP70",
      "GATA3","EOMES","IL23A","CXCL8","CXCR1")##选取部分

exp<-log2(mRNA+1)##进行log转换
bar_mat<-exp[,gc]
anno<- read.csv("sample_index.csv",header=T,row.names=1)
anno$type2 <- anno$Type
bar_mat<-bar_mat[rownames(anno),]
bar_mat$sam=anno$Type##给表达矩阵添加样本分组信息 
 

转换为这种数据格式即可使用代码进行运算作图

##对分组设置向量,为了保证箱线图的排序一致##
bar_mat$sam<-factor(bar_mat$sam,levels=c("Asymptomatic","Mild","Severe","Critical"))
library(RColorBrewer)
getPalette = colorRampPalette(brewer.pal(9, "Set1"))
c=getPalette(9)
library(ggpubr)
library(ggplot2)
plist2<-list()
co<-c("#5CB85C","#337AB7","#F0AD4E","#D9534F")##颜色设置
for (i in 1:length(gc)){
  bar_tmp<-bar_mat[,c(gc[i],"sam")]
  colnames(bar_tmp)<-c("Expression","sam")
  bar_tmp$color<-c(rep(x="1",times=64),rep(x="2",times=64),rep(x="3",times=34),rep(x="4",times=16))
  my_comparisons1 <- list(c("Asymptomatic", "Mild")) #共有4种样本,C43 有6种两两组合形式
  my_comparisons2 <- list(c("Asymptomatic", "Severe"))
  my_comparisons3 <- list(c("Asymptomatic", "Critical"))
  my_comparisons4 <- list(c("Mild", "Severe"))
  my_comparisons5 <- list(c("Mild", "Critical"))
  my_comparisons6 <- list(c("Severe", "Critical"))
  pb1<-ggboxplot(bar_tmp,x="sam",y="Expression",color="color",fill=NULL,add = "jitter",bxp.errorbar.width = 0.6,width = 0.4,size=0.01,font.label = list(size=30), palette = c(co[1],co[2],co[3],co[4]))+theme(panel.background =element_blank())
  pb1<-pb1+theme(axis.line=element_line(colour="black"))+theme(axis.title.x = element_blank())
  #pb1<-pb1+scale_fill_manual(name="",labels=c("Ctrl","COV"),values=c(c[2],c[1]))
  pb1<-pb1+theme(axis.title.y = element_blank())+theme(axis.text.x = element_text(size = 15,angle = 45,vjust = 1,hjust = 1))
  pb1<-pb1+theme(axis.text.y = element_text(size = 15))+ggtitle(gc[i])+theme(plot.title = element_text(hjust = 0.5,size=15,face="bold"))
  pb1<-pb1+theme(legend.position = "NA")
  pb1<-pb1+stat_compare_means(method="t.test",hide.ns = F,comparisons =c(my_comparisons1,my_comparisons2,my_comparisons3,my_comparisons4,my_comparisons5,my_comparisons6),label="p.signif")
  plist2[[i]]<-pb1
} 
pall<-plot_grid(plist2[[1]],plist2[[2]],plist2[[3]],
                plist2[[4]],plist2[[5]],plist2[[6]],
                plist2[[7]],plist2[[8]],plist2[[9]],
                plist2[[10]],ncol=4)##这里选中的基因数量,排为多少列
pall


⑤多组抖动散点图
##数据整理##
rm(list = ls()) 
library(ggplot2)
library(ggpubr)
library(cowplot)
data <- iris

#模拟数据
data1 <- data[,c(5,1)]
colnames(data1)[2] <- "values"
data2 <- data[,c(5,2)]
data2$Species <- paste0("B",data2$Species)
colnames(data2)[2] <- "values"

dat <- rbind(data1,data2)
colnames(dat)[1] <- "Group"
table(dat$Group)#setosa  versicolor   virginica     Bsetosa Bversicolor  Bvirginica 
##进行因子设定,为了X轴的方向
dat$Group <- factor(dat$Group, levels =c('setosa','versicolor','virginica',
                                           'Bsetosa','Bversicolor','Bvirginica'))
head(dat)
############# 绘图 -----------
library(ggplot2)
library(latex2exp)
ggplot(dat, aes(Group, values))+
  # 箱线图:
  geom_boxplot(outlier.shape = NA, width = 0.6)+
  # 抖动散点:
  geom_jitter(aes(color = Group), width = 0.15, size = 1)+
  # 横线:
  geom_hline(yintercept = 5, linetype = "dashed")+
  # 箭头:
  geom_segment(aes(x = 2, y = 7.5, xend = 2, yend = 7.2),
               arrow = arrow(length = unit(1, "mm"))) +
  scale_color_manual(name = "Subtype",
                     values = c("#fd6ab0", "#aa5700", "#f48326", "#ffd711",
                                "#9bd53f", "#00ae4c", "#00c1e3", "#007ddb",
                                "#8538d1", "#d01910"))+
  # 文字注释:
  annotate("text", label = TeX("$\\textit{P} = 1.6e-06$"), 
           size = 3, x = 2, y = 8)+##文字注释位置
  # 坐标轴标签:
  xlab("")+
  ylab(TeX("number"))+
  # 标题:
  ggtitle(TeX("iris"))+
  # 主题:
  theme_classic()+
  theme(plot.title = element_text(hjust = 0.5))+
  guides(color=guide_legend(override.aes = list(size=2),
                            title.theme = element_text(face = "bold")))

#ggsave("plot.pdf", height = 5, width = 7)
dev.off()

跟着高分SCI学作图 -- 箱线图+抖动散点 (qq.com)


参考文献:

1:Immunogenomic Landscape of Hematological Malignancies

2:The trans-omics landscape of COVID-19

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值