目录
Immunogenomic Landscape of Hematological Malignancies - PubMed (nih.gov)有多个图的数据及可视化代码:focuslyj/ImmunogenomicLandscape-BloodCancers (gitee.com)
①简单箱线图
数据使用鸢尾花数据模拟
多种技巧可查看:ggplot2 Based Publication Ready Plots • ggpubr (datanovia.com)
#普通箱线图##
rm(list = ls())
library(ggplot2)
library(ggpubr)
library(cowplot)
data <- iris##鸢尾花数据集
data1 <- data[,c(1,5)]##取两个种类的 萼片长度数据
data2 <- data1[data1$Species %in% c('setosa','versicolor'),]
range(data2$Sepal.Length)#[1] 4.3 7.0 范围设置
#差异检验
wilcox.test(Sepal.Length~Species,data =data2)#W = 168.5, p-value = 8.346e-14
p<- ggplot(data=data2)+
geom_boxplot(mapping=aes(x=Species, #X轴设置
y=Sepal.Length, #Y轴设置
colour = Species ), #箱线图
alpha = 0.5,
size=1.5,
width = 0.6)+
#geom_jitter(mapping=aes(x=Species,y=Sepal.Length,colour = Species), #散点
# alpha = 0.3,size=3)+
scale_color_manual(limits=c("setosa","versicolor"),
values=c("#5F80B4","#922927"))+
#"#85B22E","#5F80B4","#E29827","#922927" 颜色配置
geom_signif(mapping=aes(x=Species,y=Sepal.Length), # 不同组别的显著性
comparisons = list(c("setosa", "versicolor")),
#map_signif_level=T, # T显示显著性,F显示p value
map_signif_level=F, # T显示显著性,F显示p value
tip_length=c(0,0), # 修改显著性线两端的长短
y_position = c(7.5), # 设置显著性线的位置高度(Y 轴的高度)
size=1, # 修改线的粗细
textsize = 4, # 修改显著性标记的大小
test = "wilcox.test")+ # 检验选择 "t.test"
theme_classic( # 主题设置,这个是无线条主题
base_line_size = 1 # 坐标轴的粗细
)+
labs(title="鸢尾花",##标题
x="",y="Sepal.Length")+ # 添加标题,x轴,y轴内容
theme(plot.title = element_text(size = 15,
colour = "black",
hjust = 0.5),
axis.title.y = element_text(size = 15,
# family = "myFont",
color = "black",
face = "bold",
vjust = 1.9,
hjust = 0.5,
angle = 90),
legend.title = element_text(color="black", # 修改图例的标题
size=15,
face="bold"),
legend.text = element_text(color="black", # 设置图例标签文字
size = 10,
face = "bold"),
axis.text.x = element_text(size = 13, # 修改X轴上字体大小,
color = "black", # 颜色
face = "bold", # face取值:plain普通,bold加粗,italic斜体,bold.italic斜体加粗
vjust = 0.5, # 位置
hjust = 0.5,
angle = 0), #角度
axis.text.y = element_text(size = 13,
color = "black",
face = "bold",
vjust = 0.5,
hjust = 0.5,
angle = 0)
)
p
dev.off()
#修改:增加散点
geom_jitter(mapping=aes(x=Species,y=Sepal.Length,colour = Species), #散点
alpha = 0.3,size=3)+
#显示显著性标志
map_signif_level=T, # T显示显著性,F显示p value
②配对箱线图
需要rep函数对数据进行配对编号
#rep replicates the values in x. It is a generic function, and the (internal) default #method is described here.
rep(1:4, 2)
rep(1:4, each = 2) # not the same.
rep(1:4, c(2,2,2,2)) # same as second.
##配对箱线图需要设置配对编号##
?rep
data2$id <- rep(1:50, 2)#使用rep 函数进行编号配对
head(data2)
Sepal.Length Species id
1 5.1 setosa 1
2 4.9 setosa 2
3 4.7 setosa 3
4 4.6 setosa 4
5 5.0 setosa 5
6 5.4 setosa 6
主要是添加geom_line参数R语言绘图基础篇-线图(geom_line) - 知乎 (zhihu.com)
复现SCI文章:配对连线、散点箱线图 - 知乎 (zhihu.com)
注意细节抖动点与连线的位置参数需要一致才能一一对应,这里geom_jitter为colour = Species,而 geom_line的设置为group = id,所以将其都设置为中线对应。
geom_jitter(aes(x=Species,y=Sepal.Length,colour = Species), #散点
alpha = 0.3,size=3,
position = position_dodge(0))+
geom_line(aes(x=Species,y=Sepal.Length,group = id),##添加连线
color = 'grey40',
lwd = 0.5,
position = position_dodge(0))+ #添加连线#"identity" 表示在中线的连线
p<- ggplot(data=data2)+
geom_boxplot(mapping=aes(x=Species,y=Sepal.Length,colour = Species ), #箱线图
alpha = 0.5,size=1.5,width = 0.6)+
geom_jitter(aes(x=Species,y=Sepal.Length,colour = Species), #散点
alpha = 0.3,size=3,
position = position_dodge(0))+
geom_line(aes(x=Species,y=Sepal.Length,group = id),##添加连线
color = 'grey40',
lwd = 0.5,
position = position_dodge(0))+ #添加连线#"identity" 表示在中线的连线
scale_color_manual(limits=c("setosa","versicolor"),
values=c("#5F80B4","#922927"))+
#"#85B22E","#5F80B4","#E29827","#922927" 颜色配置
geom_signif(mapping=aes(x=Species,y=Sepal.Length), # 不同组别的显著性
comparisons = list(c("setosa", "versicolor")),
map_signif_level=T, # T显示显著性,F显示p value
#map_signif_level=F, # T显示显著性,F显示p value
tip_length=c(0,0), # 修改显著性线两端的长短
y_position = c(7.5), # 设置显著性线的位置高度(Y 轴的高度)
size=1, # 修改线的粗细
textsize = 4, # 修改显著性标记的大小
test = "wilcox.test")+ # 检验选择 "t.test"
theme_classic( # 主题设置,这个是无线条主题
base_line_size = 1 # 坐标轴的粗细
)+
labs(title="鸢尾花",##标题
x="",y="Sepal.Length")+ # 添加标题,x轴,y轴内容
theme(plot.title = element_text(size = 15, colour = "black",hjust = 0.5),
axis.title.y = element_text(size = 15,color = "black",face = "bold", # face取值:plain普通,bold加粗,italic斜体,bold.italic斜体加粗
vjust = 1.9,hjust = 0.5,angle = 90),
legend.title = element_text(color="black", size=15,face="bold"),
legend.text = element_text(color="black", size = 10,face = "bold"),
axis.text.x = element_text(size = 13, color = "black", face = "bold",
vjust = 0.5,hjust = 0.5,angle = 0), #角度
axis.text.y = element_text(size = 13,color = "black",face = "bold",vjust = 0.5,hjust = 0.5,angle = 0)
)
p
dev.off()
还可以使用跟着Nat Commun学作图 | 4.配对箱线图+差异分析-CSDN博客
③多组箱线图
数据格式查看
##其实就是多组箱线图进行合并处理而已##
rm(list = ls())
library(ggpubr)
library(ggplot2)
library(RColorBrewer)
library(gridExtra)
library(dplyr)
library(cowplot)##进行拼图
library(tibble)#加载了tibble包的缘故:需要手动检测行名是否添加成功
library(reshape2)
data <- iris##鸢尾花数据集
data1 <- data[c(1:100),]#提取两组数据做多组间箱线图
data1$id <- paste0("iris",1:nrow(data1))##数据重塑需要一列行名
head(data1) Sepal.Length Sepal.Width Petal.Length Petal.Width Species id 1 5.1 3.5 1.4 0.2 setosa iris1 2 4.9 3.0 1.4 0.2 setosa iris2 3 4.7 3.2 1.3 0.2 setosa iris3 4 4.6 3.1 1.5 0.2 setosa iris4 5 5.0 3.6 1.4 0.2 setosa iris5 6 5.4 3.9 1.7 0.4 setosa iris6
进行数据重塑
##数据重塑 "Species" "id"
mydata1<-melt(data1,
id.vars=c("id",#行名
"Species"),##分类变量
variable.name="xlabel",##其余的行名,也就是结果的X轴变量
value.name="number"##数值
)
ylabname <- paste("number")#绘图Y轴名称
colnames(mydata1)#[1] "id" "Species" "xlabel" "number"
head(mydata1) Species xlabel number 1 setosa Sepal.Length 5.1 2 setosa Sepal.Length 4.9 3 setosa Sepal.Length 4.7 4 setosa Sepal.Length 4.6 5 setosa Sepal.Length 5.0 6 setosa Sepal.Length 5.4
进行P值计算
#### 计算p value####
pvalues <- sapply(mydata1$xlabel, function(x) {
res <- wilcox.test(as.numeric(number) ~ Species,
data = subset(mydata1, xlabel == x)) #两组,wilcox.test或t.test;多组,kruskal.test或aov(one-way ANOVA test)
res$p.value
})
pv <- data.frame(xlabelP = mydata1$xlabel, pvalue = pvalues)
pv$sigcode <- cut(pv$pvalue, c(0,0.0001, 0.001, 0.01, 0.05, 1),
labels=c('****','***', '**', '*', 'ns'))
pv$sigcode1 <- signif(pv$pvalue, 4)##保留有效数值
mydata1<-mydata1[,-1]##可以去除样本名称
head(pv) xlabelP pvalue sigcode sigcode1 1 Sepal.Length 8.345827e-14 **** 8.346e-14 2 Sepal.Length 8.345827e-14 **** 8.346e-14 3 Sepal.Length 8.345827e-14 **** 8.346e-14 4 Sepal.Length 8.345827e-14 **** 8.346e-14 5 Sepal.Length 8.345827e-14 **** 8.346e-14 6 Sepal.Length 8.345827e-14 **** 8.346e-14
进行绘图
# 画box plot
#pdf("多组箱线图.pdf",width = 9,height = 6)##一定添加大小
p.box <- ggplot(mydata1,
aes(x=xlabel, y=number, color=Species, fill=Species)) +
geom_boxplot(alpha = .5) + #半透明
theme_classic() + #或theme_bw()
scale_fill_brewer(palette = "Set1") + #按类填充颜色
scale_color_brewer(palette = "Set1") + #按类给边框着色
theme(axis.text.x = element_text(colour="black", size = 11,
angle = 30, hjust = .5, vjust = .5)) +
geom_text(aes(x=xlabelP, y=max(mydata1$number) * 1,##可以调整Y轴坐标,用于展示P或显著性标记
label = pv$sigcode),data=pv, inherit.aes=F) +#pv$sigcode1 展示P值数值
labs(x=NULL,title = "iris") + ##标题
ylab("iris")##y 轴名称
p.box
dev.off()
多图合并
grid.arrange(p1, p.box, ncol = 2)
补充文献学习及代码获取:focuslyj/ImmunogenomicLandscape-BloodCancers (gitee.com)
数据获取:https://www.synapse.org/#!Synapse:syn21991014/files/
R语言设置数值输出(保留至小数点后位数和保留有效数字)_options(digits=3)-CSDN博客
④批量多组间箱线图
代码:COVID19/Boxplot_mRNA.R at master · DongshengChen-TY/COVID19 (github.com)
rm(list = ls())
mRNA<-read.csv("All_mRNA_FPKM.csv",header=T,row.names=1)
gc<-c("CD28","CD3D","CD8A","LCK","ZAP70",
"GATA3","EOMES","IL23A","CXCL8","CXCR1")##选取部分
exp<-log2(mRNA+1)##进行log转换
bar_mat<-exp[,gc]
anno<- read.csv("sample_index.csv",header=T,row.names=1)
anno$type2 <- anno$Type
bar_mat<-bar_mat[rownames(anno),]
bar_mat$sam=anno$Type##给表达矩阵添加样本分组信息
转换为这种数据格式即可使用代码进行运算作图
##对分组设置向量,为了保证箱线图的排序一致##
bar_mat$sam<-factor(bar_mat$sam,levels=c("Asymptomatic","Mild","Severe","Critical"))
library(RColorBrewer)
getPalette = colorRampPalette(brewer.pal(9, "Set1"))
c=getPalette(9)
library(ggpubr)
library(ggplot2)
plist2<-list()
co<-c("#5CB85C","#337AB7","#F0AD4E","#D9534F")##颜色设置
for (i in 1:length(gc)){
bar_tmp<-bar_mat[,c(gc[i],"sam")]
colnames(bar_tmp)<-c("Expression","sam")
bar_tmp$color<-c(rep(x="1",times=64),rep(x="2",times=64),rep(x="3",times=34),rep(x="4",times=16))
my_comparisons1 <- list(c("Asymptomatic", "Mild")) #共有4种样本,C43 有6种两两组合形式
my_comparisons2 <- list(c("Asymptomatic", "Severe"))
my_comparisons3 <- list(c("Asymptomatic", "Critical"))
my_comparisons4 <- list(c("Mild", "Severe"))
my_comparisons5 <- list(c("Mild", "Critical"))
my_comparisons6 <- list(c("Severe", "Critical"))
pb1<-ggboxplot(bar_tmp,x="sam",y="Expression",color="color",fill=NULL,add = "jitter",bxp.errorbar.width = 0.6,width = 0.4,size=0.01,font.label = list(size=30), palette = c(co[1],co[2],co[3],co[4]))+theme(panel.background =element_blank())
pb1<-pb1+theme(axis.line=element_line(colour="black"))+theme(axis.title.x = element_blank())
#pb1<-pb1+scale_fill_manual(name="",labels=c("Ctrl","COV"),values=c(c[2],c[1]))
pb1<-pb1+theme(axis.title.y = element_blank())+theme(axis.text.x = element_text(size = 15,angle = 45,vjust = 1,hjust = 1))
pb1<-pb1+theme(axis.text.y = element_text(size = 15))+ggtitle(gc[i])+theme(plot.title = element_text(hjust = 0.5,size=15,face="bold"))
pb1<-pb1+theme(legend.position = "NA")
pb1<-pb1+stat_compare_means(method="t.test",hide.ns = F,comparisons =c(my_comparisons1,my_comparisons2,my_comparisons3,my_comparisons4,my_comparisons5,my_comparisons6),label="p.signif")
plist2[[i]]<-pb1
}
pall<-plot_grid(plist2[[1]],plist2[[2]],plist2[[3]],
plist2[[4]],plist2[[5]],plist2[[6]],
plist2[[7]],plist2[[8]],plist2[[9]],
plist2[[10]],ncol=4)##这里选中的基因数量,排为多少列
pall
⑤多组抖动散点图
##数据整理##
rm(list = ls())
library(ggplot2)
library(ggpubr)
library(cowplot)
data <- iris
#模拟数据
data1 <- data[,c(5,1)]
colnames(data1)[2] <- "values"
data2 <- data[,c(5,2)]
data2$Species <- paste0("B",data2$Species)
colnames(data2)[2] <- "values"
dat <- rbind(data1,data2)
colnames(dat)[1] <- "Group"
table(dat$Group)#setosa versicolor virginica Bsetosa Bversicolor Bvirginica
##进行因子设定,为了X轴的方向
dat$Group <- factor(dat$Group, levels =c('setosa','versicolor','virginica',
'Bsetosa','Bversicolor','Bvirginica'))
head(dat)
############# 绘图 -----------
library(ggplot2)
library(latex2exp)
ggplot(dat, aes(Group, values))+
# 箱线图:
geom_boxplot(outlier.shape = NA, width = 0.6)+
# 抖动散点:
geom_jitter(aes(color = Group), width = 0.15, size = 1)+
# 横线:
geom_hline(yintercept = 5, linetype = "dashed")+
# 箭头:
geom_segment(aes(x = 2, y = 7.5, xend = 2, yend = 7.2),
arrow = arrow(length = unit(1, "mm"))) +
scale_color_manual(name = "Subtype",
values = c("#fd6ab0", "#aa5700", "#f48326", "#ffd711",
"#9bd53f", "#00ae4c", "#00c1e3", "#007ddb",
"#8538d1", "#d01910"))+
# 文字注释:
annotate("text", label = TeX("$\\textit{P} = 1.6e-06$"),
size = 3, x = 2, y = 8)+##文字注释位置
# 坐标轴标签:
xlab("")+
ylab(TeX("number"))+
# 标题:
ggtitle(TeX("iris"))+
# 主题:
theme_classic()+
theme(plot.title = element_text(hjust = 0.5))+
guides(color=guide_legend(override.aes = list(size=2),
title.theme = element_text(face = "bold")))
#ggsave("plot.pdf", height = 5, width = 7)
dev.off()
跟着高分SCI学作图 -- 箱线图+抖动散点 (qq.com)
参考文献:
1:Immunogenomic Landscape of Hematological Malignancies
2:The trans-omics landscape of COVID-19