DSP分析之热图绘制

DSP分析之热图绘制
library(pheatmap)
library(ggplot2)
library(tidyverse)
library(plyr)
library(readxl)
library(reshape2)
library(ggpubr)

getwd()
#数据预处理


```r
raw_data <- as.data.frame(read_excel('E:/page/GeoMx_DSP/MY_DSP/GroupB/DSP_ICB_GROUP.xlsx',sheet = 'raw_data'))
rownames(raw_data) <- raw_data[,1]
raw_data

#将分组信息单独和数据信息分开整理出来
meta_data <- raw_data[1:4,]
meta_data <- meta_data[,-1]
meta_data['patient',] <- colnames(meta_data)
flite_data <- raw_data[-1:-4,]
rownames(flite_data) <- flite_data[,1]
#这是我们用到的数据
flite_data <- flite_data[,-1]
```r
####将数据格式转换为数字型
for (i in 1:length(colnames(flite_data))){
  flite_data[,i] <- as.numeric(flite_data[,i])
}

#展示相关性图-查看样本之间相关性如何

#####
flite_cor <- cor(flite_data)
pheatmap(flite_cor,fontsize = 6)
#####使用melt整理数据格式
mydata_flite_cor.m <-melt(flite_cor)

head(mydata_flite_cor.m)
p_cor <-ggplot(mydata_flite_cor.m, aes(Var1, Var2)) + 
  geom_tile(aes(fill = value),colour = "blue") 

p_cor + scale_fill_gradient(name="Value", low = "white",high = "red") +
  theme(axis.text.x = element_text(vjust = 0.5, hjust = 0.5, angle = 0))+
  coord_fixed(ratio=1)+
  theme(axis.text= element_text(size = 8,family="ARL"))+
  theme(plot.margin = unit(c(0.1,0,0,0), unit="mm"))+
  labs(x = "Var1", y = "Var2", title = "correlation")+
  theme(plot.title = element_text(size = 13,hjust = 0.5,family = "ARL" ))+
  theme(legend.key.width=unit(3,'mm'),legend.key.height=unit(3,'cm'))+
  theme(legend.title = element_text(size = 8))

get_lower_tri<-function(cormat){
  cormat[upper.tri(cormat)] <- NA
  return(cormat)
}
#####获取相关性矩阵的下三角矩阵
get_upper_tri <- function(cormat){
  cormat[lower.tri(cormat)]<- NA
  return(cormat)
}
upper_tri <- get_upper_tri(flite_cor)
upper_tri

upper_tri.m <- melt(upper_tri, na.rm = TRUE)
p_cor.up <-ggplot(upper_tri.m, aes(Var1, Var2)) + 
  geom_tile(aes(fill = value),colour = "white") 

p_cor.up + scale_fill_gradient(name="Value", low = "white",high = "red") +
  theme(axis.text.x = element_text(vjust = 0.5, hjust = 0.5, angle = 90))+
  coord_fixed(ratio=1)+
  theme(axis.text= element_text(size = 8,family="ARL"))+
  theme(plot.margin = unit(c(0.1,0,0,0), unit="mm"))+
  labs(x = "Var1", y = "Var2", title = "correlation")+
  theme(plot.title = element_text(size = 13,hjust = 0.5,family = "ARL" ))+
  theme(legend.key.width=unit(3,'mm'),legend.key.height=unit(3,'cm'))+
  theme(legend.title = element_text(size = 8))

reorder_cormat <- function(cormat){
  dd <- as.dist((1-cormat)/2)  ##使用相关性系数作为变量间距离
  hc <- hclust(dd)
  cormat <-cormat[hc$order, hc$order]
}

mydata_cor.order <- reorder_cormat(flite_cor)
order.upper_tri <- get_upper_tri(mydata_cor.order)
order.upper_tri.m <- melt(order.upper_tri, na.rm = TRUE)
p_cor.order <-ggplot(order.upper_tri.m, aes(Var1, Var2)) + 
  geom_tile(aes(fill = value),colour = "white") 

p_cor.order + scale_fill_gradient(name="Value", low = "blue",high = "red") +
  theme(axis.text.x = element_text(vjust = 0.5, hjust = 0.5, angle = 90))+
  coord_fixed(ratio=1)+
  theme(axis.text= element_text(size = 8,family="ARL"))+
  theme(plot.margin = unit(c(0.1,0,0,0), unit="mm"))+
  labs(x = "Var1", y = "Var2", title = "correlation")+
  theme(plot.title = element_text(size = 13,hjust = 0.5,family = "ARL" ))+
  theme(legend.key.width=unit(3,'mm'),legend.key.height=unit(3,'cm'))+
  theme(legend.title = element_text(size = 8))

根据样本之间的相关性好坏可以整体上看是否有ROI“不合群”从而进行甄选。
结果如下
在这里插入图片描述

#筛选出特征marker比较差异
#Part I nike_gene
#####
marker_nike <-c('CD3D','CD4','CD8','NCAM1','CD68','CD94','CD20','CD206')
marker_nike_data <- flite_data[which(rownames(flite_data)%in%marker_nike),]
#####
#展示热图
#####
annotation_col <- data.frame(t(meta_data))
#rownames(annotation_row) <- rownames(heatmap.marker)
ann_colors <- list(ROI = c('Border CD57' = "#00CCCC",'Tumor CD57' = "#FF9999",
                           'Border CD57 (CD3)'= "#00CCCC",'Tumor CD57 (CD3)' = "#FF9999"),
                   Gender = c(man = 'blue'),
                   age = c('46' = 'orange','48' = 'yellow','52' = 'green','68' = 'black','67' = 'grey'),
                   Therpy = c(PD = '#FFCC33',PR = '#66CC33'))
###绘图区域
#热图先看一下基因表达情况
#以PD和PR来划分
pdf(file = 'heatmap_nike.pdf',width = 12,height = 6)
pheatmap(marker_nike_data,scale = "row",
         color =  colorRampPalette(c("#3333FF","#FFFF33"))(1000),
         cluster_cols = F,cluster_rows = T,
         annotation_col = annotation_col,annotation_colors = ann_colors,
         #annotation_row = annotation_row,
         cellwidth = 10, cellheight = 10,angle_col = "90",treeheight_row = 13,treeheight_col = 15,
         fontsize = 10,fontsize_col = 10,fontsize_row = 10)

在这里插入图片描述
GGPLOT绘图主题
##绘图主题

mytheme <- theme(plot.title=element_text(face="plain",
                                         size="13", color="black"),
                 axis.title=element_text(face="plain",
                                         size=12, color="black"),
                 axis.text=element_text(face="plain", size=9,
                                        color="black"),
                 axis.title.x = element_text(size = "15"),
                 axis.title.y = element_text(size = "15"),
                 panel.background=element_rect(fill="white",
                                               color="black"),
                 panel.grid.major.y=element_line(color="white",
                                                 linetype=1),
                 panel.grid.minor.y=element_line(color="white",
                                                 linetype=2),
                 panel.grid.minor.x=element_blank(),
                 legend.key.size = unit(.3,"inches"),
                 legend.position="top")+
  theme(legend.text=element_text(size=13))+theme(legend.title = element_text(size = 13))

再绘制一个箱图比较两组之间某些基因表达是否具有统计学差异
对数据先整理成ggplot绘制箱图的格式
####统计图处理

nike_data <- melt(marker_nike_data)
nike_data$gene <- rownames(marker_nike_data)
colnames(nike_data) <- c('patient','Value','Gene')
nike_data_plot <- merge(nike_data,t(meta_data),by = 'patient' )
nike_data_plot$ROI[which(nike_data_plot$ROI=='Border CD57 (CD3)')] <- 'Border CD57'
nike_data_plot$ROI[which(nike_data_plot$ROI=='Tumor CD57 (CD3)')] <- 'Tumor CD57'

绘图代码

pdf(file = 'nike_PD_Border_vs_Tumor.pdf',width = 8,height = 6)
ggplot(nike_data_plot[which(nike_data_plot$Therpy =='PD'),],
       aes(x = Gene,y = Value,fill = ROI ))+
  geom_boxplot()+
  geom_boxplot(outlier.colour="red", outlier.shape=8, outlier.size=4)+
  labs(title="ICB_PD", x='Nike Markers', y="Normalized Counts") +
  stat_compare_means(label = "p.signif",method = "t.test")+
  #ylim(c(-3,15))+
  guides(fill=guide_legend(title=NULL))+#隐藏图例标题
   #scale_fill_discrete(name="Group",breaks=c("Covid19", "Non-Covid19"))
  mytheme
dev.off()

结果如下
在这里插入图片描述
对于单样本包含多种类型细胞时候需要去卷积来查看不同类型细胞的分布占比。付Cibersort使用的官网链接。
https://cibersort.stanford.edu/

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值