bulk的热图和火山图

最新推荐文章于 2024-10-01 18:51:18 发布

chenjnnn

最新推荐文章于 2024-10-01 18:51:18 发布

阅读量513

点赞数 7

文章标签： java 前端 javascript

本文链接：https://blog.csdn.net/cjnnnn16/article/details/135744325

版权

本文介绍了使用R语言对脑谱图数据进行预处理、基因表达量计算、热力图展示以及使用limma进行差异表达分析的过程，展示了如何通过pheatmap包进行可视化，包括对管家基因和显著差异基因的检测和可视化结果的解读。

摘要由CSDN通过智能技术生成

setwd("D:/脑谱图")
bulktpm <- read.table("D:/脑谱图/GSE71315_bulk_tpm.polya.txt.gz", header=T, sep="\t")
allpseu = read.table("D:/脑谱图/GW22测试/allpseu.txt")
a <- allpseu$gene_id
b <- bulktpm$X
useful <- intersect(a,b)
rownames(bulktpm) = bulktpm[,1]
#查看管家基因的表达量
bulktpm['GAPDH',]
pseu <- bulktpm[useful,]
#以基因名为列名，去除enselmble号
rownames(pseu) <- pseu$gname
pseu <- pseu[,-1]
pseu <- pseu[,-1]
#删除表达量为0的基因
tpm1 <- pseu[which(rowMeans(pseu)>1),]
pseu <- as.matrix(pseu)
colnames(pseu)
pseu <- pseu[, c("GW13_1", "GW14.5_1","GW16_1","GW16_2","GW21_1","GW21_2", "GW23_1", "GW23_2")]
heatmap(pseu,trace = "none",dendrogram = "column",scale = "row",colsep = 4,rowsep = 42,key = TRUE,density.info = "none", key.title = "GENE", key.xlab = "Z Score",key.ylab = NA)
####pheatmap
install.packages("rlang")
install.packages("pheatmap")
library(pheatmap)
pheatmap(tpm1,scale="none",cluster_cols=F,show_rownames = FALSE,treeheight_row = 0)
log_data <- log2(tpm1+1)
# 进行绘图
bk <- c(seq(0,2,by=0.001),seq(2.1,10,by=0.001))    # 调整图例的数值范围
pheatmap(log_data,scale="none",cellweight = 5,cluster_cols=F,
         show_rownames = FALSE,treeheight_row = 0,
         color = c(colorRampPalette(colors = c("blue","white"))(length(bk)/2),
                   colorRampPalette(colors = c("white","red"))(length(bk)/2)),    # 调整图例的颜色
         #legend_breaks=c(0,7,10),     # 设置颜色断点color = colorRampPalette(c("white", "firebrick3"))(5000)
         breaks=bk,    # 调整图例的数值范围heat.colors(256)
)
pseu <- pseu[which(rowSums(pseu)!=0),]
log_data <- log2(pseu+1)
pheatmap(log_data,color = colorRampPalette(c("white", "firebrick3"))(500), 
         #legend_breaks=c(0,1,2),cluster_cols = F,cluster_rows = T,col = heat.colors(256)
         scale="none",cluster_cols=F,show_rownames = FALSE,treeheight_row = 0)
pheatmap(log2(top100+1),color = colorRampPalette(c("white", "firebrick3"))(500), 
         scale="none",cluster_cols=F,show_rownames = FALSE)
##筛选表达量在前100的假基因进行绘制
top100<-pseu[order(rowSums(pseu),decreasing=T)[1:100],]  
top100 <- as.matrix(top100)
mark_gene <- row.names(top100)[1:10]
gene_pos <- which(row.names(top100) %in% mark_gene)cellwidth = 15, cellheight = 8
row_anno <- rowAnnotation(mark_gene=anno_mark(at = gene_pos),lables = mark_gene)
pheatmap(log2(top100+1),scale="none",cluster_cols=F,show_rownames = TRUE)
heatmap(log2(top100+1),Rowv = NULL,Colv = NA,scale = "none",key = TRUE,density.info = "none", key.title = "GENE", key.xlab = "Z Score",key.ylab = NA)
dendrogram = "column"

library(ggplot2)
#BiocManager安装limma包
if (!require("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
BiocManager::install(pkgs = "limma", force = TRUE)
library(limma)
library(dplyr)
#构建分组
list <- c(rep("Early", 4), rep("Late",4)) %>% factor(., levels = c("Early", "Late"), ordered = F)
list <- model.matrix(~factor(list)+0)  #把group设置成一个model matrix
colnames(list) <- c("Early", "Late")
df.fit <- lmFit(pseu, list) ## 数据与list进行匹配
df.matrix <- makeContrasts(Early - Late , levels = list)
fit <- contrasts.fit(df.fit, df.matrix)
fit <- eBayes(fit)
tempOutput <- topTable(fit,n = Inf, adjust = "fdr")
head(tempOutput)
## 导出所有的差异结果
nrDEG = na.omit(tempOutput) ## 去掉数据中有NA的行或列
diffsig <- nrDEG  
write.csv(diffsig, "all.limmaOut.csv")
resdata <- read.csv("D:/脑谱图/all.limmaOut.csv")

install.packages("ggrepel")
library(ggplot2)
library(ggrepel)
resdata <- diffsig
resdata$Group <- factor(ifelse(resdata$P.Value < 0.05 & abs(resdata$logFC) >= 1,
                          ifelse(resdata$logFC >= 1, 'Up','Down'),'NotSignifi'))
resdata$gene <- row.names(resdata)
ggplot(resdata, aes(x = logFC, y = -log10(P.Value), colour = Group))+
  geom_point(size =4, shape = 20, stroke = 0.5)+
  #控制最人气泡和最小气泡，调节气泡相对大小
  scale_size(limits = c(2,16))+
  ##设置颜色
  #scale_fill_manual(values = c("#fe0000","#13fc00","#bdbdbd"))+
  scale_color_manual(values=c('steelblue','gray','brown'))+
  ylab('-log10 (Pvalue)')+
  xlab('log2 (FoldChange)')+
  #添加关注的点的基因名
  geom_text_repel(
    data = resdata[resdata$P.Value < 0.05 & abs(resdata$logFC) > 1,],
    aes(label = gene),
    size = 3.5,
    color = "black",
    segment.color = "black", show.legend = FALSE)+
  ## 增加横竖线条
  geom_vline(xintercept = c(-1,1),lty = 2, col = "black", lwd = 0.5)+
  geom_hline(yintercept = -log10(0.05), lty = 2, col = "black", lwd = 0.5)