setwd("D:/脑谱图")
bulktpm <- read.table("D:/脑谱图/GSE71315_bulk_tpm.polya.txt.gz", header=T, sep="\t")
allpseu = read.table("D:/脑谱图/GW22测试/allpseu.txt")
a <- allpseu$gene_id
b <- bulktpm$X
useful <- intersect(a,b)
rownames(bulktpm) = bulktpm[,1]
#查看管家基因的表达量
bulktpm['GAPDH',]
pseu <- bulktpm[useful,]
#以基因名为列名,去除enselmble号
rownames(pseu) <- pseu$gname
pseu <- pseu[,-1]
pseu <- pseu[,-1]
#删除表达量为0的基因
tpm1 <- pseu[which(rowMeans(pseu)>1),]
pseu <- as.matrix(pseu)
colnames(pseu)
pseu <- pseu[, c("GW13_1", "GW14.5_1","GW16_1","GW16_2","GW21_1","GW21_2", "GW23_1", "GW23_2")]
heatmap(pseu,trace = "none",dendrogram = "column",scale = "row",colsep = 4,rowsep = 42,key = TRUE,density.info = "none", key.title = "GENE", key.xlab = "Z Score",key.ylab = NA)
####pheatmap
install.packages("rlang")
install.packages("pheatmap")
library(pheatmap)
pheatmap(tpm1,scale="none",cluster_cols=F,show_rownames = FALSE,treeheight_row = 0)
log_data <- log2(tpm1+1)
# 进行绘图
bk <- c(seq(0,2,by=0.001),seq(2.1,10,by=0.001)) # 调整图例的数值范围
pheatmap(log_data,scale="none",cellweight = 5,cluster_cols=F,
show_rownames = FALSE,treeheight_row = 0,
color = c(colorRampPalette(colors = c("blue","white"))(length(bk)/2),
colorRampPalette(colors = c("white","red"))(length(bk)/2)), # 调整图例的颜色
#legend_breaks=c(0,7,10), # 设置颜色断点color = colorRampPalette(c("white", "firebrick3"))(5000)
breaks=bk, # 调整图例的数值范围heat.colors(256)
)
pseu <- pseu[which(rowSums(pseu)!=0),]
log_data <- log2(pseu+1)
pheatmap(log_data,color = colorRampPalette(c("white", "firebrick3"))(500),
#legend_breaks=c(0,1,2),cluster_cols = F,cluster_rows = T,col = heat.colors(256)
scale="none",cluster_cols=F,show_rownames = FALSE,treeheight_row = 0)
pheatmap(log2(top100+1),color = colorRampPalette(c("white", "firebrick3"))(500),
scale="none",cluster_cols=F,show_rownames = FALSE)
##筛选表达量在前100的假基因进行绘制
top100<-pseu[order(rowSums(pseu),decreasing=T)[1:100],]
top100 <- as.matrix(top100)
mark_gene <- row.names(top100)[1:10]
gene_pos <- which(row.names(top100) %in% mark_gene)cellwidth = 15, cellheight = 8
row_anno <- rowAnnotation(mark_gene=anno_mark(at = gene_pos),lables = mark_gene)
pheatmap(log2(top100+1),scale="none",cluster_cols=F,show_rownames = TRUE)
heatmap(log2(top100+1),Rowv = NULL,Colv = NA,scale = "none",key = TRUE,density.info = "none", key.title = "GENE", key.xlab = "Z Score",key.ylab = NA)
dendrogram = "column"
library(ggplot2)
#BiocManager安装limma包
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(pkgs = "limma", force = TRUE)
library(limma)
library(dplyr)
#构建分组
list <- c(rep("Early", 4), rep("Late",4)) %>% factor(., levels = c("Early", "Late"), ordered = F)
list <- model.matrix(~factor(list)+0) #把group设置成一个model matrix
colnames(list) <- c("Early", "Late")
df.fit <- lmFit(pseu, list) ## 数据与list进行匹配
df.matrix <- makeContrasts(Early - Late , levels = list)
fit <- contrasts.fit(df.fit, df.matrix)
fit <- eBayes(fit)
tempOutput <- topTable(fit,n = Inf, adjust = "fdr")
head(tempOutput)
## 导出所有的差异结果
nrDEG = na.omit(tempOutput) ## 去掉数据中有NA的行或列
diffsig <- nrDEG
write.csv(diffsig, "all.limmaOut.csv")
resdata <- read.csv("D:/脑谱图/all.limmaOut.csv")
install.packages("ggrepel")
library(ggplot2)
library(ggrepel)
resdata <- diffsig
resdata$Group <- factor(ifelse(resdata$P.Value < 0.05 & abs(resdata$logFC) >= 1,
ifelse(resdata$logFC >= 1, 'Up','Down'),'NotSignifi'))
resdata$gene <- row.names(resdata)
ggplot(resdata, aes(x = logFC, y = -log10(P.Value), colour = Group))+
geom_point(size =4, shape = 20, stroke = 0.5)+
#控制最人气泡和最小气泡,调节气泡相对大小
scale_size(limits = c(2,16))+
##设置颜色
#scale_fill_manual(values = c("#fe0000","#13fc00","#bdbdbd"))+
scale_color_manual(values=c('steelblue','gray','brown'))+
ylab('-log10 (Pvalue)')+
xlab('log2 (FoldChange)')+
#添加关注的点的基因名
geom_text_repel(
data = resdata[resdata$P.Value < 0.05 & abs(resdata$logFC) > 1,],
aes(label = gene),
size = 3.5,
color = "black",
segment.color = "black", show.legend = FALSE)+
## 增加横竖线条
geom_vline(xintercept = c(-1,1),lty = 2, col = "black", lwd = 0.5)+
geom_hline(yintercept = -log10(0.05), lty = 2, col = "black", lwd = 0.5)