#######差异分析
###########################33
library(clusterProfiler)
library(Seurat)
library(SeuratWrappers)
library(ggplot2)
library(dplyr)
library(stringr)
library(openxlsx)
library(org.Mm.eg.db)
library("reshape")
library("RColorBrewer")
getwd()
file="G:/silicosis/sicosis/silicosis_ST/yll/0214/harmony_cluster/d_all/r=0.6_11clusters"
dir.create(file)
setwd(file)
getwd()
?loadWorkbook
#library(xlsx) #比openxlsx读取文件慢多了!!!!!
##再三检查文件路径!!!!!!!!!!!!
markers = read.xlsx("G:/silicosis/sicosis/silicosis_ST/yll/0214/harmony_cluster/d_all/silicosis_ST_harmony_r0.6_cluster_marker.xlsx")
head(markers)

#View(markers)
names(table(markers$cluster))
#markers$"Myofibroblast/vascular smooth muscle cell"<-"myo-fib"
markers$cluster[markers$cluster=="Myofibroblast/vascular smooth muscle cell"]="fib-myo" #把第17个cluster的非法名字改成正常名字,因为给导出的xlsx命名时使用了custer的名字
names(table(markers$cluster))
k <- keys(org.Mm.eg.db, keytype = "ENTREZID")
gene.list <- select(org.Mm.eg.db, keys = k, columns = c("SYMBOL", "ENSEMBL"), keytype = "ENTREZID")## 73306 3
head(gene.list)
#自建函数 输入go富集条目 和文件的名称 就返回相应的pdf文件和xlsx文件
clusterProfilerOut = function(enrichRes,outfile){
enrichRes$function.gene.num = unlist(strsplit(enrichRes[,"BgRatio"],"/"))[seq(1,length(unlist(strsplit(enrichRes[,"BgRatio"],"/"))),2)]
enrichRes$GeneRatio = enrichRes$Count / as.numeric(enrichRes$function.gene.num) #计算generatio
write.xlsx(enrichRes,paste0(outfile, ".xlsx"),col.names=T,row.names=F)
if (dim(enrichRes)[1]>=10) {enrichRes.use = enrichRes[1:10,]} else{enrichRes.use = enrichRes[1:dim(enrichRes)[1],]}#显示的富集条目不超过10条
enrichRes.use = enrichRes.use[order(enrichRes.use$GeneRatio, decreasing=T),] #按照generatio降序显示
enrichRes.use$Description = factor(enrichRes.use$Description,levels=rev(enrichRes.use$Description))
gap = (max(enrichRes.use$GeneRatio) - min(enrichRes.use$GeneRatio))/5
#制作并输出pdf
pdf(paste0(outfile, ".pdf"))
p = ggplot(enrichRes.use,mapping = aes(x=GeneRatio,y=Description))+
geom_point(aes(size=Count,color=p.adjust))+theme_bw()+
scale_color_gradient(low = "blue", high = "red")+
scale_x_continuous(expand = c(gap, gap))+ylab(NULL)+
scale_y_discrete(labels=function(x) str_wrap(x, width=60))
print(p)
dev.off()
}
getwd() #此时手动把all_cluster_markers.xlsx文件复制到工作目录下
path=getwd()
dir(path)
files=dir(path)
files #得到工作目录下的所有文件名
'''
path="G:/silicosis/sicosis/silicosis_ST/yll/0214/harmony_cluster/d_all/r=0.6_11clusters_0.69fc"
dir.create(path)
setwd(path)
'''
getwd()
head(markers)
#library(xlsx)
for (i in files) {
#i="all_cluster_markers.xlsx"
input = read.xlsx(paste(path, i, sep = "/"))#读取xlsx文件 注意有的时候需要加row.names 有的时候不需要加
head(input)
input$cluster[input$cluster=="Myofibroblast/vascular smooth muscle cell"]="fib-myo" #把第17个cluster的非法名字改成正常名字,因为给导出的xlsx命名时使用了custer的名字
input$symbol<-input$gene
head(input)
input$FeatureName.entrez = gene.list[match(input$symbol, gene.list$SYMBOL),"ENTREZID"]#在input文件的基础上添加symbol与entrezid相对应的列FeatureName.entrez
head(input)
#input$change=ifelse(input$avg_log2FC>0,"up","down")
input.sub<-input[input$p_val_adj<0.001,] #取出所有p值小于0.001的
print(head(input.sub))
dim(input.sub)
input.sub = na.omit(input[,c("cluster","FeatureName.entrez","avg_log2FC","p_val_adj")])
head(input.sub)
colnames(input.sub) = c("cluster","gene", "log2FC", "PValue")
print(head(input.sub))
subset_data=input.sub
head(subset_data)
cluster_number<-names(table(subset_data$cluster)) #获取文件中的cluster个数
length(cluster_number)
cluster_number
str(cluster_number)
names(table(subset_data$cluster))[1]
subset_data[subset_data$cluster=="0",]
##注意文件中cluster的命名 最好不要出现斜杠|等特殊字符 容易出错
#注意p值是否需要约束?
#注意是否需要正负号表示上下调?
#for (cluster_num in 0:(length(cluster_number)-1)) { #对于cluster为数字的
##只看上调的基因 ,注意修改名字!!!!!!!!!!!!!
subset_data = subset(input.sub, log2FC>0.69&PValue<0.05)
head(subset_data)
cluster_number<-names(table(subset_data$cluster)) #获取文件中的cluster个数
cluster_number
for (cluster_num in cluster_number) { ##对于cluster为字符的
ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!!全都要改!!!!!!!!!!!!!!!!!!!泉沟要改
keyType = "ENTREZID",
OrgDb = org.Mm.eg.db,
ont = "ALL", readable = TRUE,
pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH")
res = ego[ego$p.adjust<0.05,]
print(dim(res))
print(table(res$ONTOLOGY))
if(nrow(res) != 0){#go,使用自定义的函数画图 ,注意命名时候把i的 文件属性名去掉
clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_up_enrichGO"))
}
ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene,
organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1)
res = ego.KEGG[ego.KEGG$p.adjust<0.05,]
print(dim(res))
if(nrow(res) !=0){#kegg
clusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_up_enrichKEGG"))
}
}
##只看下调的基因 ,注意修改名字!!!!!!!!!!!!!
subset_data = subset(input.sub, log2FC<(-0.69)&PValue<0.05)
head(subset_data)
cluster_number<-names(table(subset_data$cluster)) #获取文件中的cluster个数
cluster_number
for (cluster_num in cluster_number) { ##对于cluster为字符的
ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!!全都要改!!!!!!!!!!!!!!!!!!!泉沟要改
keyType = "ENTREZID",
OrgDb = org.Mm.eg.db,
ont = "ALL", readable = TRUE,
pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH")
res = ego[ego$p.adjust<0.05,]
print(dim(res))
print(table(res$ONTOLOGY))
if(nrow(res) != 0){#go,使用自定义的函数画图 ,注意命名时候把i的 文件属性名去掉
clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_down_enrichGO"))
}
ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene,
organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1)
res = ego.KEGG[ego.KEGG$p.adjust<0.05,]
print(dim(res))
if(nrow(res) !=0){#kegg
clusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_down_enrichKEGG"))
}
}
##不区分上下调 #################################
subset_data = subset(input.sub, abs(log2FC)>0.69&PValue<0.05)
head(subset_data)
cluster_number<-names(table(subset_data$cluster)) #获取文件中的cluster个数
cluster_number
for (cluster_num in cluster_number) { ##对于cluster为字符的
ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!!全都要改!!!!!!!!!!!!!!!!!!!泉沟要改
keyType = "ENTREZID",
OrgDb = org.Mm.eg.db,
ont = "ALL", readable = TRUE,
pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH")
res = ego[ego$p.adjust<0.05,]
print(dim(res))
print(table(res$ONTOLOGY))
if(nrow(res) != 0){#go,使用自定义的函数画图 ,注意命名时候把i的 文件属性名去掉
clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_both_enrichGO"))
}
ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene,
organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1)
res = ego.KEGG[ego.KEGG$p.adjust<0.05,]
print(dim(res))
if(nrow(res) !=0){#kegg
clusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_both_enrichKEGG"))
}
}
}
dev.off()
path="D:/ARDS_scripts_1012/ARDS/Step2_harmony_f200_R3/ws/Fibroblast/0.5/sepsis_Fibroblast_r0.5.rds"
load(path)
table(subset_data$stim,subset_data$seurat_clusters)
DimPlot(subset_data,group.by = "stim")
DimPlot(subset_data,split.by = "stim",
group.by = "seurat_clusters",
label = TRUE)
seurat使用findallmarkers 得到的差异基因列表进行富集分析clusterprolifer-2 调成fc值为0.69
于 2022-03-23 17:38:08 首次发布