空间转录组从loupe软件自动聚类之后，分为11个群，使用seurat中的doheatmap函数画出top20基因表达热图此处使用标准流程来进行标准化 normalization

生信小博士

已于 2023-02-19 17:32:24 修改

阅读量1.1k

点赞数 1

分类专栏：笔记文章标签：聚类 python 数据挖掘

于 2022-08-22 11:58:44 首次发布

本文链接：https://blog.csdn.net/qq_52813185/article/details/126463130

版权

笔记专栏收录该内容

221 篇文章 38 订阅

订阅专栏

空间转录组从loupe软件自动聚类之后，分为11个群，使用seurat中的doheatmap函数画出top20基因表达热图
此处使用标准流程来进行标准化 normalization
在这里插入图片描述

st=ls())
library(patchwork)
library(ggplot2)
library(ggalluvial)
library(svglite)
library(Seurat)
library(openxlsx)
options(stringsAsFactors = FALSE)

getwd()  ##富集分析  loupe导出的列表进行富集分析  go kegg  gsea  注意富集条件

#path = "/data/home/longmin/code_test/silicosis_ST/YLL/0105"
#path= "G:/silicosis/sicosis/silicosis_ST/yll/0105"
path="G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/"
dir.create(path)
setwd(path)
getwd()

dir()
dir(include.dirs = TRUE)

getwd()
list.files()
list.files(full.names = TRUE) #显示全路径的文件  文件的全路径



fs=list.files(pattern = '.cluster')
fs
clustering_spot=read.csv(fs)
head(clustering_spot)
clustering_spot=clustering_spot[,1:2]
head(clustering_spot)
rownames(clustering_spot)=clustering_spot$Barcode           
head(clustering_spot)



files = dir()[grep("Genes.csv", dir())]  #必须把相应的文件放在文件目录下
files 

fs=list.files(pattern = '.h5')
fs



library(Seurat)
a=Read10X_h5(fs)
a[1:4,1:4] 
length(colnames(a))
colnames(a)[grepl(pattern = '2',colnames(a))]
match(c(1,3,4,0),  c(3,4,5,1))
match(colnames(a),rownames(clustering_spot))
match(rownames(clustering_spot),colnames(a))

b=a[,match(rownames(clustering_spot),colnames(a))]
length(colnames(b))


??CreateSeuratObject
All=CreateSeuratObject(counts = b,project = 'spatial_data')
All$percent.mt=PercentageFeatureSet(All,pattern = "^mt-")


myindex=match(colnames(All),rownames(clustering_spot))
graphclustering=clustering_spot$graphclust[c(myindex)]

All$clustering_spot_from_loupe=graphclustering
colnames(All)[1:10]
Idents(All)[1:10]
Idents(All)=All$clustering_spot_from_loupe

clustering_spot[rownames(clustering_spot)=='AAACATTTCCCGGATT-1',]

table(Idents(All))
table(Idents(All))
table(is.na(Idents(All)))

VlnPlot(All, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
All = All%>%Seurat::NormalizeData(verbose = FALSE) %>%  
  FindVariableFeatures(selection.method = "vst", nfeatures = length(rownames(All))) %>%
  ScaleData(verbose = FALSE)
All = RunPCA(All, npcs = 50, verbose = FALSE)
ElbowPlot(All, ndims = 50)




#######################cluster
All <- All %>% 
  RunUMAP(reduction = "pca", dims = 1:30) %>% 
  RunTSNE(reduction = "pca", dims = 1:30) %>% 
  FindNeighbors(reduction = "pca", dims = 1:30)
All<-All%>% FindClusters(resolution = 3) %>% identity()

DimPlot(All,label ='clustering_spot_from_loupe' )
Idents(All)=All$clustering_spot_from_loupe
table(Idents(All))
table(is.na(Idents(All)))

DimPlot(All)


##
getwd()

markers_from_loupe=read.csv("G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/all-merged-Graph-Based Genes.csv")
head(markers_from_loupe)
input=markers_from_loupe
n = (ncol(input)-2)/3 #获取每个csv文件中cluster的个数
n
head(input)
k <- keys(org.Mm.eg.db, keytype = "ENTREZID")  #获取小鼠的entrezid 和gene symbol的对应关系，注释包
gene.list <- select(org.Mm.eg.db, keys = k, columns = c("SYMBOL", "ENSEMBL"), keytype = "ENTREZID")## 73306     3
head(gene.list)
input$FeatureName.entrez = gene.list[match(input$FeatureID, gene.list$ENSEMBL),"ENTREZID"]
#在csv文件的基础上添加symbol与entrezid相对应的列FeatureName.entrez
print(head(input,15))
head(input[,grepl(pattern = 'Average',colnames(input))])


stat = data.frame()
stat
mygene=c()
for(cluster in 1:n){
  #cluster=1
  print(cluster)
  #提取input中的"FeatureName.entrez"列和 cluster.1.log2.Fold.Change列
  input.sub = na.omit(input[,c("FeatureName",paste0("Cluster.",cluster,".Log2.Fold.Change"))])
  head(input.sub)
  #write.table(input.sub, paste0(unlist(strsplit(i, " "))[1], "_Cluster_", cluster, "_log2FC.txt"), quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
  input.sub = na.omit(input[,c("FeatureName",paste0("Cluster.",cluster,".Log2.Fold.Change"),paste0("Cluster.",cluster,".P.Value"))])
  colnames(input.sub) = c("gene", "log2FC", "PValue")
  head(input.sub)
  
  subset_data = head(subset(input.sub, log2FC>1&PValue<0.01),n=20) ###可以自己定义的地方  这里选择上调变化大于2倍，且p-value小于0.01的基因列表
  subset_data
  stat[paste0(unlist(strsplit('i', " "))[1], "_Cluster_", cluster), "up"] = nrow(subset_data)
  head(stat)
  mygene=c(mygene,subset_data$gene)

}
mygene



All$clustering_spot_from_loupe=factor(All$clustering_spot_from_loupe,levels = c(seq(1,11,1)))
Idents(All)=All$clustering_spot_from_loupe
DoHeatmap(All,features = mygene)

DimPlot(All,label = T)
getwd()

jpeg("heatmap_mygene_20.jpg",width = 13,height = 15,units = 'in', res=400)
p=DoHeatmap(All, features =mygene,size = 2) 
print(p)
dev.off()




#
markers=FindAllMarkers(All,only.pos = T)
getwd()
write.xlsx(markers,file = "G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/markers_forheatmap.xlsx")


data.markers=markers
head(data.markers)
library(dplyr)
topgene<-data.markers %>% group_by(cluster) %>% top_n(n = 30, wt = avg_log2FC)
head(topgene)
openxlsx::write.xlsx(topgene,file = "G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/markers_to30p_forheatmap.xlsx")

openxlsx::write.xlsx(topgene,file = "G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/markers_to50p_forheatmap.xlsx")

topgene

DoHeatmap(All, features = topgene$gene,size = 2) + NoLegend()

getwd()
jpeg("heatmap_20.jpg",width = 15,height = 13,units = 'in', res=400)
p=DoHeatmap(All, features = topgene$gene,size = 2) 
print(p)
dev.off()

jpeg("heatmap_10.jpg",width = 13,height = 10,units = 'in', res=400)
p=DoHeatmap(All, features = topgene$gene,size = 2) 
print(p)
dev.off()


getwd()
jpeg("heatmap_30.jpg",width = 15,height = 17,units = 'in', res=400)
p=DoHeatmap(All, features = topgene$gene,size = 2) 
print(p)
dev.off()


###


topgene=openxlsx::read.xlsx("G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/markers_to30p_forheatmap.xlsx")
head(topgene)

getwd()
save(All,file = "G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/All_from_loupe_for_heatmap.rds")
load("G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/All_from_loupe_for_heatmap.rds")