空间转录组从loupe软件自动聚类之后,分为11个群,使用seurat中的doheatmap函数画出top20基因表达热图
此处使用标准流程来进行标准化 normalization
st=ls())
library(patchwork)
library(ggplot2)
library(ggalluvial)
library(svglite)
library(Seurat)
library(openxlsx)
options(stringsAsFactors = FALSE)
getwd() ##富集分析 loupe导出的列表进行富集分析 go kegg gsea 注意富集条件
#path = "/data/home/longmin/code_test/silicosis_ST/YLL/0105"
#path= "G:/silicosis/sicosis/silicosis_ST/yll/0105"
path="G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/"
dir.create(path)
setwd(path)
getwd()
dir()
dir(include.dirs = TRUE)
getwd()
list.files()
list.files(full.names = TRUE) #显示全路径的文件 文件的全路径
fs=list.files(pattern = '.cluster')
fs
clustering_spot=read.csv(fs)
head(clustering_spot)
clustering_spot=clustering_spot[,1:2]
head(clustering_spot)
rownames(clustering_spot)=clustering_spot$Barcode
head(clustering_spot)
files = dir()[grep("Genes.csv", dir())] #必须把相应的文件放在文件目录下
files
fs=list.files(pattern = '.h5')
fs
library(Seurat)
a=Read10X_h5(fs)
a[1:4,1:4]
length(colnames(a))
colnames(a)[grepl(pattern = '2',colnames(a))]
match(c(1,3,4,0), c(3,4,5,1))
match(colnames(a),rownames(clustering_spot))
match(rownames(clustering_spot),colnames(a))
b=a[,match(rownames(clustering_spot),colnames(a))]
length(colnames(b))
??CreateSeuratObject
All=CreateSeuratObject(counts = b,project = 'spatial_data')
All$percent.mt=PercentageFeatureSet(All,pattern = "^mt-")
myindex=match(colnames(All),rownames(clustering_spot))
graphclustering=clustering_spot$graphclust[c(myindex)]
All$clustering_spot_from_loupe=graphclustering
colnames(All)[1:10]
Idents(All)[1:10]
Idents(All)=All$clustering_spot_from_loupe
clustering_spot[rownames(clustering_spot)=='AAACATTTCCCGGATT-1',]
table(Idents(All))
table(Idents(All))
table(is.na(Idents(All)))
VlnPlot(All, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
All = All%>%Seurat::NormalizeData(verbose = FALSE) %>%
FindVariableFeatures(selection.method = "vst", nfeatures = length(rownames(All))) %>%
ScaleData(verbose = FALSE)
All = RunPCA(All, npcs = 50, verbose = FALSE)
ElbowPlot(All, ndims = 50)
#######################cluster
All <- All %>%
RunUMAP(reduction = "pca", dims = 1:30) %>%
RunTSNE(reduction = "pca", dims = 1:30) %>%
FindNeighbors(reduction = "pca", dims = 1:30)
All<-All%>% FindClusters(resolution = 3) %>% identity()
DimPlot(All,label ='clustering_spot_from_loupe' )
Idents(All)=All$clustering_spot_from_loupe
table(Idents(All))
table(is.na(Idents(All)))
DimPlot(All)
##
getwd()
markers_from_loupe=read.csv("G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/all-merged-Graph-Based Genes.csv")
head(markers_from_loupe)
input=markers_from_loupe
n = (ncol(input)-2)/3 #获取每个csv文件中cluster的个数
n
head(input)
k <- keys(org.Mm.eg.db, keytype = "ENTREZID") #获取小鼠的entrezid 和gene symbol的对应关系,注释包
gene.list <- select(org.Mm.eg.db, keys = k, columns = c("SYMBOL", "ENSEMBL"), keytype = "ENTREZID")## 73306 3
head(gene.list)
input$FeatureName.entrez = gene.list[match(input$FeatureID, gene.list$ENSEMBL),"ENTREZID"]
#在csv文件的基础上添加symbol与entrezid相对应的列FeatureName.entrez
print(head(input,15))
head(input[,grepl(pattern = 'Average',colnames(input))])
stat = data.frame()
stat
mygene=c()
for(cluster in 1:n){
#cluster=1
print(cluster)
#提取input中的"FeatureName.entrez"列和 cluster.1.log2.Fold.Change列
input.sub = na.omit(input[,c("FeatureName",paste0("Cluster.",cluster,".Log2.Fold.Change"))])
head(input.sub)
#write.table(input.sub, paste0(unlist(strsplit(i, " "))[1], "_Cluster_", cluster, "_log2FC.txt"), quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
input.sub = na.omit(input[,c("FeatureName",paste0("Cluster.",cluster,".Log2.Fold.Change"),paste0("Cluster.",cluster,".P.Value"))])
colnames(input.sub) = c("gene", "log2FC", "PValue")
head(input.sub)
subset_data = head(subset(input.sub, log2FC>1&PValue<0.01),n=20) ###可以自己定义的地方 这里选择上调变化大于2倍,且p-value小于0.01的基因列表
subset_data
stat[paste0(unlist(strsplit('i', " "))[1], "_Cluster_", cluster), "up"] = nrow(subset_data)
head(stat)
mygene=c(mygene,subset_data$gene)
}
mygene
All$clustering_spot_from_loupe=factor(All$clustering_spot_from_loupe,levels = c(seq(1,11,1)))
Idents(All)=All$clustering_spot_from_loupe
DoHeatmap(All,features = mygene)
DimPlot(All,label = T)
getwd()
jpeg("heatmap_mygene_20.jpg",width = 13,height = 15,units = 'in', res=400)
p=DoHeatmap(All, features =mygene,size = 2)
print(p)
dev.off()
#
markers=FindAllMarkers(All,only.pos = T)
getwd()
write.xlsx(markers,file = "G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/markers_forheatmap.xlsx")
data.markers=markers
head(data.markers)
library(dplyr)
topgene<-data.markers %>% group_by(cluster) %>% top_n(n = 30, wt = avg_log2FC)
head(topgene)
openxlsx::write.xlsx(topgene,file = "G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/markers_to30p_forheatmap.xlsx")
openxlsx::write.xlsx(topgene,file = "G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/markers_to50p_forheatmap.xlsx")
topgene
DoHeatmap(All, features = topgene$gene,size = 2) + NoLegend()
getwd()
jpeg("heatmap_20.jpg",width = 15,height = 13,units = 'in', res=400)
p=DoHeatmap(All, features = topgene$gene,size = 2)
print(p)
dev.off()
jpeg("heatmap_10.jpg",width = 13,height = 10,units = 'in', res=400)
p=DoHeatmap(All, features = topgene$gene,size = 2)
print(p)
dev.off()
getwd()
jpeg("heatmap_30.jpg",width = 15,height = 17,units = 'in', res=400)
p=DoHeatmap(All, features = topgene$gene,size = 2)
print(p)
dev.off()
###
topgene=openxlsx::read.xlsx("G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/markers_to30p_forheatmap.xlsx")
head(topgene)
getwd()
save(All,file = "G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/All_from_loupe_for_heatmap.rds")
load("G:/silicosis/sicosis/silicosis_ST/yll/0113-3_2022_8-19_doheatmap/All_from_loupe_for_heatmap.rds")