前言:当下基于高通量测序的10X Visium 空间转录组技术还达不到单细胞转录组的分辨率,所使用的 6.5 X 6.5mm的捕获区域包括5000个孔(spot),其每个spot的直径为55μm,远超单个细胞体积,导致测得的每个孔(spot)可能包含2-10个以上的细胞,这会导致特定位置的基因表达是同质或者异质细胞类型的混合状态。(10X Visium HD已与2024年初正式发布,可以实现空间单细胞分辨率,但离大规模使用还有一段时间)
因此,需要使用细胞类型去卷积(deconvolution)的分析工具去解析每个孔中的细胞类型组成,从而实现对细胞类型的空间定位。 现在此类分析工具出来很多,像SPOTlight、RCTD、CARD、Cell2location、STdeconvolve等。
本次实战3给大家介绍的是一款2022年发表在Nature Biotechnology杂志上的空转细胞类型去卷积软件——CARD
文章:Spatially informed cell type deconvolution for spatial transcriptomics
CARD分析步骤示意图
接实战2:
首先需要找到相同癌种的单细胞转录组数据,本次实战使用GEO单细胞数据,编号:GSE138794
下载该数据集下的GSM4119531— GSM4119535这5个单细胞转录组数据
①进行单细胞数据分析流程
##实战3:结合10X单细胞数据对空间转录组数据进行反卷积细胞注释
##加载R包
library(Seurat)
library(Matrix)
library(harmony)
library(hdf5r)
library(ggplot2)
library(data.table)
library(dplyr)
##GBM单细胞数据:GSM4119531,GSM4119532
setwd('E:/GSE138794/') #单细胞矩阵存放路径
dir = c("GSM4119531/","GSM4119532/","GSM4119533/","GSM4119534/","GSM4119535/")
names(dir) <- c('GSM4119531','GSM4119532','GSM4119533','GSM4119534','GSM4119535')
scRNAlist <- list()
#每个样本创建一个seurat对象,并存放到scRNAlist里
for(i in 1:length(dir)){
counts <- Read10X(data.dir = dir[i],gene.column = 1)
scRNAlist[[i]] <- CreateSeuratObject(counts, min.cells=3, min.features = 200)
}
#使用merge将两个单细胞合并成一个seurat对象
scRNA <- merge(scRNAlist[[1]], y = scRNAlist[2:5])
#查看各样本细胞数量
table(scRNA@meta.data$orig.ident)
#QC质控
scRNA[["mt_percent"]] <- PercentageFeatureSet(scRNA, pattern = "^MT-")
#可视化QC指标,用来过滤细胞
VlnPlot(scRNA, features = c("nFeature_RNA", "nCount_RNA", "mt_percent"), ncol = 3,raster=FALSE)
##数据质控参数,根据上一步小提琴结果来设定
minGene=200
maxGene=7500
mt=10
scRNA <- subset(scRNA, subset = nFeature_RNA > minGene & nFeature_RNA < maxGene & mt_percent < mt)
#可视化质控效果
VlnPlot(scRNA, features = c("nFeature_RNA", "nCount_RNA", "mt_percent"), ncol = 3,raster=FALSE)
##筛选高可变基因,并进行数据降维
scRNA <- NormalizeData(scRNA) %>%
FindVariableFeatures(selection.method = "vst",nfeatures = 3000) %>%
ScaleData() %>%
RunPCA(npcs = 30, verbose = T)
library(harmony)
###使用harmony进行多样本整合&批次矫正
scRNA <- RunHarmony(scRNA,reduction = "pca",group.by.vars = "orig.ident",reduction.save = "harmony")
#RUN UMAP
scRNA <- RunUMAP(scRNA, reduction = "harmony", dims = 1:30,reduction.name = "umap")
#查看去批次效果
DimPlot(scRNA, reduction = "umap",group.by = "orig.ident")
总体效果尚可,GSM4119535样本看起来有些特殊
# FindClusters
scRNA <- FindNeighbors(scRNA, reduction = "harmony", dims = 1:30) %>%
FindClusters(resolution = 0.6)
DimPlot(scRNA, reduction = "umap",group.by = "seurat_clusters",label = T)
#####手动注释:依据 原文marker,Dotplot展示各亚群marker
Marker = list('MES like'=c('CHI3L1', 'ADM'),
'AC like'=c('MLC1', 'HOPX'),
'NPC like' =c('CD24'),
'OPC like'=c('PDGFRA','OLIG1'),
Oligo=c('PTGDS','MBP'),
Mac=c('CD163', 'CD68'))
#气泡图展示marker在各cluster表达情况
Dot<-DotPlot(scRNA,features=Marker,cols = c('gray','red')) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1)
)
Dot
#根据 上一步DotPlot结果,进行初步注释
scRNA@meta.data$celltype <- NA
scRNA@meta.data$celltype[scRNA@meta.data$seurat_clusters %in% c('0','5','6','13')] <- "OPC like"
scRNA@meta.data$celltype[scRNA@meta.data$seurat_clusters %in% c('1','7')] <- "Macrophage"
scRNA@meta.data$celltype[scRNA@meta.data$seurat_clusters %in% c('2','3','8','9')] <- "AC like"
scRNA@meta.data$celltype[scRNA@meta.data$seurat_clusters %in% c('4','11','14')] <- "Oligo"
scRNA@meta.data$celltype[scRNA@meta.data$seurat_clusters %in% c('3','8','10','12')] <- "MES like"
scRNA@meta.data$celltype[scRNA@meta.data$seurat_clusters %in% c('15')] <- "NPC like"
Idents(scRNA)<-scRNA$celltype
DimPlot(scRNA, reduction = "umap",label = T)
#保存单细胞分析结果
save(scRNA,file = 'scRNA.rdata')
初步将GBM 单细胞数据分为6种细胞类型,细胞注释过程仅供参考。
②进行 CRAD 空转反卷积分析流程
devtools::install_github('YingMa0107/CARD') #安装CARD
devtools::install_github('xuranw/MuSiC') #安装依赖包
###加载CARD包
library(CARD)
library(MuSiC)
library(Seurat)
library(patchwork)
library(tidyverse)
#载入实战2保存的 GBM4 空转数据
load('GBM4.rdata')
###获取空转的counts表达矩阵
spatial_count <- GBM4@assays$Spatial@counts
spatial_count[1:4,1:4]
###获取空转的空间位置矩阵
spatial_loca <- GBM4@images$GBM4@coordinates
spatial_location <- spatial_loca[,2:3]
#名字必须是x y ,不然后面CARD_deconvolution会报错
colnames(spatial_location) <- c("x","y")
spatial_location[1:4,]
# x y
#AAACAAGTATCTCCCA-1 50 102
#AAACACCAATAACTGC-1 59 19
#AAACAGAGCGACTCCT-1 14 94
#AAACAGGGTCTATATT-1 47 13
#载入实战3保存的 GBM-scRNA 单细胞数据
load('scRNA.rdata')
#获取单细胞counts矩阵
sc_count <- scRNA@assays$RNA@counts
#获取单细胞细胞注释矩阵
sc_meta <- scRNA@meta.data %>%
rownames_to_column("cellID") %>%
dplyr::select(cellID,orig.ident,celltype) %>%
mutate(CB = cellID) %>%
column_to_rownames("CB")
head(sc_meta)
head(sc_meta)
# cellID orig.ident celltype
#GSM4119531_AAACCTGAGTCAAGGC GSM4119531_AAACCTGAGTCAAGGC GSM4119531 MES like
#GSM4119531_AAACCTGTCAGGCAAG GSM4119531_AAACCTGTCAGGCAAG GSM4119531 Macrophage
#GSM4119531_AAACCTGTCCTGCCAT GSM4119531_AAACCTGTCCTGCCAT GSM4119531 OPC like
##构建CARD对象
CARD_obj = createCARDObject(
sc_count = sc_count,
sc_meta = sc_meta,
spatial_count = spatial_count,
spatial_location = spatial_location,
ct.varname = "celltype",
ct.select = unique(sc_meta$celltype), #细胞类型列名
sample.varname = "orig.ident")
## QC on scRNASeq dataset! ...
## QC on spatially-resolved dataset! ...
#CARD 解卷积
CARD_obj = CARD_deconvolution(CARD_object = CARD_obj)
## create reference matrix from scRNASeq...
## Select Informative Genes! ...
## Deconvolution Starts! ...
## Deconvolution Finish! ...
#CARD-spot 可视化spot的细胞类型分布饼图
colors = c("#4DAF4A","#F0027F","#377EB8","#FDC086","#A6761D","#FFFF00","#BEAED4",
"#BF5B17","#666666")
p1<-CARD.visualize.pie(proportion = CARD_obj@Proportion_CARD,
spatial_location = CARD_obj@spatial_location,
colors = colors)
p2<-SpatialPlot(GBM4,group.by = 'Region',cols = c('Normal'='#007799','Tumor'='#AA0000'))
p1+p2
左图反卷积结果显示,Tumor区域主要是OPC like细胞
选择一些感兴趣的细胞类型进行可视化,查看细胞类型比例的空间分布
#选择一些感兴趣的细胞类型分别进行可视化
ct.visualize = c("OPC like","AC like","MES like")
p3 <- CARD.visualize.prop(proportion = CARD_obj@Proportion_CARD,
spatial_location = CARD_obj@spatial_location,
ct.visualize = ct.visualize,
colors = c("lightblue","lightyellow","red"),
NumCols = 3,pointSize = 1)#图中spot大小
p3
图片向右旋转了90°,可以自行对CARD.visualize.prop函数进行更改。
#原始的基因表达
p4 <- CARD.visualize.gene(
spatial_expression = CARD_obj@spatial_countMat,
spatial_location = CARD_obj@spatial_location,
gene.visualize = c("SNAP25","SYT1","JUNB"),
colors = NULL,
NumCols =3)
p4
#保存 CARD 反卷积结果
save(CARD_obj,file = 'CARD_obj.rdata')
以上就是本次实战3的内容,下期将对ST空间转录组数据进行空间细胞通讯分析!!