这里不介绍安装,直接介绍seurat如何结合Slingshot进行分析,想要看从头的,可以查看R包官网说明
Slingshot: Trajectory Inference for Single-Cell Data
做轨迹分析前已经有一个经过质控、降维、聚类并且完成注释的 seurat
对象
1、slingshot
接收的是 sce
对象,因此我们首先要构建一个
library(SingleCellExperiment)
library(slingshot)
#-----------------------------------step1 构建SingleCellExperiment对象
#导入seurat对象
load('seurat.Rdata')
#提取标化完后的数据以及细胞名称(一般是2000HVG)
scale.data <- scobj@assays$RNA@scale.data
scale.gene <- rownames(scale.data)
#提取原始count值(2000HVG)
counts <- scobj@assays$RNA@counts
counts <- counts[scale.gene,]
# 将表达矩阵转换为SingleCellExperiment对象
sim <- SingleCellExperiment(assays = List(counts = counts))
#----------------------------------step2 把seurat的umap信息加入SingleCellExperiment对象
# umap reduction
umap = scobj@reductions$umap@cell.embeddings
colnames(umap) = c('UMAP-1', 'UMAP-2')
# 将降维的结果添加到SingleCellExperiment对象中
reducedDims(sim) = SimpleList(UMAP = umap)
rd = umap
plot(rd,pch=16, asp = 1)
# metadata
meta = scobj@meta.data
# colData(sim)相当于meta.data,但他不是data.frame格式
# 所以需要分步赋予
colData(sim)$orig.ident = meta$orig.ident
colData(sim)$celltype = meta$celltype
#----------------------------------step3 使用slingshot构建分化谱系并进行拟时推断
sim <- slingshot(sim,
clusterLabels = 'celltype', # 选择colData中细胞注释的列名
reducedDim = 'UMAP',
start.clus= "8cell", # 选择起点
end.clus = NULL # 这里我们不指定终点
)
colnames(colData(sim))
可以根据列名,看到具体有几个时序的变化
#-------------------------------step4 可视化
#加入颜色
colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100) # 我们把这些颜色变成100个梯度,模拟渐变色
plotcol <- colors[cut(sim$slingPseudotime_3, breaks=100)] # 这里我们用cut函数把 lineage2 分割成100个区间,同一区间的细胞给与同一个颜色
plotcol[is.na(plotcol)] <- "lightgrey" # 不属于 lineage3 的点为NA,我们把他们变成灰色
plotcol
sim@colData$plotcol<-plotcol
plot(reducedDims(sim)$UMAP, col = plotcol, pch=16, asp = 1)
lines(SlingshotDataSet(sim), lwd=2, col=brewer.pal(9,"Set1"))
legend("right",
legend = paste0("lineage",1:6),
col = unique(brewer.pal(6,"Set1")),
inset=0.8,
pch = 16)
#-----------------------------------step5 tradeSeq 下游分析
library(tradeSeq)
# Fit negative binomial model
counts <- sim@assays@data$counts
crv <- SlingshotDataSet(sim)
#拟合负二项式模型 需要决定结的数量
set.seed(111)
icMat <- evaluateK(counts = counts,
sds = crv,
k = 3:10, # no more than 12
nGenes = 500,
verbose = T)
set.seed(111)
pseudotime <- slingPseudotime(crv, na = FALSE)
cellWeights <- slingCurveWeights(crv)
# fit negative binomial GAM
# 2k cells ~13 min
# system.time()这个函数可以计算运行时间
sce <- fitGAM(counts = counts,
pseudotime = pseudotime,
cellWeights = cellWeights,
nknots = 6,
verbose = FALSE)
#探索基因表达与拟时序的相关性
assoRes <- associationTest(sce)
head(assoRes)
#寻找与起止点相关性最高的基因
startRes <- startVsEndTest(sce)
head(startRes)
# 按相关性进行排序
oStart <- order(startRes$waldStat, decreasing = TRUE)
# 挑选相关性最强的基因,并可视化
sigGeneStart <- names(sce)[oStart[1]]
plotSmoothers(sce, counts, gene = sigGeneStart)
#--------------美化基因表达量变化图
# 取celltpye和配色信息
coldata <- data.frame(celltype = sim@colData$celltype,
plotcol = sim@colData$plotcol)
rownames(coldata) = colnames(sim)
# 把sce中的3000个细胞对应信息取出
filter_coldata <- coldata[colnames(sce),]
# 添加拟时序信息
filter_coldata$Pseudotime = sce$crv$pseudotime.Lineage1
# top6 genes
top6 <- names(sce)[oStart[1:6]]
top6_exp = sce@assays@data$counts[top6,]
top6_exp = log2(top6_exp + 1) %>% t()
# 获得最终数据
plt_data = cbind(filter_coldata, top6_exp)
colnames(plt_data)
#画图
#画图
library(ggplot2)
library(ggsci)
library(ggpubr)
library(RColorBrewer)
getPalette = colorRampPalette(brewer.pal(12, "Paired"))
mycolors = getPalette(length(unique(top6)))
# 为了拼图美观,我们把legend隐藏掉
plt_list = list()
for (gene in top6) {
print(gene)
p = ggscatter(data = plt_data,
x = 'Pseudotime',
y = gene,
color = 'celltype',
size = 0.6)+
geom_smooth(se = F, color = 'orange')+
theme_bw()+
scale_color_manual(values = mycolors)+
theme(legend.position = 'none')
plt_list[[gene]] = p
}
library(patchwork)
wrap_plots(plt_list)