紧接着UMAP visualization 测试练手版-CSDN博客进行后续操作!
本文仅供本人练习使用!下面所有代码都进行了较大幅度的修正!
https://mp.weixin.qq.com/s/t6-h48XSCWTkhnfL693eDw
单细胞专题 | Seurat V5一行代码去除单细胞测序批次效应_哔哩哔哩_bilibili
# 加载包
library(dplyr)
library(Seurat)
library(patchwork)
# 设置路径,导入文件
setwd("/data3/shenzq/test")
# 加载seurat对象
seurat_cluster <- readRDS("GSE242889_cluster.rds")
## 查看先前的操作是否存在批次效应,按照样本分开展示umap图,查看“在1个claster中6个样本都有一定的占比+在1个claster中样本的来源单一=>存在批次效应”
colors <- c("#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F", "#1F78B4", "#33A02C")
sample_colors <- c("#FF6F61", "#6B5B95", "#70C1B3", "#FFBE0B", "#FF85A1", "#A05195")
p <- DimPlot(seurat_cluster, reduction = "umap", group.by = "orig.ident",cols = sample_colors)
p
若有批次效应,则需要从头开始操作
setwd("/data3/shenzq/test")
# 加载
seurat <- readRDS("GSE242889.rds")
## 质控(与之前操作一样)
# [[ 符号可以向Seurat的metadata中添加列。把用于质控数据放进去
### 如果是小鼠要改成"^mt-"
seurat[["percent.mt"]] <- PercentageFeatureSet(seurat, pattern = "^MT-")
# 按标准筛选细胞
seurat <- subset(seurat, subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 20 & nCount_RNA < 30000)
## Normalize data
seurat <- NormalizeData(seurat, normalization.method = "LogNormalize", scale.factor = 10000)
## 识别高变基因(筛选基因)
seurat<- FindVariableFeatures(seurat, selection.method = "vst", nfeatures = 2000)
## scale data
all.genes <- rownames(seurat)
seurat <- ScaleData(seurat, features = all.genes)
##线性降维
seurat_pca <- RunPCA(seurat, features = VariableFeatures(object = seurat))
开始去除批次效应
方法一:Seurat V4去除批次harmony
## 安装harmony包
# install.packages("devtools")
# devtools::install_github("immunogenomics/harmony")
library(harmony)
##harmony
# RunHarmony,group.by.vars样本的来源,即“你认为谁与谁之间造成了批次效应”,若认为样本与样本间都有批次效应,则填写orig.ident(每一个细胞所属的样本,样本来源)
seurat_harmony_v4 <-RunHarmony(seurat_pca,group.by.vars = "orig.ident")
# 降维方式 reduction='harmony'
seurat_harmony_v4 <-FindNeighbors(object=seurat_harmony_v4, reduction='harmony', dims=1:20)
# FindClusters可对resolution进行修改
seurat_harmony_v4 <-FindClusters(object= seurat_harmony_v4, resolution=0.1)
# RunUMAP,reduction = "harmony"
seurat_harmony_v4 <- RunUMAP(seurat_harmony_v4,reduction = "harmony",dims = 1:20)
# RunTSNE,reduction = "harmony"
#seurat_harmony_v4 <- RunTSNE(seurat_harmony_v4,reduction = "harmony",dims = 1:20)
# 查看去除批次效应之后,细胞混合情况
## 原始情况展示
p1 <- DimPlot(seurat_harmony_v4,reduction = "umap")
## 按样本分开展示
p2 <- DimPlot(seurat_harmony_v4,reduction = "umap",group.by = "orig.ident")
## 换个颜色
p3 <- DimPlot(seurat_harmony_v4,reduction = "umap",cols = colors)
p4 <- DimPlot(seurat_harmony_v4,reduction = "umap",group.by = "orig.ident",cols = sample_colors)
# 可以看出不同样本之间的细胞混合得不错,每个cluster中的细胞都是来自多个样本的,去除批次成功!
p3+p4
方法二:Seurat V5一行代码去批次
用Seurat V5中新增的IntegrateLayers函数,可以实现一行代码去批次,当前支持以下几种主流的单细胞去批次,其中包含了我们上面用到的harmony。
Anchor-based CCA integration (method=CCAIntegration)
Anchor-based RPCA integration (method=RPCAIntegration)
harmony (method=HarmonyIntegration)
FastMNN (method= FastMNNIntegration)
前面的分析流程还是一样的,跑完RunPCA这一步后,去除批次效应,上面我们把PCA过后的seurat对象保存为seurat_pca,下面直接展示去除批次的部分。
# 1.CCA
## IntegrateLayers,object = seurat_pca做完pca之后,orig.reduction = "pca", new.reduction = "cca"
seurat_merge_v5 <- IntegrateLayers(
object = seurat_pca, method = CCAIntegration,
orig.reduction = "pca", new.reduction = "cca",
verbose = FALSE
)
# 2.RPCA
## IntegrateLayers,new.reduction = "rpca"
seurat_merge_v5 <- IntegrateLayers(
object = seurat_merge_v5, method = RPCAIntegration,
orig.reduction = "pca", new.reduction = "rpca",
verbose = FALSE
)
# 3.harmony
## IntegrateLayers,new.reduction = "harmony"
seurat_merge_v5 <- IntegrateLayers(
object = seurat_merge_v5, method = HarmonyIntegration,
orig.reduction = "pca", new.reduction = "harmony",
verbose = FALSE
)
#remotes::install_github("satijalab/seurat-wrappers")
#BiocManager::install('batchelor')
#devtools::install_github("satijalab/seurat-data", force = TRUE)
library(SeuratData)
library(batchelor)
library(SeuratWrappers)
# 4.FastMNN
seurat_merge_v5 <- IntegrateLayers(
object = seurat_merge_v5, method = FastMNNIntegration,
new.reduction = "mnn",
verbose = FALSE
)
去批次结果可视化
#####CCA可视化######
seurat_merge_v5 <- FindNeighbors(seurat_merge_v5, reduction = "cca", dims = 1:20)
seurat_merge_v5 <- FindClusters(seurat_merge_v5, resolution = 0.1, cluster.name = "cca_clusters")
## RunUMAP, reduction = "cca",reduction.name = "umap.cca"
seurat_merge_v5 <- RunUMAP(seurat_merge_v5, reduction = "cca",
dims = 1:20,
reduction.name = "umap.cca")
cc <- list(c(color,sample_colors))
p5 <- DimPlot(
seurat_merge_v5,
reduction = "umap.cca",
group.by = c("cca_clusters"),cols =colors)+
DimPlot(
seurat_merge_v5,
reduction = "umap.cca",
group.by = c("orig.ident"),cols =sample_colors)
p5
#####RPCA可视化######
seurat_merge_v5 <- FindNeighbors(seurat_merge_v5, reduction = "rpca", dims = 1:20)
seurat_merge_v5 <- FindClusters(seurat_merge_v5, resolution = 0.1, cluster.name = "rpca_clusters")
seurat_merge_v5 <- RunUMAP(seurat_merge_v5, reduction = "rpca",
dims = 1:20,
reduction.name = "umap.rpca")
p6 <- DimPlot(
seurat_merge_v5,
reduction = "umap.rpca",
group.by = c("rpca_clusters"),cols =colors)+
DimPlot(
seurat_merge_v5,
reduction = "umap.rpca",
group.by = c("orig.ident"),cols =sample_colors)
p6
#####harmony可视化######
seurat_merge_v5 <- FindNeighbors(seurat_merge_v5, reduction = "harmony", dims = 1:20)
seurat_merge_v5 <- FindClusters(seurat_merge_v5, resolution = 0.1, cluster.name = "harmony_clusters")
seurat_merge_v5 <- RunUMAP(seurat_merge_v5, reduction = "harmony",
dims = 1:20,
reduction.name = "umap.harmony")
p7 <- DimPlot(
seurat_merge_v5,
reduction = "umap.harmony",
group.by = c("harmony_clusters"),cols =colors)+
DimPlot(
seurat_merge_v5,
reduction = "umap.harmony",
group.by = c("orig.ident"),cols =sample_colors)
p7
#####FastMNN可视化########
seurat_merge_v5 <- FindNeighbors(seurat_merge_v5, reduction = "mnn", dims = 1:20)
seurat_merge_v5 <- FindClusters(seurat_merge_v5, resolution = 0.1, cluster.name = "mnn_clusters")
seurat_merge_v5 <- RunUMAP(seurat_merge_v5, reduction = "mnn",
dims = 1:20,
reduction.name = "umap.mnn")
p8 <- DimPlot(
seurat_merge_v5,
reduction = "umap.mnn",
group.by = c("mnn_clusters"),cols =colors)+
DimPlot(
seurat_merge_v5,
reduction = "umap.mnn",
group.by = c("orig.ident"),cols =sample_colors)
p8
以上部分完成了去除批次效应,接下来进行细胞类型注释!