倒数第二天啦辛苦花花
今天学习二次分群和细胞周期
二次分群可以对有意义的细胞进行深入探索(也可以单独针对某种细胞做monocle等下游呐
细胞周期主要完成步骤为这一步
marrow <- ScaleData(marrow, vars.to.regress = c("S.Score", "G2M.Score"), features = rownames(marrow))
思路是,先读取一个受到细胞周期影响严重的官方文档
用这个文档计算出来细胞周期相关的评分
然后和原数据比较,把这个影响regress掉
“Seurat does not use the discrete classifications (G2M/G1/S) in downstream cell cycle regression. Instead, it uses the quantitative scores for G2M and S phase. However, we provide our predicted classifications in case they are of interest.”
Seurat并不是武断的定义所有周期(非黑即白)而是计算出一个连续的评分,用于计算
#单样本细胞周期
rm(list = ls())
library(tidyverse)
library(Seurat)
ct = Read10X("input/")
seu.obj <- CreateSeuratObject(counts = ct,
min.cells = 3,
min.features = 200)
set.seed(1234)
seu.obj = subset(seu.obj,downsample = 3000)
seu.obj[["percent.mt"]] <- PercentageFeatureSet(seu.obj, pattern = "^MT-")
VlnPlot(seu.obj,
features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt"),
ncol = 3,pt.size = 0.5)
seu.obj = subset(seu.obj,
nFeature_RNA < 6000 &
nCount_RNA < 30000 &
percent.mt < 18)
#读取细胞周期数据
exp.mat <- read.delim("nestorawa_forcellcycle_expressionMatrix.txt",row.names = 1)
marrow <- CreateSeuratObject(counts = exp.mat,
project = "b",
min.cells = 3,
min.features = 200)
marrow[["percent.mt"]] <- PercentageFeatureSet(marrow, pattern = "^MT-")
head(marrow@meta.data, 3)
VlnPlot(marrow,
features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt"),
ncol = 3,pt.size = 0.5)
#计算细胞周期评分CellCycleScoring
check_cc = function(ob){
s.genes <- intersect(cc.genes$s.genes,rownames(ob))
g2m.genes <- intersect(cc.genes$g2m.genes,rownames(ob))
ob = ob %>%
NormalizeData() %>%
FindVariableFeatures() %>%
CellCycleScoring(s.features = s.genes,
g2m.features = g2m.genes) %>%
ScaleData(features = rownames(.)) %>%
RunPCA(features = c(s.genes,g2m.genes))
return(ob)
}
ch1 = check_cc(seu.obj)
head(ch1@meta.data)
#比较两个数据的细胞周期评分和PCA
table(ch1$Phase)
ch2 = check_cc(marrow)
table(ch2$Phase)
PCAPlot(ch1,group.by = "Phase")+ PCAPlot(ch2,group.by = "Phase")
##xlim和ylim是设置横纵坐标范围
library(patchwork)
PCAPlot(ch1,group.by = "Phase")+
PCAPlot(ch2,group.by = "Phase")&
xlim(-60,15)&
ylim(-10,15)
p1 = VlnPlot(ch1,"S.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"S.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.6,0.6)
p1 = VlnPlot(ch1,"G2M.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"G2M.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.5,1)
#比较去除和不去除细胞周期影响的下游注释
##不去除
f = "ob1.Rdata"
if(!file.exists(f)){
ob1 = seu.obj %>%
NormalizeData() %>%
FindVariableFeatures() %>%
ScaleData(features = rownames(.)) %>%
RunPCA(features = VariableFeatures(.)) %>%
FindNeighbors(dims = 1:15) %>%
FindClusters(resolution = 0.5) %>%
RunUMAP(dims = 1:15) %>%
RunTSNE(dims = 1:15)
save(ob1,file = f)
}
load(f)
#去除
s.genes <- intersect(cc.genes$s.genes,rownames(seu.obj))
g2m.genes <- intersect(cc.genes$g2m.genes,rownames(seu.obj))
f = "ob2.Rdata"
if(!file.exists(f)){
ob2 = seu.obj %>%
NormalizeData() %>%
FindVariableFeatures() %>%
CellCycleScoring(s.features = s.genes, g2m.features = g2m.genes) %>%
ScaleData(vars.to.regress = c("S.Score", "G2M.Score"),features = rownames(.)) %>% #运行极其慢
RunPCA(features = VariableFeatures(.)) %>%
FindNeighbors(dims = 1:15) %>%
FindClusters(resolution = 0.5) %>%
RunUMAP(dims = 1:15) %>%
RunTSNE(dims = 1:15)
save(ob2,file = f)
}
load(f)
p1 <- DimPlot(ob1, reduction = "umap",label = T)+NoLegend()
p2 <- DimPlot(ob2, reduction = "umap",label = T)+NoLegend()
p1+p2
#用singleR来注释
library(celldex)
library(SingleR)
ls("package:celldex")
f = "ref_BlueprintEncode.RData"
if(!file.exists(f)){
ref <- celldex::HumanPrimaryCellAtlasData()
save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = ob1
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test,
ref = ref,
labels = ref$label.main,
clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p3 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
m = scRNA
scRNA = ob2
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test,
ref = ref,
labels = ref$label.main,
clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p4 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p3+p4
table(as.character(Idents(m))==as.character(Idents(scRNA)))