单细胞学习小组-D9

Lulu是小虎

已于 2024-06-23 21:49:43 修改

阅读量283

点赞数 8

分类专栏：花花的学习小组文章标签：学习

于 2024-06-22 23:13:09 首次发布

本文链接：https://blog.csdn.net/weixin_46817132/article/details/139889756

版权

花花的学习小组专栏收录该内容

7 篇文章 0 订阅

订阅专栏

倒数第二天啦辛苦花花

今天学习二次分群和细胞周期

二次分群可以对有意义的细胞进行深入探索（也可以单独针对某种细胞做monocle等下游呐

细胞周期主要完成步骤为这一步

marrow <- ScaleData(marrow, vars.to.regress = c("S.Score", "G2M.Score"), features = rownames(marrow))

思路是，先读取一个受到细胞周期影响严重的官方文档

用这个文档计算出来细胞周期相关的评分

然后和原数据比较，把这个影响regress掉

“Seurat does not use the discrete classifications (G2M/G1/S) in downstream cell cycle regression. Instead, it uses the quantitative scores for G2M and S phase. However, we provide our predicted classifications in case they are of interest.”

Seurat并不是武断的定义所有周期（非黑即白）而是计算出一个连续的评分，用于计算

#单样本细胞周期
rm(list = ls())
library(tidyverse)
library(Seurat)
ct = Read10X("input/")
seu.obj <- CreateSeuratObject(counts = ct, 
                              min.cells = 3, 
                              min.features = 200)
set.seed(1234)
seu.obj = subset(seu.obj,downsample = 3000)
seu.obj[["percent.mt"]] <- PercentageFeatureSet(seu.obj, pattern = "^MT-")
VlnPlot(seu.obj, 
        features = c("nFeature_RNA",
                     "nCount_RNA", 
                     "percent.mt"), 
        ncol = 3,pt.size = 0.5)
seu.obj = subset(seu.obj,
                 nFeature_RNA < 6000 &
                   nCount_RNA < 30000 &
                   percent.mt < 18)

#读取细胞周期数据
exp.mat <- read.delim("nestorawa_forcellcycle_expressionMatrix.txt",row.names = 1)
marrow <- CreateSeuratObject(counts = exp.mat,
                             project = "b",
                             min.cells = 3, 
                             min.features = 200)
marrow[["percent.mt"]] <- PercentageFeatureSet(marrow, pattern = "^MT-")
head(marrow@meta.data, 3)

VlnPlot(marrow, 
        features = c("nFeature_RNA",
                     "nCount_RNA", 
                     "percent.mt"), 
        ncol = 3,pt.size = 0.5)

#计算细胞周期评分CellCycleScoring
check_cc =  function(ob){
  s.genes <- intersect(cc.genes$s.genes,rownames(ob))
  g2m.genes <- intersect(cc.genes$g2m.genes,rownames(ob))
  ob = ob %>% 
    NormalizeData() %>%  
    FindVariableFeatures() %>%  
    CellCycleScoring(s.features = s.genes, 
                     g2m.features = g2m.genes) %>%
    ScaleData(features = rownames(.)) %>%  
    RunPCA(features = c(s.genes,g2m.genes))
  return(ob)
}
ch1 = check_cc(seu.obj)
head(ch1@meta.data)

#比较两个数据的细胞周期评分和PCA
table(ch1$Phase)
ch2 = check_cc(marrow)
table(ch2$Phase)
PCAPlot(ch1,group.by = "Phase")+ PCAPlot(ch2,group.by = "Phase")
##xlim和ylim是设置横纵坐标范围
library(patchwork)
PCAPlot(ch1,group.by = "Phase")+ 
  PCAPlot(ch2,group.by = "Phase")&
  xlim(-60,15)&
  ylim(-10,15)

p1 = VlnPlot(ch1,"S.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"S.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.6,0.6)

p1 = VlnPlot(ch1,"G2M.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"G2M.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.5,1)

#比较去除和不去除细胞周期影响的下游注释
##不去除
f = "ob1.Rdata"
if(!file.exists(f)){
  ob1 = seu.obj %>% 
    NormalizeData() %>%  
    FindVariableFeatures() %>%  
    ScaleData(features = rownames(.)) %>%  
    RunPCA(features = VariableFeatures(.))  %>%
    FindNeighbors(dims = 1:15) %>% 
    FindClusters(resolution = 0.5) %>% 
    RunUMAP(dims = 1:15) %>% 
    RunTSNE(dims = 1:15)
  save(ob1,file = f)
}
load(f)
#去除
s.genes <- intersect(cc.genes$s.genes,rownames(seu.obj))
g2m.genes <- intersect(cc.genes$g2m.genes,rownames(seu.obj))
f = "ob2.Rdata"
if(!file.exists(f)){
  ob2 = seu.obj %>% 
    NormalizeData() %>%  
    FindVariableFeatures() %>%  
    CellCycleScoring(s.features = s.genes, g2m.features = g2m.genes) %>%
    ScaleData(vars.to.regress = c("S.Score", "G2M.Score"),features = rownames(.)) %>%  #运行极其慢
    RunPCA(features = VariableFeatures(.))  %>%
    FindNeighbors(dims = 1:15) %>% 
    FindClusters(resolution = 0.5) %>% 
    RunUMAP(dims = 1:15) %>% 
    RunTSNE(dims = 1:15)
  save(ob2,file = f)
}
load(f)
p1 <- DimPlot(ob1, reduction = "umap",label = T)+NoLegend()
p2 <- DimPlot(ob2, reduction = "umap",label = T)+NoLegend()
p1+p2

#用singleR来注释
library(celldex)
library(SingleR)
ls("package:celldex")
f = "ref_BlueprintEncode.RData"
if(!file.exists(f)){
  ref <- celldex::HumanPrimaryCellAtlasData()
  save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = ob1
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p3 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
m = scRNA

scRNA = ob2
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p4 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p3+p4

table(as.character(Idents(m))==as.character(Idents(scRNA)))

Lulu是小虎

关注

8
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
单细胞学习小组-D9

二次分群可以对有意义的细胞进行深入探索（也可以单独针对某种细胞做monocle等下游呐。Seurat并不是武断的定义所有周期（非黑即白）而是计算出一个连续的评分，用于计算。思路是，先读取一个受到细胞周期影响严重的官方文档。然后和原数据比较，把这个影响regress掉。用这个文档计算出来细胞周期相关的评分。细胞周期主要完成步骤为这一步。今天学习二次分群和细胞周期。倒数第二天啦辛苦花花。
复制链接

扫一扫