day6 marker基因和注释（from 花花）_if(!file.exists(f)){ + ref <- celldex::databaseimm-CSDN博客

本文链接：https://blog.csdn.net/keyanrenzhe/article/details/140405036

1.加载数据

加载昨日跑的数据

rm(list = ls())
library(Seurat)
load("../day5/obj.Rdata")
p1 = DimPlot(seu.obj,reduction = "umap",label = T)
p1

2.markergene

marker是指某个基因在某一簇细胞里表达量都很高，在其他簇表达量很低，这个基因是这簇细胞的特征基因。

library(dplyr)
f = "markers.Rdata"
if(!file.exists(f)){
  markers <- FindAllMarkers(seu.obj, only.pos = TRUE,min.pct = 0.25)
  save(markers,file = f)
}
load(f)

3.取每个细胞簇logFC排名前n的marker基因

mks = markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC)
g = unique(mks$gene)

n=2就是每个簇取前2，n= 6就是每个簇取前6

4.markergene的五种可视化

（1）

library(ggplot2)
DoHeatmap(seu.obj, features = g) + NoLegend()+
  scale_fill_gradientn(colors = c("#2fa1dd", "white", "#f87669"))

热图的一行是一个基因，一列是一个细胞（按簇排列），红色代表高表达。

（2）

气泡图，红色代表高表达,气泡的大小代表基因在某一簇里的表达比例

DotPlot(seu.obj, features = g,cols = "RdYlBu") +
  RotatedAxis()

（3）

VlnPlot(seu.obj, features = g[1:2])

（4）

FeaturePlot(seu.obj, features = g[1:4])

（5）

RidgePlot(seu.obj, features = g[1:2])

5.注释亚群

（1）手动注释

writeLines(readLines("my_markers.txt"))
a = read.table("my_markers.txt",sep = ",")
gt = split(a[,2],a[,1])

DotPlot(seu.obj, features = gt,cols = "RdYlBu") +
  RotatedAxis()

unique(a$V1)  #列出细胞类型，方便后面复制

writeLines(paste0(0:11,","))

新建一个txt，把上面代码输出的结果复制粘贴到txt里面，然后照着Dotplot来填空，保存为anno.txt即可。

celltype = read.table("anno.txt",sep = ",") 
celltype

levels(Idents(seu.obj))

new.cluster.ids <- celltype$V2
names(new.cluster.ids) <- levels(seu.obj)
sce <- RenameIdents(seu.obj, new.cluster.ids)
save(sce,file = "sce.Rdata")
p2 <- DimPlot(sce, reduction = "umap", 
              label = TRUE, pt.size = 0.5) + NoLegend()
p1+p2

（2）自动注释

library(celldex)
library(SingleR)
ls("package:celldex")

## [1] "BlueprintEncodeData"              "DatabaseImmuneCellExpressionData"
## [3] "HumanPrimaryCellAtlasData"        "ImmGenData"                      
## [5] "MonacoImmuneData"                 "MouseRNAseqData"                 
## [7] "NovershternHematopoieticData"

除了4和6是小鼠的，其他都是人类的。

f = "ref_BlueprintEncode.RData"
if(!file.exists(f)){
  ref <- celldex::BlueprintEncodeData()
  save(ref,file = f)
}
ref <- get(load(f))

library(BiocParallel)
scRNA = seu.obj
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
pred.scRNA$pruned.labels


new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p3 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p2+p3