library(dplyr)
f = "markers.Rdata"
if(!file.exists(f)){
markers <- FindAllMarkers(seu.obj, only.pos = TRUE,min.pct = 0.25)
save(markers,file = f)
}
#min.pct这个参数值的意思是基因至少要在25%的细胞中表达。
load(f)
#取每个细胞簇logFC排名前n的marker基因
mks = markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC)
#n=2就是每个簇取前2,n= 6就是每个簇取前6
#很有可能有重复,就是有一些基因在不同簇中都被计为marker
#但有重复的话后面画图就会出错,所以保险起见都加上unique(),去除重复
g = unique(mks$gene)
#markergene的五种可视化
#热图
library(ggplot2)
DoHeatmap(seu.obj, features = g) + NoLegend()+
scale_fill_gradientn(colors = c("#2fa1dd", "white", "#f87669"))
#气泡图
DotPlot(seu.obj, features = g,cols = "RdYlBu") +
RotatedAxis()
#单个基因的图
VlnPlot(seu.obj, features = g[1:2])
FeaturePlot(seu.obj, features = g[1:4])
#RidgePlot(seu.obj, features = g[1:2]) #峰峦图,用得少
#注释亚群
##手动注释
#手动注释要检查是否合理,比如T细胞和NK细胞应该是在一起的,B细胞应该是单独的。
writeLines(readLines("my_markers.txt"))
##将"my_markers.txt"文件的内容原样读入,并给我看一下
a = read.table("my_markers.txt",sep = ",")
gt = split(a[,2],a[,1]) #拆分为列表的格式(split)
DotPlot(seu.obj, features = gt,cols = "RdYlBu") +
RotatedAxis()
unique(a$V1) #用来列出细胞名
writeLines(paste0(0:11,",")) #把这些结果放到anno.txt里,开始手动注释
#注释完成
celltype = read.table("anno.txt",sep = ",")
celltype
new.cluster.ids <- celltype$V2
names(new.cluster.ids) <- levels(seu.obj)
sce <- RenameIdents(seu.obj, new.cluster.ids)
save(sce,file = "sce.Rdata")
p2 <- DimPlot(sce, reduction = "umap",
label = TRUE, pt.size = 0.5) + NoLegend()
p1+p2 #把图放到一起
##自动注释
library(celldex)
library(SingleR)
ls("package:celldex")
f = "ref_BlueprintEncode.RData"
if(!file.exists(f)){
ref <- celldex::BlueprintEncodeData()
save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = seu.obj #防止替换出错
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test,
ref = ref,
labels = ref$label.main,
clusters = scRNA@active.ident)
pred.scRNA$pruned.labels
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p3 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p2+p3