基于单细胞marker gene数据库富集分数的细胞群注释方法

38 篇文章 7 订阅
12 篇文章 0 订阅
#### 注释细胞群(自研方法) ###
library(readxl)
anno = read_xlsx('/public/user/huguang/SC_ref/markers/PCMDB_all_maker_info.xlsx',sheet = 3)
head(anno)
anno2 = anno[,c('Gene_id','third','first','second')]
head(anno2)
trans = read.table('../rice.trans.graph.xls',col.names = c('gene_name','Gene_id'))
anno3 = merge(trans,anno2)

# markers <- markers[markers$p_val_adj < 0.05,]
# markers_filt <- markers[markers$p_val_adj < 0.05 & abs(markers$avg_log2FC) >1,]
markers_filt <- read.table('A_cluster.Markers_specific_info.xls',header = T)

markers_filt_anno = merge(markers_filt,anno3, by.x = 'gene',by.y = 'gene_name')
matr = table(markers_filt_anno$cluster,str_split(markers_filt_anno$third,pattern = '-',simplify = T)[,2])
# total = table(str_split(markers_filt_anno$third,pattern = '-',simplify = T)[,2]) # 计算背景值
# matr2 = matr/data.frame(total)$Freq  # 计算富集分数(错误)
# matr2 = t(t(matr)/data.frame(total)$Freq) # 计算富集分数(正确)

matr2 = apply(matr,2,function(x) x/sum(x)) # 计算富集分数(正确)


pheatmap::pheatmap(matr2,scale = 'row',filename='anno_matrix.png',width = 8,height = 7)
write.table(matr2,'anno_matrix.xls',sep = '\t',quote = F,col.names = NA)

# dat = data.frame(matr2)
# dat=as.data.frame(melt(matr2))
# dat_2 = dat %>% group_by(Var2) %>% top_n(n = 1, wt = Freq)
# 
# dat_2[order(dat_2$Freq,decreasing = F),] # 排序后取最优单一注释结果
apply(matr2, 1, which.max)
dat_2 = data.frame(Var1 = rownames(matr2),Var2 = colnames(matr2)[apply(matr2, 1, which.max)])

anno_list = list()
for (i in 1:nrow(dat_2)) {
  anno_list[[as.character(dat_2$Var1[i])]] = as.character(dat_2$Var2[i])
}

i=1
sceList[[i]]@meta.data$cell_annotation = 'UnKnown'
for (name in names(anno_list)) {
  sceList[[i]]@meta.data[sceList[[i]]@meta.data$seurat_clusters %in% name,'cell_annotation'] = anno_list[[name]]
}

mycolor = c("#E41A1C","#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF",'#b7bf5c',
            '#e78283','#90b4d1','#9bcd9a','#c19cc7','#f5b475','#f5f58e','#c8a089','#f1b6d5','#a2dded','#4e0c5a')
DimPlot(sceList[[i]],group.by = 'cell_annotation') + scale_color_manual(values = mycolor)
ggsave("UMAP_cell_cluster_umap_anno.png",width=8,height=7,dpi=300)
ggsave("UMAP_cell_cluster_umap_anno.pdf",width=8,height=7)


cell_num_stat = table(sceList[[i]]$orig.ident,sceList[[i]]$cell_annotation)
# cell_prc_stat = cell_num_stat/rowSums(cell_num_stat)
write.table(cell_num_stat,'cell_num_stat.xls',sep = '\t',quote = F,row.names = T,col.names = NA)

# 绘制不同样本细胞亚群堆叠柱状图
p = ggplot(sceList[[i]]@meta.data,aes(x=orig.ident),width=.2) + xlab('') + ylab('Percentage') + labs(fill="Cell type") +
  ggplot2::geom_bar(stat = 'count',aes(fill = as.factor(cell_annotation)),color='black',position = position_fill(),width = 0.9)+
  scale_fill_manual(values = mycolor) +
  theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent'),
        axis.text = element_text(size = 11), axis.title = element_text(size = 13),
        legend.text = element_text(size = 11, face='italic'))
ggsave('sce.integrated_cell_percentage.png', plot=p, dpi = 300,width = 6,height = 7)
ggsave('sce.integrated_cell_percentage.pdf', plot=p, width = 6,height = 7)
# 保存metadata数据
write.table(sceList[[i]]@meta.data,'ALL_cell_metadata.xls',sep = '\t',quote = F,col.names = NA)
### 重新定群注释方法二
tr = data.frame(cluster=0:7,cell_annotation = c('root endodermis','root procambium','mestome sheath','parenchyma cell',
                                                'parenchyma cell','root cortex','root epidermal','root hair cell'))

scATAC_list[[i]]$cell_annotation = tr$cell_annotation[match(scATAC_list[[i]]@meta.data$seurat_clusters,tr$cluster)]
###
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值