对于细胞分群效果不理想,如上皮细胞存在异质性等,可以考虑使用非负矩阵分解(NMF)的方式去定义元程序(meta program,即功能模块)和元标签(meta label,即功能基因集)。
第一步:定义相应的函数
GetData = function(
srt,
genes = NULL,
slot = 'scale.data',
assay = NULL
){
if (is.null(assay)){
assay = DefaultAssay(srt)
}
if (is.null(genes)){
if ('RNA' %in% names(srt@assays)){
genes = rownames(GetAssayData(srt, assay = 'RNA', slot = 'counts'))
} else if ('Spatial' %in% names(srt@assays)){
genes = rownames(GetAssayData(srt, assay = 'Spatial', slot = 'counts'))
} else {
genes = rownames(GetAssayData(srt, assay = assay, slot = 'counts'))
}
}
data = GetAssayData(srt, assay = assay, slot = slot)
missing = setdiff(genes, rownames(data))
add = matrix(0, nrow = length(missing), ncol = ncol(data))
rownames(add) = missing
data = rbind(data, add)
data = data[genes, ]
return(data)
}
NMFToModules = function(
res,
gmin = 5
){
scores = basis(res)
coefs = coefficients(res)
# Remove if fewer than gmin genes
ranks_x = t(apply(-t(t(scores) / apply(scores, 2, mean)), 1, rank))
ranks_y = apply(-t(t(scores) / apply(scores, 2, mean)), 2, rank)
for (i in 1:ncol(scores)){
ranks_y[ranks_x[,i] > 1,i] = Inf
}
modules = apply(ranks_y, 2, function(m){
a = sort(m[is.finite(m)])
a = a[a == 1:length(a)]
names(a)
})
l = sapply(modules, length)
keep = (l >= gmin)
scores = scores[, keep]
coefs = coefs[keep, ]
# Find modules
ranks_x = t(apply(-t(t(scores) / apply(scores, 2, mean)), 1, rank))
ranks_y = apply(-t(t(scores) / apply(scores, 2, mean)), 2, rank)
for (i in 1:ncol(scores)){
ranks_y[ranks_x[,i] > 1,i] = Inf
}
modules = apply(ranks_y, 2, function(m){
a = sort(m[is.finite(m)])
a = a[a == 1:length(a)]
names(a)
})
names(modules) = sapply(modules, '[', 1)
names(modules) = paste('m', names(modules), sep = '_')
names(modules) = gsub('-','_',names(modules))
return(modules)
}
BuildEnrichmentMatrix = function(
genes,
type = 'GO',
db = NULL
){
if (is.null(db)){
db = FindMSigDB(type)
}
terms = names(db)
enrichment.matrix = sapply(db, function(term){
genes %in% term
})
rownames(enrichment.matrix) = genes
#enrichment.matrix = enrichment.matrix[, colSums(enrichment.matrix) > 0]
enrichment.matrix[, colSums(enrichment.matrix) == 0] = NA
return(enrichment.matrix)
}
FindMSigDB = function(
type
){
library(msigdbr)
if (type == 'GO'){
gene_sets <- msigdbr(species = "Homo sapiens",category = 'C5')%>% dplyr::select(gs_name,human_entrez_gene, gene_symbol)
db = split(gene_sets$gene_symbol,f = gene_sets$gs_name)
} else if (type == 'HALLMARK'){
gene_sets <- msigdbr(species = "Homo sapiens",category = 'H')%>% dplyr::select(gs_name,human_entrez_gene, gene_symbol)
db = split(gene_sets$gene_symbol,f = gene_sets$gs_name)
} else if (type == 'MOTIF'){
gene_sets <- msigdbr(species = "Homo sapiens",category = 'C3')%>% dplyr::select(gs_name,human_entrez_gene, gene_symbol)
db = split(gene_sets$gene_symbol,f = gene_sets$gs_name)
db = db[-grep('UNKNOWN|MIR',names(db))]
} else if (type == 'PATHWAYS'){
gene_sets <- msigdbr(species = "Homo sapiens",category = 'C2')%>% dplyr::select(gs_name,human_entrez_gene, gene_symbol)
db = split(gene_sets$gene_symbol,f = gene_sets$gs_name)
db = db[grep('BIOCARTA|REACTOME|KEGG',names(db))]
} else if (type == 'BIOCARTA'){
gene_sets <- msigdbr(species = "Homo sapiens",category = 'C2')%>% dplyr::select(gs_name,human_entrez_gene, gene_symbol)
db = split(gene_sets$gene_symbol,f = gene_sets$gs_name)
db = db[grep('BIOCARTA',names(db))]
} else if (type == 'KEGG'){
gene_sets <- msigdbr(species = "Homo sapiens",category = 'C2')%>% dplyr::select(gs_name,human_entrez_gene, gene_symbol)
db = split(gene_sets$gene_symbol,f = gene_sets$gs_name)
db = db[grep('KEGG',names(db))]
} else if (type == 'REACTOME'){
gene_sets <- msigdbr(species = "Homo sapiens",category = 'C2')%>% dplyr::select(gs_name,human_entrez_gene, gene_symbol)
db = split(gene_sets$gene_symbol,f = gene_sets$gs_name)
db = db[grep('REACTOME',names(db))]
} else {
gene_sets <- msigdbr(species = "Homo sapiens",category = 'C5')%>% dplyr::select(gs_name,human_entrez_gene, gene_symbol)
db = split(gene_sets$gene_symbol,f = gene_sets$gs_name)
db = db[grep(type, names(db), ignore.case = TRUE, value = TRUE)]
}
return(db)
}
loadRData = function(fileName){
load(fileName)
get(ls()[ls() != "fileName"])
}
Heatmap = function(
matrix,
colors = NULL,
breaks = NULL,
col = NULL,
clustering_method_rows = 'ward.D2',
clustering_method_columns = 'ward.D2',
clustering_distance_rows = 'pearson',
clustering_distance_columns = 'pearson',
show_row_names = FALSE,
show_column_names = FALSE,
is.symmetric = FALSE,
...
){
if (is.null(breaks)){
mi = min(matrix, na.rm = TRUE)
ma = max(matrix, na.rm = TRUE)
if (mi < 0 & ma > 0){
breaks = c(mi, 0, ma)
} else {
breaks = c(mi, (mi + ma)/2, ma)
}
}
if (is.null(colors)){
colors = c('blue','white','red')
}
if (is.null(col)){
col = colorRamp2(breaks = breaks, colors = colors)
}
if (is.symmetric == TRUE){
h = ComplexHeatmap::Heatmap(matrix,
col = col,
clustering_method_rows = clustering_method_rows,
clustering_distance_rows = clustering_distance_rows,
cluster_columns = FALSE,
show_row_names = show_row_names,
show_column_names = show_column_names,
...)
o = unlist(row_order(h))
return(ComplexHeatmap::Heatmap(matrix,
col = col,
clustering_method_rows = clustering_method_rows,
clustering_distance_rows = clustering_distance_rows,
cluster_columns = FALSE,
column_order = o,
show_row_names = show_row_names,
show_column_names = show_column_names,
...))
} else {
return(ComplexHeatmap::Heatmap(matrix,
col = col,
clustering_method_rows = clustering_method_rows,
clustering_method_columns = clustering_method_columns,
clustering_distance_rows = clustering_distance_rows,
clustering_distance_columns = clustering_distance_columns,
show_row_names = show_row_names,
show_column_names = show_column_names,
...))
}
}
GeneToEnrichment <- function (srt, type = "GO", db = NULL, method = "rand", genes = NULL,
assay = NULL, do.rescale = FALSE, min.cells = 0, min.genes = 0,
min.var = 0, min.var.rescaled = 0, auc_percentile = 0.05,
db_rand = NULL, nrand = 4, nbin = 25, ...)
{
if (is.null(assay)) {
assay = DefaultAssay(srt)
}
if (is.null(db)) {
db = FindMSigDB(type)
}
counts = as.matrix(GetData(srt, assay = assay, slot = "counts"))
genes = rownames(counts)
genes.expr = rownames(counts)[rowSums(counts) > min.cells]
if (method == "metagene") {
data = as.matrix(GetAssayData(srt, assay = assay, slot = "scale.data"))
db = lapply(db, intersect, genes.expr)
enrichment.profile = t(sapply(names(db), function(m) {
colMeans(data[db[[m]], ], na.rm = TRUE)
}))
enrichment.profile = enrichment.profile[sapply(names(db),
function(x) {
v = var(enrichment.profile[x, ])
l = length(db[[x]])
return(l > min.genes && v > min.var && v * l^2 >
min.var.rescaled)
}), ]
if (do.rescale) {
mn = apply(enrichment.profile, 1, mean)
v = apply(enrichment.profile, 1, var)
enrichment.profile = (enrichment.profile - mn)/sqrt(v)
}
srt = AddMetaData(srt, t(enrichment.profile), col.name = rownames(enrichment.profile))
}
if (method == "auc") {
data = as.matrix(GetData(srt, assay = assay, slot = "data"))
cells_rankings = AUCell_buildRankings(data)
cells_AUC = AUCell_calcAUC(db, cells_rankings, aucMaxRank = nrow(cells_rankings) *
auc_percentile)
enrichment.profile = getAUC(cells_AUC)
if (do.rescale) {
mn = apply(enrichment.profile, 1, mean)
v = apply(enrichment.profile, 1, var)
enrichment.profile = (enrichment.profile - mn)/sqrt(v)
}
srt = AddMetaData(srt, t(enrichment.profile), col.name = rownames(enrichment.profile))
}
if (method == "score") {
temp = AddModuleScore(srt, features = db, assay = assay,
name = names(db), nbin = nbin, ...)
enrichment.profile = t(temp@meta.data[, names(db)])
if (do.rescale) {
mn = apply(enrichment.profile, 1, mean)
v = apply(enrichment.profile, 1, var)
enrichment.profile = (enrichment.profile - mn)/sqrt(v)
}
srt = AddMetaData(srt, t(enrichment.profile), col.name = rownames(enrichment.profile))
}
if (method == "rand") {
data = as.matrix(GetData(srt, assay = assay, slot = "scale.data"))
db = lapply(db, intersect, genes)
if (is.null(db_rand)) {
db_rand = MakeRand(srt, db, nrand = nrand, nbin = nbin)
}
else {
nrand = log10(length(db_rand[[1]]))
}
enrichment.profile = t(sapply(names(db), function(m) {
ra = sapply(db_rand[[m]], function(i) {
colMeans(data[i, ], na.rm = TRUE)
})
re = colMeans(data[db[[m]], ], na.rm = TRUE)
p = rowMeans(ra >= re)
p = -log10(p)
return(p)
}))
enrichment.profile[is.infinite(enrichment.profile)] = nrand
enrichment.profile = enrichment.profile/nrand
srt = AddMetaData(srt, t(enrichment.profile), col.name = rownames(enrichment.profile))
}
return(srt)
}
MakeRand <-function (srt, db, assay = NULL, nrand = 3, nbin = 25)
{
if (is.null(assay)) {
assay = DefaultAssay(srt)
}
data = GetData(srt, slot = "data")
db = lapply(db, intersect, rownames(data))
data.avg = sort(rowMeans(x = data))
data.cut = cut_number(x = data.avg + rnorm(n = length(data.avg))/1e+30,
n = nbin, labels = FALSE, right = FALSE)
names(x = data.cut) = names(x = data.avg)
binned = split(names(data.cut), data.cut)
db_rand = lapply(names(db), function(m) {
lapply(1:10^nrand, function(i) {
used = vector()
unused = binned
for (g in db[[m]]) {
pool = data.cut[g]
new = sample(unused[[pool]], 1)
used = c(used, new)
unused[[pool]] = setdiff(unused[[pool]], new)
}
return(used)
})
})
names(db_rand) = names(db)
return(db_rand)
}
第二步:读取上皮细胞数据
## 要求如下: ## 上皮细胞清洗 ## 聚类分析,排除明显的双细胞 ## 筛选基因,保留两套数据共享的基因 ## 删除细胞数少于200的样本 EpithelialCell_180 <- readRDS(file = "180.Epi.RDS") EpithelialCell_161 <- readRDS(file = "161.Epi.RDS") selectedGene <- intersect(EpithelialCell_180@assays$RNA@counts@Dimnames[[1]],EpithelialCell_161@assays$RNA@counts@Dimnames[[1]]) EpithelialCell_180 <- subset(EpithelialCell_180,features = selectedGene) EpithelialCell_161 <- subset(EpithelialCell_161,features = selectedGene) EpithelialCell_obj <- merge(EpithelialCell_180, EpithelialCell_161) selectedSample <- names(table(EpithelialCell_obj@meta.data$Patient.id))[(table(EpithelialCell_obj@meta.data$Patient.id) > 200)] EpithelialCell_obj <- subset(EpithelialCell_obj, Patient.id %in% selectedSample) subType <- function(x){ x <- NormalizeData(x, normalization.method = "LogNormalize") x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 3000) x <- ScaleData(x, vars.to.regress = c('nCount_RNA')) x <- RunPCA(x) x <- RunHarmony(object = x,group.by.vars = c('Dataset')) x <- RunUMAP(x,reduction = "harmony",dims = 1:30,seed.use = 12345) x <- FindNeighbors(x,reduction = 'harmony', dims = 1:30, verbose = FALSE) x <- FindClusters(x,resolution = 0.5, verbose = FALSE,random.seed=20220727) return(x) } EpithelialCell_obj <- subType(EpithelialCell_obj)
第三步:拆分为单样本数据集,并进行NMF
设置方法中的相关参数
range = 4:12 # 迭代次数 gmin = 5 #最低关联数量 ncores = 5 #运行核数量
拆分单样本数据集
DefaultAssay(EpithelialCell_obj) <- 'RNA' malignant_list <- SplitObject(object = EpithelialCell_obj, split.by = "Patient.id") saveRDS(malignant_list ,file = "./malignant_list.rds") malignant_list <- readRDS(file = "./malignant_list.rds") malignant_list_names <- names(malignant_list) for(i in malignant_list_names){ saveRDS(malignant_list[[i]],file = paste0('./malignant_RDS/',i,'_malignant_Seurat.RDS')) } saveRDS(malignant_list_names,file='./malignant_list_names.RDS') malignant_list_names <- readRDS(file='./malignant_list_names.RDS') path <- "./sample_list/" sample_name <- c() for(i in 1:length(dir("./sample_list/"))){ sample_name <- c(sample_name, strsplit(dir("./sample_list/")[i], split = "res.list")[[1]][1]) } for(i in malignant_list_names) { if(i %in% sample_name){ print(paste0(i, " is already over!")) next; } obj <- readRDS(file = paste0('./malignant_RDS/',i,'_malignant_Seurat.RDS')) obj <- SCTransform(obj, return.only.var.genes = TRUE) saveRDS(obj,file = paste0('./malignant_RDS/',i,'_malignant_Seurat.RDS')) data <- GetAssayData(obj, slot = 'scale.data') %>% as.matrix() data <- data[VariableFeatures(obj),] rm(obj) gc() data[data < 0] <- 0 data <- data[apply(data, 1, var) > 0, ] res.list <- parallel::mclapply(range, function(r){ nmf(data, rank = r, nrun = 1, seed = 'ica', method = 'nsNMF') }, mc.cores = ncores) names(res.list) = range saveRDS(res.list,file = paste0(path,i,"res.list.rds")) print(paste0(i, " is already over!")) }
拆分后重新读取NMF结果
malignant_list <- readRDS("./malignant_list.rds") path <- "./sample_list/" res.list_L<- list() for(i in names(malignant_list)){ a <- readRDS(paste0(path,i ,"res.list.rds")) res.list_L[[i]] <- a } names(res.list_L) <- names(malignant_list)
提取modules
#### 获取最优的NMF聚类rank,提取 modules_L <- list() res_optima_L <- list() for (i in 1:length(malignant_list)){ modules.list = lapply(res.list_L[[i]], NMFToModules, gmin = gmin) print(sapply(modules.list,length)) comp = as.numeric(names(modules.list)) - sapply(modules.list, length) mi = min(comp) r = names(which(comp == mi)) r = r[length(r)] res = res.list_L[[i]][[r]] modules = NMFToModules(res, gmin = gmin) modules_L[[i]] <- modules res_optima_L[[i]] <- res } saveRDS(object = modules_L,file = "./modules_L.rds") saveRDS(object = res_optima_L,file = "./res_optima_L.rds")
获取所有的modules
names(res_optima_L) <- names(malignant_list) modules.list <- lapply(res_optima_L, NMFToModules) all = unlist(modules.list, recursive = FALSE, use.names = FALSE) names(all) = unlist(sapply(modules.list, names)) ta = table(unlist(all)) genes.use = names(ta)[ta > 1] for (i in 1:5){ all = unlist(modules.list, recursive = FALSE, use.names = TRUE) all = lapply(all, intersect, genes.use) sim = sapply(all, function(x){ sapply(all, function(y){ length(intersect(x,y))/length(union(x,y)) }) }) keep = rownames(sim)[apply(sim, 1, function(x){ sum(x > 0.05) >= 3 #大于等于模块数相关的情况,我这里2或3就行 })] print(keep) all = all[keep] modules.list = lapply(names(modules.list), function(x){ li = modules.list[[x]] li[names(li)[paste(x,names(li),sep='.') %in% keep]] }) names(modules.list) = names(res_optima_L) ta = table(unlist(all)) genes.use = names(ta)[ta > 1] print(length(all)) } options(repr.plot.height = 7.5, repr.plot.width = 8) pheatmap(sim, show_rownames = F, show_colnames = F)
可视化结果如下:
确认最终的modules,并根据GO注释modules
sub = matrix(0, nrow = length(genes.use), ncol = length(genes.use)) rownames(sub) = genes.use colnames(sub) = genes.use for (s in names(modules.list)){ for (mod in modules.list[[s]]){ mod = intersect(mod, genes.use) for (x in mod){ for (y in mod){ sub[x,y] = sub[x,y] + 1 } } } } diag(sub) = 0 adj_keep = sub # Remove low connections adj = adj_keep adj[] = (adj >= 3) #adj[adj <= 1] = 0 for (i in 1:5){ keep = names(which(rowSums(adj) >= 5)) adj = adj[keep,keep] print(dim(adj)) } g = graph_from_adjacency_matrix(adj, diag = FALSE, mode = 'undirected', weighted = TRUE) modules = communities(cluster_infomap(g, nb.trials = 100)) names(modules) = paste0('m_', sapply(modules, '[', 1)) save(modules,file = "./modules.Rdata") #差异基因GO功能富集分析 library(AnnotationDbi) library(org.Hs.eg.db) library(clusterProfiler) library(dplyr) library(ggplot2) #基因ID转换为ENTREZID for(i in 1:length(modules)){ modules[[i]] <- bitr(modules[[i]], fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) modules[[i]] <- modules[[i]][,2] } #GO富集分析 ego_GO <- list() for(i in 1:length(modules)){ ego_GO[[i]] <- enrichGO(gene = modules[[i]], OrgDb = org.Hs.eg.db, keyType = "ENTREZID", ont = "ALL", #CC表示细胞组分,MF表示分子功能,BP表示生物学过程,ALL表示同时富集三种过程 pAdjustMethod = "BH", minGSSize = 1, pvalueCutoff = 0.01, #通常为0.01或0.05 qvalueCutoff = 0.05, readable = TRUE) } library(Hmisc) #根据GO富集的信息最终注释为以下模块 module_GO <- c("Cell-substrate adhesion","Protein and stress","Epithelial cell differentiation","Tissue homeostasis", "Immunoreaction","Caryomitosis","Cytoskeleton","Antigen processing and presentation ","Unknown", "Tissue remodeling","Cytoplasmic translation") load("./modules.Rdata") for(i in 1:length(modules)){ names(modules)[[i]] <- module_GO[i] } sim = sapply(all, function(x){ sapply(modules, function(y){ pval = phyper(length(intersect(x, y)), length(x), 2*10^4 - length(x), length(y), lower.tail = FALSE) return(-log10(pval)) }) }) sim[is.infinite(sim)] = max(sim[is.finite(sim)]) sim = sim[,apply(sim, 2, function(x){any(x > 3)})] df = data.frame('sample' = sapply(colnames(sim), function(x){ y = sapply(strsplit(x, '.', fixed = TRUE), '[', 1) })) df$top = apply(sim, 2, which.max) df$top = factor(names(modules)[df$top], levels = names(modules)) EpithelialCell_180 <- readRDS(file = "180.Epi.RDS") EpithelialCell_161 <- readRDS(file = "161.Epi.RDS") selectedGene <- intersect(EpithelialCell_180@assays$RNA@counts@Dimnames[[1]],EpithelialCell_161@assays$RNA@counts@Dimnames[[1]]) EpithelialCell_180 <- subset(EpithelialCell_180,features = selectedGene) EpithelialCell_161 <- subset(EpithelialCell_161,features = selectedGene) EpithelialCell_obj <- merge(EpithelialCell_180, EpithelialCell_161) Epi.meta <- EpithelialCell_obj@meta.data[,c(1,11,12,15)] Epi.meta$Molecular.typing[Epi.meta$Molecular.typing == "Triple negative tumor"] <- "TNBC" Epi.meta$Molecular.typing[Epi.meta$Molecular.typing == "Her-2"] <- "HER2+" Epi.meta$Molecular.typing[Epi.meta$Molecular.typing == "ER+ tumour_primary"] <- "ER+" Epi.meta$Molecular.typing[Epi.meta$Molecular.typing == "ER+ tumour_lymph"] <- "ER+" Epi.meta$Molecular.typing[Epi.meta$Molecular.typing == "Triple negative BRCA1 tumour"] <- "TNBC" Epi.meta$Molecular.typing[Epi.meta$Molecular.typing == "HER2+ tumour"] <- "HER2+" Epi.meta$Molecular.typing[Epi.meta$Molecular.typing == "PR+ tumour"] <- "PR+" Epi.meta$Molecular.typing[Epi.meta$Molecular.typing == "ER+ tumour"] <- "ER+" names(Epi.meta)[2] <- "sample" Epi.meta <- distinct(Epi.meta) df$subtype <- "non" for(i in 1:dim(df)[1]){ df$subtype[i] <- Epi.meta$Molecular.typing[which(Epi.meta$sample == df$sample[i])] } side_ann = HeatmapAnnotation(df = df[, c('subtype','top')], which = 'row') library(ComplexHeatmap) options(repr.plot.height = 7.5, repr.plot.width = 8) h = Heatmap(name = 'Module overlap p-value', sim, cluster_columns = FALSE, cluster_rows = FALSE, column_order = order(df$top), show_row_names = TRUE, border_gp = gpar(lty = 2, col = "red"), show_heatmap_legend = T, heatmap_legend_param = list( title = "Overlap p-value", legend_height = unit(4, "cm")) ) print(h)
可视化结果如下:
根据样本信息绘制热图
ovlp = sapply(all, function(x){ sapply(all, function(y){ pval = phyper(length(intersect(x, y)), length(x), 2*10^4 - length(x), length(y), lower.tail = FALSE) return(-log10(pval)) }) }) ovlp[is.infinite(ovlp)] = max(ovlp[is.finite(ovlp)]) ovlp = ovlp[colnames(sim),colnames(sim)] save(ovlp,file = "./ovlp.Rdata") bk <- seq(0, 6,length = 7) mycolors<-rev(brewer.pal(7, "RdBu")) set.seed(123) h = Heatmap(name = 'Module overlp p-value', ovlp,column_order = order(df$top), row_order = order(df$top), breaks = bk,colors =mycolors ) + side_ann print(h) fi = data.frame(sapply(modules,'[', 1:max(sapply(modules, length))), stringsAsFactors = FALSE) fi[is.na(fi)] = '' write.table(fi, file = './modules.csv', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
可视化结果如下:
第四步:发掘淋巴结转移相关的模块
通过AUCell方法计算模块活性然后进行差异比较
#根据细胞可能富集的模块计算模块活性 library(AUCell) calAUCell <- function(mode,x){ cells_rankings <- AUCell_buildRankings(x) #x为表达矩阵 cells_AUC <- AUCell_calcAUC(mode, cells_rankings, aucMaxRank = nrow(cells_rankings)*0.1) return(cells_AUC) } filterAUCell <- function(x){ cells_assignment <-AUCell_exploreThresholds(x, plotHist = TRUE, nCores = 1, assign = TRUE) #x为cells_AUC return(cells_assignment) } cells_rankings <- AUCell_buildRankings(EpithelialCell_obj@assays$RNA@data) modules_list2 <- list() for(i in names(modules)){ modules_list2[[i]] <- intersect(modules[[i]],all_gene) } cells_AUC <- AUCell_calcAUC(modules_list2, cells_rankings, aucMaxRank=nrow(cells_rankings)*0.1) warningMsg <- sapply(cells_assignment, function(x){x$aucThr$comment}) warningMsg[which(warningMsg!="")] AUC <- getAUC(cells_AUC) sample.meta <- unique(EpithelialCell_obj@meta.data$orig.ident) for(i in 1:length(sample.meta)){ sample.data <- AUC[,grep(sample.meta[i],colnames(AUC))] medians <- apply(sample.data,1,median) if(i == 1){ AUC.sample <- data.frame(medians) colnames(AUC.sample)[i] <- sample.meta[i] } else { AUC.sample <- cbind(AUC.sample, data.frame(medians)) colnames(AUC.sample)[i] <- sample.meta[i] } } sample.meta <- EpithelialCell_obj@meta.data[,c("orig.ident","TumorType")] sample.meta <- distinct(sample.meta) p.ind <- c() l.ind <- c() for(i in 1:dim(AUC.sample)[2]){ type <- sample.meta[which(colnames(AUC.sample)[i] == sample.meta[,1]),2] if(type == "Primary"){ p.ind <- c(p.ind,i) } else if(type == "Lymph"){ l.ind <- c(l.ind,i) } } AUC.L <- AUC.sample[,l.ind] AUC.P <- AUC.sample[,p.ind] sample.data <- data.frame(value = 0, id = "GSM", module = "mode", type = "tumor") for(i in 1:dim(AUC.L)[2]){ data <- data.frame(value = AUC.L[,i], id = colnames(AUC.L)[i], module = rownames(AUC.L), type = "Lymph") sample.data <- rbind(sample.data, data) } for(i in 1:dim(AUC.P)[2]){ data <- data.frame(value = AUC.P[,i], id = colnames(AUC.P)[i], module = rownames(AUC.P), type = "Primary") sample.data <- rbind(sample.data, data) } sample.data <- sample.data[-1,] library(ggpubr) library(ggsignif) options(repr.plot.height = 8, repr.plot.width = 10) ggboxplot(sample.data, x = "module", y = "AUC value", color = "type", add = "jitter")+ #palette可以按照期刊选择相应的配色,如"npg"等 stat_compare_means(aes(group = type, label = paste0("p = ",signif(as.numeric(..p.format..), 2))))+ theme(panel.grid.major = element_line(colour = NA), panel.grid.minor = element_blank(), text=element_text(size = 12, family = "serif"), axis.text.x = element_text(angle = 90))
可视化结果如下:
还可以添加样本亚型信息进行差异比较
Epi.meta <- EpithelialCell_obj@meta.data[,c(1,12)] Epi.meta$Molecular.typing[Epi.meta$Molecular.typing %in% c("Triple negative BRCA1 tumour","Triple negative tumor")] <- "TNBC" Epi.meta$Molecular.typing[Epi.meta$Molecular.typing %in% c("Her-2","ER+ tumour_primary","ER+ tumour_lymph","HER2+ tumour","PR+ tumour","ER+ tumour","luminal B")] <- "non-TNBC" names(Epi.meta)[1] <- "id" Epi.meta <- distinct(Epi.meta) sample.data <- merge(sample.data,Epi.meta) sample.data$subtype <- paste(sample.data$type,sample.data$Molecular.typing,sep = "-") options(repr.plot.height = 8, repr.plot.width = 10) ggboxplot(sample.data, x = "module", y = "AUC value", color = "subtype", add = "jitter")+ #palette可以按照期刊选择相应的配色,如"npg"等 stat_compare_means(aes(group = subtype, label = paste0("p = ",signif(as.numeric(..p.format..), 2))))+ theme(panel.grid.major = element_line(colour = NA), panel.grid.minor = element_blank(), text=element_text(size = 12, family = "serif"), axis.text.x = element_text(angle = 90))
可视化结果如下: