代码需要的数据,我也无法获取,需要大家自行申请。另外,哪位好心人可以私底下分享给我一份那就非常感谢了。
介绍
Cellular communities reveal trajectories of brain ageing and Alzheimer’s disease 文章提供了完整的代码和数据,可以直接复现单细胞数据分析。虽然它特别适合新手上路,但数据的获取很难,需要经过申请。
图1
source("5. Manuscript code/utils.R")
source("1. Library preprocessing/utils/ROSMAP.metadata.R")
# Loading list of snRNA-seq participants, while correcting mistakes in tracker file
participants <- openxlsx::read.xlsx("5. Manuscript code/data/ROSMAP 10X Processing Tracker.xlsx", sheet = "Processed Batches") %>%
filter(!Batch %in% c("B1", "B2", "B3") & (!StudyCode %in% c("MAP83034844", "MAP74718818"))) %>%
mutate(StudyCode = as.character(as.numeric(gsub("ROS|MAP", "", StudyCode)))) %>%
select(StudyCode, Batch) %>%
rbind(data.frame(StudyCode="44299049", Batch=55))
# The following loads ROSMAP participants' demographic- and endophenotypic characterization
# Parts of these data are available in supplementary table 1
cohort <- load.metadata() %>% `[`(unique(participants$StudyCode), )
#####################################################################################################################
# Figure 1 - Cohort & Atlas Overview #
#####################################################################################################################
# ----------------------------------------------------------------------------------------------------------------- #
# Panel B - Categorical Pathology & Cognitive Decline #
# ----------------------------------------------------------------------------------------------------------------- #
p1 <- ggplot(cohort, aes(sex, age_death)) +
geom_jitter(alpha=.4, width = .15) +
geom_boxplot(alpha=.5, fill="darkorchid4", outlier.size = 0) +
stat_summary(geom = "text", fun.data = function(x) list(y=65, label=paste0("n=", length(x)))) +
labs(x=NULL, y="Age of death", title=NULL) +
scale_y_continuous(breaks = c(70, 90, 110)) +
theme_classic()
pdf(file.path(panel.path, "1B.pdf"), width=embed.width, height=embed.height*2)
plot_grid(p1, plot_grid(plotlist = lapply(c("cogdx.grouped.txt","braak.grouped.txt","cerad.txt"), function(t)
ggplot(cohort, aes(!!sym(t), fill=!!sym(t))) +
geom_bar(color="black") +
theme_classic() +
scale_fill_manual(values = colorRampPalette(c("white","darkorchid4"))(1+length(unique(cohort[,t])))[-1]) +
scale_y_continuous(expand = c(0,0),
limits = c(0, 170),
breaks = c(50,100, 150)) +
labs(x=NULL, y=t, title=NULL)),
ncol=1),
rel_heights = c(1,3), ncol = 1)
while (!is.null(dev.list())) dev.off()
rm(p1)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel C - quantative Pathology & Cognitive Decline #
# ----------------------------------------------------------------------------------------------------------------- #
df <- cohort[,c("sqrt.amyloid","sqrt.tangles","cogng_demog_slope","sex")] %>%
filter(!is.na(sqrt.amyloid) & !is.na(sqrt.tangles)) %>%
rename(x="sqrt.amyloid", y="sqrt.tangles")
grid <- expand.grid(x=seq(-.05, 1.05*max(df$x, na.rm = T), length.out=200),
y=seq(-.05, 1.05*max(df$y, na.rm = T), length.out=200))
dist <- rdist::cdist(grid, df[!is.na(df$cogng_demog_slope),1:2])^2
dens <- apply(dist, 1, function(x) sqrt(mean(sort(x, partial=10)[1:10])))
C <- as.matrix(df[!is.na(df$cogng_demog_slope),"cogng_demog_slope"])
grid$cogng_demog_slope <- exp(-dist/(.75*dens)) %*% C
pdf(file.path(panel.path, "1C.pdf"), width=embed.width, height=embed.height+1)
ggplot(df %>% arrange(!is.na(cogng_demog_slope),cogng_demog_slope), aes(x,y, color=cogng_demog_slope, shape=sex)) +
ggrastr::geom_tile_rast(aes(x,y,fill=cogng_demog_slope), grid, inherit.aes = F, alpha=.8, color=NA, raster.dpi = 10) +
geom_point(size=3.5, color="black", alpha=.4) +
geom_point(size=2.7) +
scale_fill_gradientn(colors = rev(green2purple.less.white(5))) +
scale_color_gradientn(colors = rev(green2purple.less.white(5))) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
labs(x="amyloid",y="tangles", shae=NULL, color="Cognitive decline rate") +
theme_classic() +
theme(legend.position = "bottom")
while (!is.null(dev.list())) dev.off()
rm(df, grid, dist, C, dens)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel D - UMAP of all nuclei #
# ----------------------------------------------------------------------------------------------------------------- #
# 2D UMAP embedding of final atlas with cell-type annotations
embedding <- h5read(aggregated.data, "umap") %>% tibble::column_to_rownames("rowname") %>% `colnames<-`(c("x","y"))
shared <- intersect(rownames(embedding), rownames(annotations))
embedding <- data.frame(embedding[shared,], annotations[shared, ])
# Plot UMAP embedding of atlas
pdf(file.path(panel.path, "1D.pdf"), width=8, height=8)
ggplot(embedding, aes(x,y, color=state)) +
ggrastr::geom_point_rast(size=.05, raster.dpi = 1000) +
scale_color_manual(values = joint.state.colors, na.value = "lightgrey") +
theme_classic() +
no.labs +
no.axes +
theme_embedding +
theme(legend.position = "none")
while (!is.null(dev.list())) dev.off()
rm(embedding, shared)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel E - Number of nuclei in cell groups #
# ----------------------------------------------------------------------------------------------------------------- #
df <- annotations %>% filter(projid != "NA" & grouping.by != "Immune") %>%
count(grouping.by, projid) %>%
mutate(
grouping.by = recode(grouping.by, "Excitatory Neurons"="Exc. Neurons", "Inhibitory Neurons"="Inh. Neurons", "Oligodendrocytes"="Oligodend."),
grouping.by = factor(grouping.by, levels = levels(grouping.by)))
cols <- cell.group.color %>% `names<-`(recode(names(.), "Excitatory Neurons"="Exc. Neurons", "Inhibitory Neurons"="Inh. Neurons", "Oligodendrocytes"="Oligodend."))
pdf(file.path(panel.path, "1E.pdf"), width=embed.width, height=embed.height)
ggplot(df, aes(grouping.by, n, fill=grouping.by, color=grouping.by)) +
geom_boxplot(alpha=.3, outlier.shape = NA) +
geom_point(alpha=.5, position = position_jitterdodge(jitter.width = 2.5, dodge.width = .7, jitter.height = 0)) +
labs(x=NULL, y=NULL, title="Nuclei per donor") +
scale_y_sqrt(expand=c(0,0), breaks=c(50, 100,250, 500, 800, 1500, 3000)) +
scale_color_manual(values = cols) +
theme_classic() +
theme(legend.position = "none", axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))
while (!is.null(dev.list())) dev.off()
rm(df, cols)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel F - State Prevalence Heatmap #
# ----------------------------------------------------------------------------------------------------------------- #
# Cluster donors by distance in cellular landscape
df <- data.frame(data$X)
tree <- hclust(rdist::rdist(df, "euclidean")^2)
# Order states first by grouping and then by their numeric order (i.e Exc.2 < Exc.10)
ord <- states %>% rownames_to_column("state") %>%
arrange(grouping.by, suppressWarnings(as.numeric(gsub("^.*\\.","", state))), gsub("^.*\\.","", state)) %>%
pull(state)
# Flatten data for plotting
df <- df %>% rownames_to_column("projid") %>%
melt(id.vars = "projid", variable.name="state") %>%
mutate(state = factor(gsub(" ", ".", state), levels = gsub(" ", ".", rev(ord))),
projid = factor(projid, levels = rownames(df)[tree$order], ordered = T))
pdf(file.path(panel.path, "1F.pdf"), width=embed.width*2/3, height=embed.height)
bars <- ggplot(df, aes(projid,value,fill=state)) +
geom_bar(stat = "identity") +
scale_fill_manual(values=joint.state.colors) +
scale_y_continuous(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0)) +
coord_flip() +
labs(x="Donors", y="Proportion of cell population") +
theme_classic() +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank(),
strip.background = element_blank())
bars + ggtree::ggtree(tree, size=.25) + aplot::ylim2(bars) + patchwork::plot_layout(widths = c(1,.2))
while (!is.null(dev.list())) dev.off()
rm(df, tree, ord, bars)
#####################################################################################################################
# Supp Figure 1 - Library pre-processing #
#####################################################################################################################
cols <- list(
"Exc" = "midnightblue",
"Inh" = "firebrick4",
"Olig" = "olivedrab4",
"Astr" = "darkorchid4",
"Micr" = "chocolate3",
"OPC" = "springgreen4",
"Endo" = "darkgoldenrod4",
"Peri" = "cyan4")
# ----------------------------------------------------------------------------------------------------------------- #
# Panel A - batch trait distributions #
# ----------------------------------------------------------------------------------------------------------------- #
df <- cohort %>% select(sex, cogdx, cerad.txt, braak.grouped.txt) %>%
merge(participants, by.x="row.names", by.y="StudyCode") %>%
mutate(Batch = as.numeric(gsub("B", "", Batch))) %>%
mutate(Batch = factor(Batch, levels = unique(sort(Batch))))
pdf(file.path(panel.path, "s1A.pdf"), width=embed.width*1.5, height=embed.height*1.5)
lapply(c("sex","cogdx","braak.grouped.txt","cerad.txt"), function(p) {
ggplot(df, aes_string("Batch", fill=p)) +
geom_bar(stat = "count") +
scale_x_discrete(expand = c(0,0)) +
scale_y_continuous(expand = c(0,0), breaks = c(2,4,6,8)) +
scale_fill_manual(values = colorRampPalette(c("white","darkorchid4"))(1+length(unique(df[,p])))[-1]) +
labs(x=NULL, y=NULL) +
theme_classic() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
panel.border = element_rect(color="black", fill=NA, size=1))
}) %>%
base::Reduce(`+`, .) + patchwork::plot_layout(ncol=1)
while (!is.null(dev.list())) dev.off()
rm(df)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel C - classifier predictions over example libraרies #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "s1C.pdf"), width=embed.width*2, height=embed.height*2)
lapply(c("191213-B7-B", "200720-B36-A"), function(n) {
o <- readRDS(paste0("1. Library preprocessing/data/low.qual.thr.libs/", n, ".seurat.rds") )
plot_grid(
DimPlot(o, group.by = "cell.type", pt.size = 1.75, ncol = 1, cols=cols, raster = T, raster.dpi = c(1024,1024)) +
theme_embedding + labs(x=NULL, y=NULL, title=NULL) + theme(legend.position = "none"),
FeaturePlot(o, features = "cell.type.entropy", pt.size = 1.75, ncol = 1, cols=viridis::inferno(11),
order = T, raster = T, raster.dpi = c(1024,1024)) +
theme_embedding + labs(x=NULL, y=NULL, title=NULL) + theme(legend.position = c(3,3)))
}) %>% plot_grid(plotlist = ., nrow=2) %>% print()
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel D - manual curation of low quality thresholds #
# ----------------------------------------------------------------------------------------------------------------- #
o <- readRDS("1. Library preprocessing/data/low.qual.thr.libs/191213-B7-B.seurat.rds")
cols2 <- colorRampPalett
cols2 <- cols(length(unique(o$SCT_snn_res.0.2)))
pdf(file.path(panel.path, "s1D.pdf"), width=embed.width, height=embed.height*2)
plot_grid(
DimPlot(o, group.by = "SCT_snn_res.0.2", pt.size = 1.75, ncol = 1, cols = cols2,
raster = T, raster.dpi = c(1024,1024), label=T) +
theme_embedding + labs(x=NULL, y=NULL, title=NULL) + theme(legend.position = "none"),
plot_grid(VlnPlot(o, features=c("nCount_RNA"), pt.size = 0, ncol = 1, log = T) +
labs(x=NULL, y=NULL, title=NULL) +
scale_fill_manual(values=cols2) +
theme(legend.position = "none", axis.text.x = element_text(angle = 0, hjust = .5)),
VlnPlot(o, features=c("nFeature_RNA"), pt.size = 0, ncol = 1, log = T) +
scale_fill_manual(values=cols2) +
labs(x=NULL, y=NULL, title=NULL) +
theme(legend.position = "none", axis.text.x = element_text(angle = 0, hjust = .5)),
VlnPlot(o, features=c("cell.type.entropy"), pt.size = 0, ncol = 1) +
scale_fill_manual(values=cols2) +
scale_y_sqrt() + labs(x=NULL, y=NULL, title=NULL) +
theme(legend.position = "none", axis.text.x = element_text(angle = 0, hjust = .5)),
ncol=1, align = "hv"),
ncol=1, rel_heights = c(1,1.3))
while (!is.null(dev.list())) dev.off()
rm(o, cols2)
# ----------------------------------------------------------------------------------------------------------------- #
# Panels E-F - nUMAI & nFeature distributions in curated libraries #
# ----------------------------------------------------------------------------------------------------------------- #
# The code for these panels can be found under Library preprocessing/low.qual.thresholds.cell.level.R
# ----------------------------------------------------------------------------------------------------------------- #
# Panel G - Example library Demux doublets UMAP #
# ----------------------------------------------------------------------------------------------------------------- #
o <- readRDS("5. Manuscript code/data/library.for.fig.191213-B7-B.seurat.rds")
pdf(file.path(panel.path, "s1G.pdf"), width=embed.width, height=embed.height)
FetchData(o, c("UMAP_1","UMAP_2","is.doublet.demux")) %>%
`colnames<-`(c("x", "y", "demux")) %>%
#arrange(demux) %>%
ggplot(aes(x,y,color=demux)) +
geom_point(size=.75) +
scale_color_manual(values = c("#468E46","#8D59A8")) +
labs(x=NULL, y=NULL) +
theme_classic() +
theme(axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank(),
legend.position = c(3,3))
while (!is.null(dev.list())) dev.off()
rm(o)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel H - Example library DoubletFinder scores #
# ----------------------------------------------------------------------------------------------------------------- #
o <- readRDS("5. Manuscript code/data/library.for.fig.191213-B7-B.seurat.rds")
pdf(file.path(panel.path, "s1H.pdf"), width=embed.width, height=embed.height)
FetchData(o, c("UMAP_1","UMAP_2","doublet.score")) %>%
`colnames<-`(c("x", "y", "score")) %>%
arrange(score) %>%
ggplot(aes(x,y,color=score)) +
ggrastr::geom_point_rast(size=.75, raster.dpi = 600) +
scale_color_viridis_c(option = "inferno") +
labs(x=NULL, y=NULL) +
theme_classic() +
theme(axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank(),
legend.position = c(3,3))
while (!is.null(dev.list())) dev.off()
rm(o)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel I - MCC distributions across libraries #
# ----------------------------------------------------------------------------------------------------------------- #
libs <- list.files("1. Library preprocessing/data/snRNA-seq libraries/", "*.seurat.rds", full.names = TRUE, recursive = TRUE)
pb <- progress_bar$new(format="Loading libraries :current/:total [:bar] :percent in :elapsed. ETA :eta",
total = length(libs), clear=FALSE, width=100, force = TRUE)
library(ROCit)
mcc <- lapply(libs, function(p) {
obj <- readRDS(p)
doublets.df <- data.frame(class=obj$demux.droplet.type, score=obj$doublet.score, row.names = rownames(obj@meta.data))[obj$demux.droplet.type %in% c("SNG", "DBL"),]
doublets.measure <- measureit(score=doublets.df$score, class=doublets.df$class, negref = "SNG", measure = c("ACC", "FSCR", "FPR", "TPR"))
doublets.measure$MCC <- (doublets.measure$TP*doublets.measure$TN - doublets.measure$FP * doublets.measure$FN) /
sqrt((doublets.measure$TP+doublets.measure$FP)*(doublets.measure$TP+doublets.measure$FN)*(doublets.measure$TN+doublets.measure$FP)*(doublets.measure$TP+doublets.measure$FN))
pb$tick()
return(doublets.measure)
})
pdf(file.path(panel.path, "s1I.pdf"), width=embed.width, height=embed.height)
plot_grid(
lapply(seq_along(mcc), function(i) data.frame(i=i, thr=mcc[[i]]$Cutoff, MCC=mcc[[i]]$MCC)) %>%
do.call(rbind, .) %>%
ggplot(aes(x=thr, y=MCC, group=i)) +
geom_line() +
scale_x_continuous(expand = c(0,0)) +
labs(x=NULL, y=NULL) +
theme_classic()+
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank()),
data.frame(v=unlist(lapply(mcc, function(x) x$Cutoff[which.max(x$MCC)]))) %>%
ggplot(aes(v)) +
geom_boxplot(outlier.size = 0, fill="grey80") +
geom_jitter(aes(y=0), height = .1, width=0) +
scale_x_continuous(expand = c(0,0)) +
labs(x=NULL, y=NULL) +
theme_classic() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()),
ncol=1,
rel_heights = c(3,1), align = "v")
while (!is.null(dev.list())) dev.off()
rm(libs, pb, mcc)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel J - Example library after removing low-quality + doublets #
# ----------------------------------------------------------------------------------------------------------------- #
# Note that in the full analysis of all libraries doulbets and cells with high levels of mitochondrial genes
# expression were only removed once cell-type specific objects were created. The following is to illustrate
# this process in the scope of the specific exaple library
o <- readRDS("5. Manuscript code/data/library.for.fig.191213-B7-B.seurat.rds")
o[["percent.mt"]] <- PercentageFeatureSet(o, pattern = "^MT-")
!(o$is.doublet | o$percent.mt >= 10)
o <- subset(o, cells = colnames(o)[!(o$is.doublet | o$percent.mt >= 10)])
# Rerunning Seurat pipeline after removing cells - parameters are as used when initially analysing the library
o <- SCTransform(o, variable.features.n = 2000, conserve.memory = T)
o <- RunPCA(o, features=VariableFeatures(o), npcs = 30, verbose = F)
o <- FindNeighbors(o, reduction="pca", dims = 1:30, verbose = F)
o <- FindClusters(o, resolution = .2, verbose = F)
o <- RunUMAP(o, dims=1:30, n.components = 2)
pdf(file.path(path, "s1J.pdf"), width=embed.width, height=embed.height)
FetchData(o, c("UMAP_1","UMAP_2","cell.type")) %>%
`colnames<-`(c("x", "y", "cell.type")) %>%
ggplot(aes(x,y,color=cell.type)) +
geom_point(size=.5) +
scale_color_manual(values = cols, name="Cell Type") +
labs(x=NULL, y=NULL) +
theme_classic() +
theme(axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank(),
legend.position = c(3,3))
while (!is.null(dev.list())) dev.off()
rm(o)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel K - Distribution of #nuclei for participants #
# ----------------------------------------------------------------------------------------------------------------- #
df <- annotations %>% filter(projid != "NA") %>% count(projid)
pdf(file.path(panel.path, "s1K.pdf"), width=embed.width, height=embed.height)
ggplot(df %>% rowwise() %>% mutate(n = min(n, 7000)), aes(x = n)) +
geom_histogram(alpha=.75, color="black",fill="#8D59A8", bins = 45) +
geom_vline(xintercept = 868) +
theme_classic() +
scale_y_continuous(expand = c(0,0)) +
labs(x="# Nuclei", y="# Participants")
while (!is.null(dev.list())) dev.off()
rm(df)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel I - Number of #UMIs in cell groups #
# ----------------------------------------------------------------------------------------------------------------- #
df <- annotations %>% filter(projid != "NA" & grouping.by != "Immune") %>%
group_by(grouping.by, projid) %>%
summarise(umi=mean(nCount_RNA), .groups = "drop") %>%
mutate(
grouping.by = recode(grouping.by, "Excitatory Neurons"="Exc. Neurons", "Inhibitory Neurons"="Inh. Neurons", "Oligodendrocytes"="Oligodend."),
grouping.by = factor(grouping.by, levels = levels(grouping.by)))
cols <- cell.group.color %>% `names<-`(recode(names(.), "Excitatory Neurons"="Exc. Neurons", "Inhibitory Neurons"="Inh. Neurons", "Oligodendrocytes"="Oligodend."))
pdf(file.path(panel.path, "s1I.pdf"), width=embed.width*2/3, height=embed.height)
ggplot(df, aes(grouping.by, umi, fill=grouping.by, color=grouping.by)) +
geom_boxplot(alpha=.3, outlier.shape = NA) +
geom_point(alpha=.5, position = position_jitterdodge(jitter.width = 2.5, dodge.width = .7, jitter.height = 0)) +
labs(x=NULL, y=NULL, title="Average UMIs per donor") +
scale_y_sqrt(expand=c(0,0), breaks=c(1000, 5000, 10000, 20000)) +
scale_color_manual(values = cols) +
theme_classic() +
theme(legend.position = "none", axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))
while (!is.null(dev.list())) dev.off()
rm(df, cols)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel M - Donor-nuclei stacked bar plot #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "s1M.pdf"), width=embed.width*2/3, height=embed.height)
print(ggplot(annotations %>% filter(projid != "NA" & grouping.by != "Immune") %>% count(projid, grouping.by),
aes(reorder(projid, -n, sum), n, fill=reorder(grouping.by, -n, sum))) +
geom_bar(stat = "identity") +
scale_y_continuous(expand=c(0,0)) +
scale_fill_manual(values = cell.group.color) +
coord_flip() +
labs(x=NULL, y=NULL, fill="Cell Type Group", title="# Nuclei per donor") +
theme_classic() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = c(.75,.35)))
print(ggplot(annotations %>% filter(projid != "NA" & grouping.by != "Immune") %>% count(projid, grouping.by),
aes(reorder(projid, -n, sum), n, fill=reorder(grouping.by, -n, sum))) +
geom_bar(stat = "identity", position="fill") +
scale_y_continuous(expand=c(0,0)) +
scale_fill_manual(values = cell.group.color) +
coord_flip() +
labs(x=NULL, y=NULL, fill="Cell Type Group", title="# Nuclei per donor") +
theme_classic() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = c(.75,.35)))
while (!is.null(dev.list())) dev.off()
图2
source("5. Manuscript code/utils.R")
#####################################################################################################################
# Figure 2 - Cell-State Characterization #
#####################################################################################################################
# ----------------------------------------------------------------------------------------------------------------- #
# Cell-type UMAPS #
# ----------------------------------------------------------------------------------------------------------------- #
plot.umap <- function(name, cols=NULL) {
embedding <- h5read(aggregated.data, name)
st <- embedding$state %>% unique()
# If not specified randomly assign colors to states
if(is.null(cols)) {
cols <- unlist(lapply(split(1:length(st), 1:3), length))
cols <- setNames(c(colorRange("darkgreen")(cols[[1]]), colorRange("darkorchid4")(cols[[2]]), colorRange("midnightblue")(cols[[3]])), sample(st))
}
# Annotate plot with number of nuclei
text <- annotations[embedding$cell,] %>%
group_by(grouping.by) %>% summarise(n=n()) %>%
mutate(label=paste0(grouping.by," n=", n)) %>%
pull(label) %>% paste0(collapse = "\n")
pos <- apply(embedding[c("x","y")], 2, min)
return(ggplot(embedding, aes(x, y, color=state, label=state)) +
scale_color_manual(values=cols) +
ggrastr::geom_point_rast(size=.01, raster.dpi = 800) +
geom_text(data = embedding %>% group_by(state) %>% summarise_at(vars(x, y), list(mean)), color="black") +
annotate(geom = "text", label=text, x=pos[1], y=pos[2], hjust=0)+
no.labs +
theme_embedding +
theme(legend.position = "none"))
}
pdf(file.path(panel.path, "2.UMAPs.pdf"), width=embed.width, height = embed.height)
for(ct in c("vascular.niche","microglia","astrocytes","oligodendroglia","inhibitory","excitatory","neuronal")) {
plot.umap(file.path(mapping[[ct]], ifelse(ct %in% c("excitatory","neuronal"), "umap.ref","umap")),
cols = state.colors[[ct]]) %>% print(.)
}
while (!is.null(dev.list())) dev.off()
rm(ct, plot.umap)
# ----------------------------------------------------------------------------------------------------------------- #
# State QCs #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "2.QCs.pdf"), width=embed.width, height=embed.height)
for(ct in names(atlas)) {
o <- LoadH5Seurat(paste0(ct,"/data/", ct, ".H5Seurat"), assays=list(SCT=c("data")), misc=F, graphs=F, reductions=F, neighbors=F, verbose=F)
Idents(o) <- factor(as.character(Idents(o)), levels=atlas[[ct]]$state.order)
if(any(is.na(Idents(o))))
o <- subset(o, cells = colnames(o)[!is.na(Idents(o))])
plot_grid(
VlnPlot(o, features=c("nCount_RNA","nFeature_RNA"), pt.size = 0, ncol = 1, log = TRUE, cols = state.colors[[ct]]) &
labs(x=NULL, y=NULL, title=NULL) &
NoLegend() &
theme(axis.text.x = element_blank()),
melt(data$X) %>%
`colnames<-`(c("donor","state","prev")) %>%
filter(state %in% levels(Idents(o))) %>%
mutate(state = factor(state, levels = levels(Idents(o)))) %>%
ggplot(aes(state, prev, fill=state, color=state)) +
geom_boxplot(alpha=.3, outlier.shape = NA) +
geom_jitter(size=.5, width=.2, alpha=.75) +
scale_color_manual(values=state.colors[[ct]]) +
scale_fill_manual(values=state.colors[[ct]]) +
scale_y_sqrt(breaks = c(.01, .05, .1, .25, .5, .75), labels = paste0(as.integer(100*c(.01, .05, .1, .25, .5, .75)), "%"), expand = expansion(0)) +
labs(x=NULL, y=NULL) +
theme_classic() +
theme(legend.position = "none"),
ncol=1,
rel_heights = c(1,1)) %>% print(.)
rm(o); gc()
}
while (!is.null(dev.list())) dev.off()
rm(ct)
# ----------------------------------------------------------------------------------------------------------------- #
# Signature scores #
# ----------------------------------------------------------------------------------------------------------------- #
# Removing Sadick et al annotations in favor of their integrated annotations
excluded.signatures = list(dummy=c("ABC"), `Sadick J.S. et al (2022)`=c())
pdf(file.path(panel.path, "2.signatures.pdf"), height=embed.height*1.25, width = embed.width*1.5)
for(ct in c("endo","microglia","astrocytes","oligodendroglia")) {
sigs <- h5read(aggregated.data, file.path(mapping[[ct]], "signatures"))
sigs$scores <- sigs$scores %>% mutate(sig.full=paste(reference, signature)) %>%
# Remove specified signatures
merge(., stack(excluded.signatures) %>% `colnames<-`(c("signature","reference")) %>% mutate(to.exclude=T), all.x=T) %>%
filter(is.na(to.exclude)) %>%
dplyr::select(-to.exclude) %>%
# Remove all signatures from specified references
filter(! reference %in% names(Filter(is.null, excluded.signatures)))
labels <- sigs$scores %>% dplyr::select(sig.full, signature) %>% unique %>% `rownames<-`(NULL) %>% column_to_rownames("sig.full")
references <- sigs$scores %>% dplyr::select(sig.full, reference) %>% unique %>% `rownames<-`(NULL) %>% column_to_rownames("sig.full")
mtx <- sigs$scores %>% pivot_wider(id_cols = "state", names_from = "sig.full", values_from = "mean") %>%
column_to_rownames("state") %>%
`[`(atlas[[ct]]$state.order, ) %>%
scale
significance <- sigs$scores %>%
rowwise() %>%
mutate(sig = ifelse(p.adjust <= .01, "*", "")) %>%
pivot_wider(id_cols = "state", names_from = "sig.full", values_from = "sig", values_fill = "") %>%
column_to_rownames("state") %>%
`[`(rownames(mtx), colnames(mtx)) %>%
as.matrix()
v <- max(abs(mtx), na.rm = T)
draw(Heatmap(mtx,
col = circlize::colorRamp2(seq(-v, v, length.out=21), green2purple(21)),
cluster_rows=F,
row_split = atlas[[ct]]$main.group,
show_column_dend = F,
column_labels = labels[colnames(mtx),],
column_names_rot = 45,
row_title = " ",
layer_fun = function(j, i, x, y, w, h, fill) grid.text(pindex(significance, i, j), x, y),
bottom_annotation = HeatmapAnnotation(ref=references[colnames(mtx),]),
height = unit(18, "cm"),
width = unit(14, "cm"),
border=T,
row_gap = unit(0,"pt")
), merge_legend=T)
}
while (!is.null(dev.list())) dev.off()
rm(ct, sigs, labels, references, mtx, significance, v,excluded.signatures)
# ----------------------------------------------------------------------------------------------------------------- #
# Signature gene expression #
# ----------------------------------------------------------------------------------------------------------------- #
# Removing Sadick et al annotations in favor of their integrated annotations
excluded.signatures = list(dummy=c("ABC"), `Sadick J.S. et al (2022)`=c())
pdf(file.path(panel.path, "2.signatures.genes.pdf"), height=embed.height*3, width = embed.width*1.5)
for(ct in c("vascular.niche","microglia","astrocytes","oligodendroglia")) {
# Load signatures' gene sets
sigs <- h5read(aggregated.data, file.path(mapping[[ct]], "signatures"))
sigs <- sigs$genes %>% mutate(section=paste(reference, signature), col=paste(reference, signature, gene)) %>%
# Remove specified signatures
merge(., stack(excluded.signatures) %>% `colnames<-`(c("signature","reference")) %>% mutate(to.exclude=T), all.x=T) %>%
filter(is.na(to.exclude)) %>%
dplyr::select(-to.exclude) %>%
# Remove all signatures from specified references
filter(! reference %in% names(Filter(is.null, excluded.signatures)))
de <- h5read(aggregated.data, file.path(mapping[[ct]], "de")) %>%
filter(avg_log2FC > 0 & p_val_adj < .01 & gene %in% unique(sigs$gene)) %>%
mutate(sig = "*") %>%
dplyr::select(cluster, gene, sig)
exp <- h5read(aggregated.data, file.path(mapping[[ct]], "gene.exp")) %>% filter(gene %in% unique(sigs$gene))
# Append expression levels and if gene is DE in state
df <- sigs %>%
merge(., exp, by.x = "gene", by.y="gene", all.x = T) %>%
merge(., de, by.x = c("state","gene"), by.y=c("cluster","gene"), all.x = T) %>%
mutate(sig = replace_na(sig, " "))
mtx <- pivot_wider(df %>% filter(!is.na(state)), id_cols = "state", names_from = "col", values_from = "mean.exp", values_fill = 0) %>%
column_to_rownames("state") %>%
`[`(atlas[[ct]]$state.order, intersect(sigs$col,colnames(.))) %>%
as.matrix %>%
scale %>%
t
significance <- pivot_wider(df %>% filter(!is.na(state)), id_cols = "state", names_from = "col", values_from = "sig", values_fill = "") %>%
column_to_rownames("state") %>%
`[`(atlas[[ct]]$state.order, intersect(sigs$col,colnames(.))) %>%
t
labeling <- sigs %>% column_to_rownames("col") %>% mutate(section = gsub(" .* ", " - ", section)) %>% `[`(intersect(sigs$col, rownames(mtx)),)
gaps <- labeling %>% dplyr::select(reference, section) %>% unique() %>% mutate(gap = ifelse(reference == lag(reference), 0, 3)) %>% pull(gap) %>% Filter(Negate(is.na), .)
draw(Heatmap(mtx,
col = green2purple(21),
cluster_columns = F,
column_labels = case_when(ct == "vascular.niche"~colnames(mtx), T~gsub("^.*\\.", "", colnames(mtx))),
column_names_rot = 0,
row_labels = labeling$gene,
row_split = labeling$section,
cluster_rows = T,
cluster_row_slices = F,
show_row_dend = F,
layer_fun = function(j, i, x, y, w, h, fill) grid.text(pindex(significance, i, j), x, y),
left_annotation = rowAnnotation(ref = labeling$reference),
heatmap_legend_param = list(title = "scaled mean exp."),
border=T,
row_gap = unit(gaps, "pt"),
width = unit(5,"cm"),
height= unit(20, "cm")))
}
while (!is.null(dev.list())) dev.off()
rm(ct, sigs, de, exp, df, mtx, significance, labeling)
# ----------------------------------------------------------------------------------------------------------------- #
# Dotplots of top and selected DEGs #
# ----------------------------------------------------------------------------------------------------------------- #
n.genes = list(inhibitory=4, excitatory=4, endo=4, astrocytes=3, oligodendroglia=4, microglia=3)
pdf(file.path(panel.path, "2.dotplots.pdf"), height=embed.height, width = embed.width*2)
for(ct in names(atlas)) {
additional.genes = c()
if("genes" %in% names(atlas[[ct]]))
additional.genes = unname(unlist(atlas[[ct]]$genes))
genes <- h5read(aggregated.data, file.path(mapping[[ct]], "de")) %>%
filter(avg_log2FC > 0) %>%
group_by(gene) %>%
slice_max(avg_log2FC, n=1) %>%
group_by(cluster) %>%
arrange(desc(avg_log2FC)) %>%
filter(row_number() <= n.genes[[ct]] | gene %in% additional.genes) %>%
unstack(gene~cluster) %>%
as.list
hm <- h5read(aggregated.data, file.path(mapping[[ct]], "gene.exp")) %>%
filter(gene %in% (genes[atlas[[ct]]$state.order] %>% unlist)) %>%
gheatmap(color.by="mean.exp", size.by="pct.exp",
row.order = list(a=atlas[[ct]]$state.order),
column.order = genes[atlas[[ct]]$state.order],
cluster_columns=FALSE, column_gap = unit(0, "mm"),
cols = green2purple,
border=TRUE,
cluster_rows=FALSE, scale="col")
draw(hm[[1]], merge_legend=TRUE, annotation_legend_list=hm[[2]])
}
while (!is.null(dev.list())) dev.off()
rm(ct, genes, hm, additional.genes, n.genes)
# ----------------------------------------------------------------------------------------------------------------- #
# Dotplots of top and selected DEGs - Oligodendroglia specific #
# ----------------------------------------------------------------------------------------------------------------- #
# Retrieve oligodendroglia DEGs + pseudobulk gene expression
de <- h5read(aggregated.data, file.path(mapping[["oligodendroglia"]], "de")) %>% filter(avg_log2FC > 0)
exp <- h5read(aggregated.data, file.path(mapping[["oligodendroglia"]], "gene.exp"))
# Create general OPC vs Oligo heatmap
genes <- list(OPCs=c("PCDH15", "VCAN", "PDGFRA"),
Oligodendrocytes=c("MBP", "MOG", "MAG"))
ct.hm <- exp %>% filter(gene %in% unlist(genes)) %>%
gheatmap(color.by="mean.exp", size.by="pct.exp",
name = "Cell-type marker",
row.order = list(a=atlas$oligodendroglia$state.order),
column.order = genes,
cluster_columns=FALSE, column_gap = unit(0, "mm"),
cols = green2purple,
border=TRUE,
cluster_rows=FALSE, scale="col")
# Create within OPCs and within Oligos heatmaps
states <- atlas$oligodendroglia$state.order %>% split(., if_else(grepl("Oli.", .),"Oligodendrocytes", "OPCs"))
additional.genes <- list(
OPCs = c("PINK1", "TOMM20", "TOMM7", "SERPINA3", "OSMR", "CNTN2", "SOX10", "SOX6"),
Oligodendrocytes = c("QDPR", "DPYD", "S100A6", "SEMA3C", "SLC38A2", "DNAJB1", "DNAJB6", "DNAJC1", "HSPA1A", "HSPA1B", "HSPA4L", "HSPH1", "PTGES3"))
hms <- lapply(names(states), \(ct) {
genes <- de %>%
filter(cluster %in% states[[ct]]) %>%
group_by(gene) %>%
slice_max(avg_log2FC, n=1) %>%
group_by(cluster) %>%
arrange(desc(avg_log2FC)) %>%
filter(row_number() <= 3 | gene %in% additional.genes[[ct]]) %>%
unstack(gene~cluster) %>% as.list
exp %>% filter(gene %in% unlist(genes)) %>%
gheatmap(color.by="mean.exp", size.by="pct.exp",
name = paste0(ct, "\nmean exp."),
row.order = list(a=atlas$oligodendroglia$state.order),
column.order = genes[states[[ct]]],
cluster_columns=FALSE, column_gap = unit(0, "mm"),
cols = green2purple,
border=TRUE,
cluster_rows=FALSE, scale="col")
})
pdf(file.path(panel.path, "2.dotplots.oligodendroglia.specific.pdf"), height=embed.height, width = embed.width*2)
draw(ct.hm[[1]] + hm[[2]][[1]] + hm[[1]][[1]] , merge_legend=TRUE,
annotation_legend_list=hm[[1]][[2]][[1]])
dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Clustering-goodness confusion plots #
# ----------------------------------------------------------------------------------------------------------------- #
library(SeuratDisk)
library(dplyr)
library(Seurat)
objs <- c(sapply(c("microglia","astrocytes","oligodendroglia","inhibitory"),
\(ct) paste0(ct, "/data/", ct, ".h5Seurat")),
endo = "vascular.niche/data/vascular.niche.h5Seurat",
`cux2-`="excitatory/data/cux2-.h5Seurat",
`cux2+`="excitatory/data/cux2+.h5Seurat")
for (ct in names(objs)) {
message(ct)
obj <- LoadH5Seurat(objs[[ct]], assays=list(SCT=c("data")), verbose=F)
clusters <- Idents(obj)
if(! "SCT_snn" %in% names(obj@graphs))
obj <- FindNeighbors(obj, reduction="pca", dims = 1:50)
combs <- expand.grid(clusters %>% unique() %>% as.vector(),
clusters %>% unique() %>% as.vector()) %>% t()
dists <- apply(combs, 2, function(pair_){
cells1 <- names(clusters)[clusters == pair_[[1]] %>% as.vector()]
cells2 <- names(clusters)[clusters == pair_[[2]] %>% as.vector()]
(rowSums(obj@graphs$SCT_snn[cells1, cells2])/rowSums(obj@graphs$SCT_snn[cells1, ])) %>% mean()
})
saveRDS(list(dists = rbind(combs, dists) %>% t() %>% as.data.frame() %>% tidyr::spread(Var1, dists) %>% tibble::column_to_rownames("Var2"),
clusters = clusters), paste0("5. Manuscript code/data/", ct, ".knn.rds"))
rm(obj, clusters, combs, dists)
}
pdf(file.path(panel.path, "2.cluster.goodness.confusions.pdf"), height=embed.height*2, width = embed.width*2)
for(ct in names(objs)) {
df <- readRDS(paste0("5. Manuscript code/data/", ct, ".knn.rds"))
mtx <- sapply(df$dists, as.numeric) %>% `rownames<-`(rownames(df$dists))
if(ct %in% c("cux2+", "cux2-"))
ord <- intersect(atlas$excitatory$state.order, rownames(mtx))
else
ord <- atlas[[ct]]$state.order
Heatmap(t(mtx[ord, ord]),
width = unit(embed.width, "cm"), height = unit(embed.height, "cm"),
col=colorRampPalette(c("white","darkgreen","#5F9E5F","#A074B6","darkorchid4"))(21),
cluster_rows = F, cluster_columns = F, border=T) %>% draw()
}
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Heatmaps of predicted neuronal subtypes #
# ----------------------------------------------------------------------------------------------------------------- #
neurons <- merge(annotations %>% rownames_to_column("cell"), h5read(aggregated.data, "neuronal/allen.annotations")) %>%
count(grouping.by, state, allen.labels) %>%
group_by(grouping.by, state) %>%
mutate(prop = n/sum(n)) %>%
ungroup()
neuronal.subtypes <- neurons %>%
select(allen.labels) %>% unique %>%
tidyr::separate(col = "allen.labels", sep = " ", into = c("type", "layer", "marker.1", "marker.2"), extra = "merge", remove = FALSE) %>%
mutate(marker.1 = factor(marker.1, rev(c("LAMP5", "PAX6", "SST", "VIP", "PVALB","RORB", "LINC00507","THEMIS","FEZF2"))),
across(c(layer, marker.2), ~factor(., sort(unique(.))))) %>%
column_to_rownames("allen.labels")
ord <- c(
"Exc L2 LINC00507 GLRA3", "Exc L2 LAMP5 KCNG3", "Exc L2 LINC00507 ATP7B", "Exc L2-3 LINC00507 DSG3",
"Exc L3 LAMP5 CARM1P1", "Exc L3 THEMIS ENPEP", "Exc L2-3 RORB RTKN2", "Exc L2-3 RORB PTPN3",
"Exc L2-3 RORB CCDC68", "Exc L3-5 RORB TNNT2", "Exc L3-5 RORB LAMA4", "Exc L3 RORB OTOGL",
"Exc L3-5 RORB LINC01202", "Exc L3-5 RORB LNX2", "Exc L3-5 RORB RPRM", "Exc L5 THEMIS SLC22A18",
"Exc L6 THEMIS LINC00343", "Exc L6 THEMIS SNTG2", "Exc L5-6 THEMIS TNFAIP6", "Exc L6 THEMIS SLN",
"Exc L5 RORB MED8", "Exc L5 THEMIS FGF10", "Exc L5 THEMIS VILL", "Exc L5-6 FEZF2 IFNG-AS1",
"Exc L5 FEZF2 NREP-AS1", "Exc L5 FEZF2 RNF144A-AS1", "Exc L5 FEZF2 PKD2L1", "Exc L5-6 FEZF2 LPO",
"Exc L6 FEZF2 KLK7", "Exc L6 FEZF2 POGK", "Exc L6 FEZF2 FFAR4", "Exc L6 FEZF2 PROKR2",
"Exc L5-6 FEZF2 CFTR", "Exc L6 FEZF2 PDYN", "Exc L5-6 FEZF2 C9orf135-AS1", "Exc L5-6 FEZF2 SH2D1B",
"Exc L5 THEMIS RGPD6", "Exc L5-6 FEZF2 FILIP1L", "Exc L5-6 FEZF2 OR1L8", "Exc L5 THEMIS LINC01116",
"Exc L5-6 THEMIS SMYD1", "Exc L5 FEZF2 CSN1S1", "Exc L3-5 FEZF2 ASGR2", "Exc L3-5 FEZF2 LINC01107",
"Inh L1-6 SST NPY", "Inh L1-6 LAMP5 AARD", "Inh L1 LAMP5 RAB11FIP1", "Inh L1-6 LAMP5 NES", "Inh L5-6 LAMP5 CRABP1",
"Inh L1-6 LAMP5 CA1", "Inh L1 PAX6 MIR101-1", "Inh L3-6 PAX6 LINC01497", "Inh L5-6 SST BEAN1",
"Inh L5-6 SST DNAJC14", "Inh L5-6 SST KLHL1", "Inh L5-6 SST FBN2", "Inh L5-6 SST C4orf26",
"Inh L1-2 SST PRRT4", "Inh L3-5 SST GGTLC3", "Inh L1-2 SST CLIC6", "Inh L2 PVALB FRZB",
"Inh L2-3 SST NMU", "Inh L1-2 SST CCNJL", "Inh L1-3 SST FAM20A", "Inh L3-5 SST CDH3",
"Inh L5 SST RPL35AP11", "Inh L5-6 SST PAWR", "Inh L3-5 SST OR5AH1P", "Inh L5-6 SST PIK3CD",
"Inh L5-6 PVALB SST CRHR2", "Inh L5-6 SST ISX", "Inh L1 SST DEFB108B", "Inh L1 LAMP5 BMP2",
"Inh L1 PVALB SST ASIC4", "Inh L1 PAX6 CHRFAM7A", "Inh L1-6 VIP SLC7A6OS", "Inh L1 LAMP5 PVRL2",
"Inh L1 SST P4HA3", "Inh L1 LAMP5 NMBR", "Inh L2 PAX6 FREM2", "Inh L1-2 VIP HTR3A", "Inh L1-2 VIP WNT4",
"Inh L1-5 VIP PHLDB3", "Inh L1-5 VIP LINC01013", "Inh L3-6 VIP UG0898H09", "Inh L3-6 VIP ZIM2-AS1",
"Inh L1-5 VIP CD27-AS1", "Inh L2-5 VIP SOX11", "Inh L1-2 VIP PTGER3", "Inh L2-5 VIP BSPRY", "Inh L5-6 VIP COL4A3",
"Inh L1-2 VIP SCML4", "Inh L2 VIP SLC6A16", "Inh L1-3 VIP HSPB6", "Inh L1-5 VIP SMOC1", "Inh L1-3 VIP CHRNA2",
"Inh L3-5 VIP HS3ST3A1", "Inh L1-2 VIP EXPH5", "Inh L1-3 VIP FNDC1", "Inh L1 VIP KLHDC8B", "Inh L1-3 VIP CBLN1",
"Inh L3-5 VIP IGDCC3", "Inh L3-5 VIP TAC3", "Inh L1-6 PVALB COL15A1", "Inh L5-6 PVALB ZFPM2-AS1",
"Inh L6 SST TH", "Inh L5-6 PVALB GAPDHP60", "Inh L5-6 PVALB KCNIP2", "Inh L1-2 SST CLIC6",
"Inh L2 PVALB FRZB", "Inh L2-5 PVALB RPH3AL", "Inh L1-2 PVALB CDK20", "Inh L3 PVALB SAMD13",
"Inh L3-5 PVALB ISG20", "Inh L2-5 PVALB HHIPL1", "Inh L5-6 PVALB FAM150B", "Inh L5-6 PVALB MEPE", "Inh L5 PVALB LRIG3")
sets = list(c("Inhibitory Neurons", "inhibitory"), c("Excitatory Neurons", "excitatory"))
pdf(file.path(panel.path, "2.neuronal.confusions.pdf"), height=embed.height*2, width = embed.width*2)
for(set in sets) {
df <- pivot_wider(neurons %>% filter(grouping.by == set[[1]]), id_cols="allen.labels", names_from = "state", values_from = "prop", values_fill = 0) %>%
column_to_rownames("allen.labels") %>%
`[`(intersect(v, rownames(.)), atlas[[set[[2]]]]$state.order) %>%
as.matrix
Heatmap(df,
col=colorRampPalette(c("white","darkgreen","#5F9E5F","#A074B6","darkorchid4"))(21),
width = unit(ncol(df)*.35,"cm"),
border=T,
column_order = atlas[[set[[2]]]]$state.order,
cluster_rows = F, cluster_row_slices = F,
right_annotation = rowAnnotation(
`Cortical layer` = anno_text(paste(neuronal.subtypes[rownames(df), "layer"], " ")),
`Marker 1` = anno_text(paste(neuronal.subtypes[rownames(df), "marker.1"], " ")),
`Marker 2` = anno_text(neuronal.subtypes[rownames(df), "marker.2"])
),
left_annotation = rowAnnotation(
`Cortical layer` = neuronal.subtypes[rownames(df), "layer"],
marker1 = neuronal.subtypes[rownames(df), "marker.1"]),
show_row_names = F) %>% draw()
}
while (!is.null(dev.list())) dev.off()
rm(df, sets, set, ord, neuronal.subtypes, neurons)
图3
source("5. Manuscript code/utils.R")
#####################################################################################################################
# Figure 3 - State-Trait Associations & Validations #
#####################################################################################################################
# ----------------------------------------------------------------------------------------------------------------- #
# Panel B+C - joint - snuc+bulk Trait Associations #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "3B-D.pdf"), width=2*3 + 1, height=5)
plot.trait.associations.cross.cohort(
names(AD.traits), AD.traits, fdr.thr = .01, use_raster=TRUE, raster_quality = 10)
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel B+C - snuc+bulk Trait Associations #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "3B.pdf"), width=3.5, height=5)
plot.trait.associations(py_to_r(data$uns$trait.analysis$snuc),
params = names(AD.traits),
column_title="snRNA-seq",
column_labels = AD.traits,
column_names_rot = 45,
column_names_centered = T,
row_names_side = "left",
use_raster=T,
raster_quality = 10) %>% print()
while (!is.null(dev.list())) dev.off()
pdf(file.path(panel.path, "3C.pdf"), width=3.5, height=5)
plot.trait.associations(py_to_r(data$uns$trait.analysis$celmod),
params = names(AD.traits),
column_title="bulk-predicted",
column_labels = AD.traits,
column_names_rot = 45,
column_names_centered = T,
row_names_side = "left",
use_raster=T,
raster_quality = 10) %>% print()
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel D - Meta analysis of associations #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "3D.pdf"), width=3.5, height=5)
plot.trait.associations(py_to_r(data$uns$trait.analysis$meta.analysis),
params = names(AD.traits),
value.by = "z.meta", pval.by = "adj.pval.meta",
column_title="meta-analysis",
column_labels = AD.traits,
column_names_rot = 45,
column_names_centered = T,
row_names_side = "left") %>% print()
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panels F-J - Causal modeling #
# ----------------------------------------------------------------------------------------------------------------- #
# Panels were designed based on the results in the Other analyses/3.causal modeling.R file
#####################################################################################################################
# Supp Figure 3 - State-Trait Associations & Validations #
#####################################################################################################################
# ----------------------------------------------------------------------------------------------------------------- #
# Panel A - CelMod correlations for states #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "s6A.pdf"), width=14, height=3)
print(py_to_r(data$uns$celmod$test.corrs) %>%
rownames_to_column("state") %>%
mutate(group = factor(data$var[state, "grouping.by"], levels=names(cell.group.color))) %>%
ggplot(aes(reorder(state, -corr),corr, fill=group, label=sig)) +
geom_bar(stat="identity") +
geom_text(nudge_y = .01, angle=90, hjust=0, vjust=.65) +
facet_wrap(.~group, scales = "free_x", nrow = 1) +
scale_fill_manual(values=scales::alpha(cell.group.color, .75)) +
scale_y_continuous(expand = expansion(add=c(.1, .2))) +
labs(x=NULL, y=NULL) +
theme_classic() +
theme(legend.position = "none",
strip.background = element_blank(),
axis.text.x = element_text(angle=90, hjust = 1, vjust = .5)))
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel B - snRNAseq/bulk tstat comparison #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "s6B.pdf"), width=embed.width, height=embed.height.small*3)
traits = c(AD.traits, AD.traits.cat)
df <- py_to_r(data$uns$trait.analysis$meta.analysis) %>%
filter(trait %in% names(traits)) %>%
group_by(trait) %>%
mutate(trait = factor(trait, names(traits)),
label = if_else(adj.pval.meta <= .05, state, NA_character_),
color = case_when(is.na(label) ~ NA_character_,
tstat.sc > 0 ~ "1",
tstat.sc < 0 ~ "-1"))
corrs <- do.call(rbind, lapply(unique(df$trait), function(t)
data.frame(cor.test(df[df$trait == t,]$tstat.sc,
df[df$trait == t, ]$tstat.b,
method="spearman")[c("estimate","p.value")], row.names = t))) %>%
mutate(adj.pval = p.adjust(p.value, method = "BH"))
plot_grid(plotlist = lapply(levels(df$trait), function(t) {
.df <- df[df$trait == t,]
range.min <- min(.df[,c("tstat.sc","tstat.b")], na.rm = T)
range.max <- max(.df[,c("tstat.sc","tstat.b")], na.rm = T)
.label <- paste0("R=", round(corrs[t,]$estimate, 2),
"\nFDR=", scales::scientific(corrs[t,]$adj.pval))
ggplot(.df, aes(tstat.sc, tstat.b, label=label)) +
geom_abline(linetype="dashed", color="grey30") +
geom_point(size=.75) +
ggplot2::annotate(geom="text", x=range.min, y=.95*range.max, hjust=0, vjust=1,label=.label) +
ggrepel::geom_text_repel(aes(color=color), max.overlaps = 30, show.legend = F, min.segment.length = unit(0, "pt")) +
scale_color_manual(values = list("1"=green2purple(3)[3], "-1"=green2purple(3)[1])) +
scale_x_continuous(limits = c(range.min, range.max), expand = expansion(mult = .025)) +
scale_y_continuous(limits = c(range.min, range.max), expand = expansion(mult = .025)) +
labs(x="t-stat snRNA-seq", y="t-stat bulk pred.", title=traits[[t]])
}),
nrow=3) %>% print()
rm(df, corrs, traits)
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel C - snuc+bulk Trait Associations #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "s6C.joint.pdf"), width=2*3 + 1, height=5)
plot.trait.associations.cross.cohort(names(AD.traits.cat), AD.traits.cat,
fdr.thr = .01, use_raster=TRUE, raster_quality = 10)
while (!is.null(dev.list())) dev.off()
pdf(file.path(panel.path, "s6C.pdf"), width=3.5, height=5)
plot.trait.associations(py_to_r(data$uns$trait.analysis$snuc),
params = names(AD.traits.cat),
column_title="snRNA-seq",
column_labels = AD.traits.cat,
column_names_rot = 45,
column_names_centered = T,
row_names_side = "left",
use_raster=T,
raster_quality = 10) %>% print()
plot.trait.associations(py_to_r(data$uns$trait.analysis$celmod),
params = names(AD.traits.cat),
column_title="bulk-predicted",
column_labels = AD.traits.cat,
column_names_rot = 45,
column_names_centered = T,
row_names_side = "left",
use_raster=T,
raster_quality = 10) %>% print()
plot.trait.associations(py_to_r(data$uns$trait.analysis$meta.analysis),
params = names(AD.traits.cat),
value.by = "z.meta", pval.by = "adj.pval.meta",
column_title="meta-analysis",
column_labels = AD.traits.cat,
column_names_rot = 45,
column_names_centered = T,
row_names_side = "left") %>% print()
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panels D-I - Causal modeling #
# ----------------------------------------------------------------------------------------------------------------- #
# Panels were designed based on the results in the `3. Other analyses/3.causal modeling.R` file
图4
source("5. Manuscript code/utils.R")
# The following results file is generated by `Other analyses/RNAscope.analysis.R`
validations <- readRDS("3. Other analyses/data/RNAscope.rds")
#####################################################################################################################
# Figure 4 - RNAscope Validations #
#####################################################################################################################
# ----------------------------------------------------------------------------------------------------------------- #
# Panel A - Mic12/13 pathways vs all #
# ----------------------------------------------------------------------------------------------------------------- #
pathways <- h5read(aggregated.data, file.path(mapping[["microglia"]], "pa")) %>%
filter(state %in% c("Mic.12","Mic.13") & Count > 2) %>%
mutate(direction.n = if_else(direction == "upregulated", 1, -1),
signed.pval = -log10(p.adjust) * direction.n) %>%
pivot_wider(id_cols = c("Description"), names_from = c(state, state), values_from = c(signed.pval, direction.n)) %>%
mutate(across(contains("direction.n"), ~replace_na(., 0)),
tot.direction = direction.n_Mic.12 + direction.n_Mic.13) %>%
arrange(tot.direction, signed.pval_Mic.12, signed.pval_Mic.13) %>%
column_to_rownames("Description")
annotate <- c("negative regulation of cell migration",
"negative regulation of response to oxidative stress",
"actin binding", "regulation of lipase activity",
"foam cell differentiation", "lipid catabolic process",
"lipid catabolic process", "exploration behavior",
"glial cell migration", "synapse organization",
"positive regulation of protein polymerization",
"Platelet activation, signaling and aggregation")
annotate <- which(rownames(pathways) %in% annotate)
pdf(file.path(panel.path, "4A.pdf"), width=embed.width*.7, height=embed.height*1.2)
Heatmap(pathways %>% select(contains("signed.pval")) %>% as.matrix,
col = green2purple.less.white(21),
cluster_rows = F,
cluster_columns = F,
show_row_names = F,
row_split = pathways$tot.direction,
column_labels = c("Mic.12", "Mic.13"),
row_title = " ",
right_annotation = rowAnnotation(text=anno_mark(at=annotate, labels=rownames(pathways)[annotate])),
border=T,
row_gap = unit(0,"pt"),
na_col = "grey90",
height = unit(6,"cm"),
width = unit(.8, "cm"))
while (!is.null(dev.list())) dev.off()
rm(pathways, annotate)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel B - Mic12 vs Mic13 pathways #
# ----------------------------------------------------------------------------------------------------------------- #
pathways <- h5read(aggregated.data, file.path(mapping[["microglia"]], "pa.pairwise")) %>%
filter(comparison %in% c("Mic.12 vs. Mic.13","Mic.13 vs. Mic.12")) %>%
dplyr::rename(state=comparison) %>%
mutate(direction = ifelse(state == "Mic.13 vs. Mic.12", 1, -1), rowname=Description,
signed.pval = -log10(p.adjust)*direction) %>%
arrange(signed.pval) %>%
column_to_rownames("rowname")
annotate = c("MHC class II protein complex", "negative regulation of leukocyte apoptotic process",
"Antigen processing and presentation", "axonogenesis",
"extracellular matrix organization", "cell junction assembly", "lamellipodium assembly",
"lamellipodium assembly and organization", "negative regulation of immune system process",
"negative regulation of locomotion", "immunological synapse")
annotate <- which(rownames(pathways) %in% annotate)
pdf(file.path(panel.path, "4B.pdf"), width=embed.width*.7, height=embed.height*1.2)
Heatmap(pathways %>% select(signed.pval) %>% as.matrix,
col = green2purple(21),
cluster_rows = F, show_row_names = F, show_column_names = F,
row_title = " ",
right_annotation = rowAnnotation(text=anno_mark(at=annotate, labels=rownames(pathways)[annotate])),
row_split = pathways$direction,
border=T,
row_gap = unit(0,"pt"),
height = unit(6,"cm"),
width = unit(.4, "cm"))
while (!is.null(dev.list())) dev.off()
rm(annotate, pathways)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel C - Selective genes differential between Mic12/13/Mac #
# ----------------------------------------------------------------------------------------------------------------- #
genes = list(`Mic.12 genes` = c("CD163","HLA-DRA","FOXP1","PELI1","PELI2"),
`Both` = c("MSR1","PRKCE"),
`Mic.13 genes` = c("PTPRG","PPARG","SPP1","TGFBR1","SMAD3","SCIN","WIPF3"),
`AD risk genes`= c("ADAM10","TREM2","APOE","GPNMB"))
exp <- h5read(aggregated.data, "glia/microglia/gene.exp") %>% filter(gene %in% unlist(genes))
# Summarize "other" microglia (non Mic.12/13/Mac/Mono) gene expression
n.mic <- exp %>% select(state, n) %>% unique %>% pull(n) %>% sum
exp <- exp %>% mutate(collapse.state = !state %in% c("Mic.12","Mic.13","Monocytes","Macrophages"),
state.prop = if_else(collapse.state, n/n.mic, 1),
state = if_else(collapse.state, "Other", state)) %>%
mutate(mean.exp = mean.exp*state.prop,
pct.exp = pct.exp*state.prop) %>%
group_by(state, gene) %>%
summarise(across(c(mean.exp, pct.exp), sum))
hm <- gheatmap(df = exp, color.by = "mean.exp", size.by="pct.exp", cols = green2purple,
row.order = list(a=c("Monocytes","Macrophages","Mic.12","Mic.13","Other")), column.order = genes, scale.by = "col", cluster_rows=F, border=T, show_column_dend=F, cluster_columns=F)
pdf(file.path(panel.path, "4C.pdf"), width=10, height=3)
draw(hm[[1]], annotation_legend_list = hm[[2]], merge_legend=T)
while (!is.null(dev.list())) dev.off()
rm(n.mic, exp, genes, hm)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel E - Markers Distributions - snRNAseq #
# ----------------------------------------------------------------------------------------------------------------- #
# The following code produces scatter plots shown in panel 4e, as well as in Extended Data Figure 7d
cols <- c("Mic.12"="paleturquoise3", "Mic.13"="red3","Macrophages"="orchid3")
pdf(file.path(panel.path, "4E.pdf"), width=embed.width.small*3, height=embed.height.small*2)
ord <- list(c("TPRG1","CPM"), c("TPRG1","MRC1"), c("CPM","MRC1"))
snuc <- lapply(ord, function(pair)
validations$RNAscope$snuc.exp %>%
filter(state != "Other") %>%
dplyr::select(state, g1=pair[[1]], g2=pair[[2]]) %>%
mutate(group = factor(paste(pair, collapse = "-")),
state = factor(state, levels = rev(c("Mic.12","Mic.13","Macrophages"))))) %>%
do.call(rbind, .) %>%
rowwise() %>%
count(state, g1, g2, group) %>%
`[`(sample(rownames(.)),) %>%
ggplot(., aes(g1,g2, color=state)) +
geom_jitter(alpha=.75) +
scale_color_manual(values = cols) +
scale_x_continuous(expand = expansion(add=2)) +
scale_y_continuous(expand = expansion(add=2)) +
facet_wrap("group", scales = "free") +
labs(x=NULL, y=NULL, color=NULL) +
guides(colour = guide_legend(override.aes = list(size=3))) +
theme_classic() +
theme(strip.background = element_blank())
df <- validations$RNAscope$df %>%
rowwise() %>%
mutate(state = c("Mic.12","Mic.13","Macrophages", "Other", "none")[which.max(c_across(c(Mic.12,Mic.13,Macrophages, Other, none)))]) %>%
data.frame
scope <- lapply(ord, function(pair) df %>% dplyr::select(state, g1=pair[[1]], g2=pair[[2]]) %>%
mutate(group = factor(paste(pair, collapse = "-")))) %>%
do.call(rbind, .) %>%
rowwise() %>%
count(state, g1, g2, group) %>%
mutate(state = factor(state, c("Mic.12","Mic.13","Macrophages"))) %>%
`[`(sample(rownames(.)),) %>%
ggplot(., aes(g1,g2, color=state)) +
scale_alpha_binned(range=c(.1, .5))+
geom_jitter(alpha=.75) +
scale_x_continuous(expand = expansion(add=.2)) +
scale_y_continuous(expand = expansion(add=.2)) +
scale_color_manual(values = cols, na.value = "lightgrey") +
guides(colour = guide_legend(override.aes = list(size=3))) +
facet_wrap("group", scales = "free") +
labs(x=NULL, y=NULL) +
theme_classic() +
theme(strip.background = element_blank())
print(snuc / scope)
while (!is.null(dev.list())) dev.off()
rm(snuc, scope, ord, df)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel F - Prevalence disease regression #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "4F.pdf"), width=embed.width.small, height=embed.height)
cols <- c("AD"="red3", "MCI"="chocolate2", "No AD"="#191970")
format <- paste(rep("t=%.2g, P=%.2g", 2), sep="*\", \"*")
merge(validations$RNAscope$predicted.proportions,
validations$pTau$summarised,
by.x = c("row.names","batch","AD"), by.y = c("sample","batch","AD")) %>%
pivot_longer(cols = c(Mic.12,Mic.13), names_to = "state", values_to = "prop") %>%
ggplot(aes(pTau_tangles_plaques_median, prop)) +
geom_smooth(formula = "y~x", method="lm",color="black", alpha=.2) +
ggpmisc::stat_fit_tidy(aes(label=sprintf(format, after_stat(`x_stat`), after_stat(`x_p.value`)))) +
geom_point(aes(color=AD), size=2) +
scale_color_manual(values = cols) +
scale_y_continuous(expand = expansion(mult=.02), oob=scales::squish) +
scale_x_continuous(expand = expansion(mult=.01)) +
facet_wrap(~state, ncol=1, scales = "free") +
labs(x=NULL, y=NULL) +
theme_classic() +
theme(strip.background = element_blank(),
legend.position = c(3,3))
rm(format, cols)
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel G - associating morphology with state probability #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "4G.pdf"), width=embed.width, height=embed.height)
split(validations$RNAscope$morpholoy.association, validations$RNAscope$morpholoy.association$trait) %>%
lapply(., function(df) {
v = max(abs(df$tstat))
text = paste(round(df$tstat, 2), df$sig, sep = "\n")
col.range <- green2purple(21)
if(df[1,"trait"] == "eccentricity")
col.range <- rev(col.range)
Heatmap(df[,"tstat"],
row_labels = df$state,
cluster_rows = F,
show_column_names = F,
cluster_columns = F,
width = unit(.8,"cm"),
height = unit(8, "cm"),
col = circlize::colorRamp2(seq(-v, v, length.out=21), col.range),
cell_fun = function(j, i, x, y, w, h, col) grid.text(text[i], x,y),
column_title = df[1,"trait"],
row_names_side = "left",
border=T,
heatmap_legend_param = list(direction="horizontal", title="t stat"))
}) %>% base::Reduce(`+`,.)
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel H - PAM score for microglial states #
# ----------------------------------------------------------------------------------------------------------------- #
res <- validations$ROSMAP.PAM.Association
v <- max(abs(res$t.value))
pdf(file.path(panel.path, "4H.pdf"), width=embed.width, height=embed.height)
plot_grid(
Heatmap(res[1:4,"t.value"] %>% as.matrix(),
col = circlize::colorRamp2(seq(-v, v, length.out=7), green2purple(7)),
row_labels = rownames(res)[1:4],
cluster_rows = F,
show_column_names = F,
row_names_side = "left",
border=T,
width = unit(.8,"cm"),
height = unit(8,"cm"),
column_title="PAM Score",
heatmap_legend_param = list(title="t stat"),
cell_fun = function(j, i, x, y, w, h, col) grid.text(paste(round(res[i, "t.value"],2), res[i, "sig"], sep = "\n"), x, y)) %>% draw %>% grid.grabExpr(),
Heatmap(res[5:6,"t.value"] %>% as.matrix(),
col = circlize::colorRamp2(seq(-v, v, length.out=7), green2purple(7)),
cluster_rows = F,
show_column_names = F,
row_names_side = "left",
border=T,
width = unit(.8,"cm"),
height = unit(4, "cm"),
heatmap_legend_param = list(title="t stat"),
cell_fun = function(j, i, x, y, w, h, col) grid.text(paste(round(res[i+4, "t.value"],2), res[i+4, "sig"], sep = "\n"), x, y)) %>% draw %>% grid.grabExpr(),
align = "hv") %>% print(.)
while (!is.null(dev.list())) dev.off()
rm(others, df, res, v, res.ctrl, controls)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel H - PAM scores with controls for confounders #
# ----------------------------------------------------------------------------------------------------------------- #
controls <- c("pmi", "Mic.12", "Mic.13")
pdf(file.path(panel.path, "4H.2.pdf"), width=embed.width, height=embed.height*2)
lapply(c("Mic.12","Mic.13"), function(state) {
.df <- df %>% mutate(covariate = get(state))
naive <- lm(pam ~ covariate, .df)
# Controlling for confounding effects
confounders <- lm(paste0("pam~", paste(setdiff(controls, state), collapse = "+")), .df)
# Fitting pam to state after controlling for confounders
fit <- lm(resid.pam ~ covariate, .df %>% mutate(resid.pam=residuals(confounders) + mean(.df$pam)))
.df <- purrr::map2(list(naive, fit), c("direct", "controlled"),
~predict(.x, se=T) %>% `[`(c("fit","se.fit")) %>% as.data.frame %>%
mutate(model=.y, state=state) %>% cbind(., .df[, c("pam","covariate")])) %>%
do.call(rbind, .)
ggplot(.df, aes(covariate, pam, group=model, fill=model, color=model)) +
geom_ribbon(aes(ymin=fit-se.fit, ymax=fit+se.fit), alpha=.1, color=NA) +
geom_line(aes(y=fit)) +
geom_point(data=. %>% filter(model == "direct"), color="black") +
labs(x=state, y="pam score")
}) %>% ggpubr::ggarrange(plotlist = ., ncol = 1, common.legend = T, legend="bottom")
while (!is.null(dev.list())) dev.off()
rm(controls)
#####################################################################################################################
# Supp Figure 4 - RNAscope Validations #
#####################################################################################################################
# ----------------------------------------------------------------------------------------------------------------- #
# Panel A - Expression of key Mic.12/13 genes without collapsing states #
# ----------------------------------------------------------------------------------------------------------------- #
genes = list(`Mic.12 genes` = c("CD163","HLA-DRA","FOXP1","PELI1","PELI2"),
`Both` = c("MSR1","PRKCE"),
`Mic.13 genes` = c("PTPRG","PPARG","SPP1","TGFBR1","SMAD3","SCIN","WIPF3"),
`AD risk genes`= c("ADAM10","TREM2","APOE","GPNMB"))
hm <- h5read(aggregated.data, "glia/microglia/gene.exp") %>%
filter(gene %in% unlist(genes)) %>%
gheatmap(color.by = "mean.exp", size.by="pct.exp", cols = green2purple,
row.order = list(a=atlas$microglia$state.order), column.order = list(a=unlist(genes)), scale.by = "col", cluster_rows=F, border=T, show_column_dend=F, cluster_columns=F)
pdf(file.path(panel.path, "s7A.pdf"), width=7.5, height=5)
draw(hm[[1]], annotation_legend_list = hm[[2]], merge_legend=T)
while (!is.null(dev.list())) dev.off()
rm(genes, hm)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel B - Expression of validation markers #
# ----------------------------------------------------------------------------------------------------------------- #
res <- h5read(aggregated.data, file.path(mapping[["microglia"]], "gene.exp")) %>%
filter(gene %in% c("CPM","TPRG1", "MRC1")) %>%
mutate(pct.exp = 100*pct.exp) %>%
gheatmap("state", "gene", "mean.exp", "pct.exp",
scale.by = "col",
cols = green2purple,
cluster_columns = F,
row_order = atlas$microglia$state.order,
column_order = c("CPM","TPRG1", "MRC1"),
row_names_side="left",
cluster_rows=F,
row_title = " ",
border=T,
size.by.round=0)
pdf(file.path(panel.path, "s7B.pdf"), width=embed.width*.75, height=embed.height)
draw(res[[1]], merge_legends=T, annotation_legend_list = res[[2]])
while (!is.null(dev.list())) dev.off()
rm(res)
图5
source("5. Manuscript code/utils.R")
#####################################################################################################################
# Figure 5 - Cellular Landscape & State/Trait Dynamics #
#####################################################################################################################
# ----------------------------------------------------------------------------------------------------------------- #
# Panel B - Branch Probabilities #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "5B.pdf"), width=embed.width.small*.65, height=embed.height.small)
plot.landscape(data$uns$trajectories$palantir$branch.probs %>% py_to_r %>% mutate(diff=prAD-ABA) %>% dplyr::select(diff),
smoothened = F,
size = 1,
cols = green2purple.less.white,
legend.position = c(3,3))
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel C - State Prevalence PHATE #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "5C.pdf"), width=embed.width.small*.75*4, height=embed.height.small*2)
plot.landscape(c("Ast.1", "Mic.12","Ast.10","Ast.5", "Mic.2","Mic.13","Oli.7","OPC.3"), enforce.same.color.scale = FALSE, smoothened = TRUE, ncol=4) %>% print(.)
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel D - Trait Prevalence PHATE #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "5D.pdf"), width=embed.width.small*.75, height=embed.height.small*3)
plot_grid(
plot.landscape(c("sqrt.amyloid_mf","sqrt.tangles_mf"), enforce.same.color.scale = FALSE, ncol = 1),
plot.landscape(c("cogng_demog_slope"), cols = function(n) colorRampPalette(rev(c("#30123BFF", "#4777EFFF", "#1BD0D5FF", "#62FC6BFF", "#D2E935FF", "#FE9B2DFF", "#DB3A07FF")))(n), sort.direction = -1, enforce.same.color.scale = FALSE, ncol = 1),
ncol=1, rel_heights = c(2,1)
)
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel E - Trait Dynamics #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "5E.pdf"), width=embed.width, height=embed.height*1.75)
plot_grid(plotlist = lapply(names(AD.traits), function(trait)
plot.dynamics(c(trait), dynamics = data$uns$trajectories$palantir$dynamics, cols=AD.traits.colors,
overlap.pseudotime=.112, ncol=2, strip.position="top", scales="free",
label = F, legend.position = c(2.5,0), include.points=F) +
labs(x=NULL, y=trait, title=NULL)),
ncol=1) %>% print()
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel F - Landscape Bulk #
# ----------------------------------------------------------------------------------------------------------------- #
bulk <- anndata::read_h5ad("4. BEYOND/data/Celmod.subpopulation.proportion.h5ad")
# prAD-ABA landscape
pdf(file.path(panel.path, "5F.traj.prob.pdf"), width=embed.width.small, height=embed.height.small)
plot.landscape(bulk$uns$trajectories$branch.probs %>% py_to_r %>% mutate(diff=prAD.like-ABA.like) %>% dplyr::select(diff),
"X_umap",
smoothened = F,
size = 1,
cols = green2purple.less.white,
legend.position = c(3,3), data. = bulk)
while (!is.null(dev.list())) dev.off()
# Trajectories root point
pdf(file.path(panel.path, "5F.root.pdf"), width=embed.width.small, height=embed.height.small)
plot.landscape(data$obs %>% mutate(root = if_else(data$uns$trajectories$pseudotime ==0, "root", NA_character_)) %>%
dplyr::select(root), "X_umap",data. = bulk)
while (!is.null(dev.list())) dev.off()
# Landscape of key states
pdf(file.path(panel.path, "5F.states.pdf"), width=embed.width.small, height=embed.height.small)
plot.landscape(c("Ast.1","Ast.5","Ast.10","Mic.13"), "X_umap", enforce.same.color.scale = FALSE, size = 1, data. = bulk)
while (!is.null(dev.list())) dev.off()
# AD trait dynamics
pdf(file.path(panel.path, "5F.traits.pdf"),width=embed.width, height=embed.height*1.75)
plot_grid(plotlist = lapply(names(AD.traits), function(trait)
plot.dynamics(c(trait), dynamics = bulk$uns$trajectories$dynamics, cols=AD.traits.colors,
overlap.pseudotime=.2, ncol=2, strip.position="top", scales="free",
label = F, legend.position = c(2.5,0), include.points=F) +
labs(x=NULL, y=trait, title=NULL)),
ncol=1) %>% print()
while (!is.null(dev.list())) dev.off()
rm(bulk)
#####################################################################################################################
# Supp Figure 5 - Cellular Landscape & State/Trait Dynamics #
#####################################################################################################################
# ----------------------------------------------------------------------------------------------------------------- #
# Panel A - Plain 3D PHATE #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "s8A.pdf"), width=embed.width, height=embed.height)
plot.landscape.3D(c("clusters"), theta = 135, phi = 30, cols = scales::hue_pal(), legend.position = "left")
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel B - 3D PHATE from additional states #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "s8B.pdf"), width=embed.width*2, height=embed.height)
plot.landscape.3D(c("Mic.6","Ast.9","Ast.2","Mic.7","Oli.8","Oli.4"), theta = 135, phi = 30, smoothened = TRUE)
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel C - State Prevalence UMAP+tSNE #
# ----------------------------------------------------------------------------------------------------------------- #
source("4. BEYOND/utils/utils.R")
sc <- reticulate::import("scanpy")
sub <- data[,data$uns$celmod$celmod.states]
sc$external$tl$phate(sub, k = as.integer(10), n_components = as.integer(2), a = as.integer(40), knn_dist = "cosine",
mds_dist = "cosine", mds_solver = "smacof", verbose = F)
sub$obsp[["similarity_X_phate"]] <- embedding.similarity(sub$obsm[["X_phate"]], knn = 5)
pdf(file.path(panel.path, "s8C.pdf"), width=embed.width*2.5, height=embed.height)
args <- list(features = c("Ast.1","Mic.2","Mic.12","Mic.13","Oli.7","Ast.10","OPC.3","Ast.5","Ast.2","Ast.9"),
enforce.same.color.scale = FALSE,
nrow=1, size=1,
data. = data)
p1 <- plot_grid(do.call(plot.landscape, modifyList(args, list(embedding = "X_umap"))),
do.call(plot.landscape, modifyList(args, list(embedding = "X_tsne"))),
do.call(plot.landscape, modifyList(args, list(embedding = "X_phate", data.=sub))),
ncol=1)
args <- list(features = c("clusters"), cols = scales::hue_pal(), size=1, nrow=1, data. = data)
p2 <- plot_grid(do.call(plot.landscape, modifyList(args, list(embedding = "X_umap"))),
do.call(plot.landscape, modifyList(args, list(embedding = "X_tsne"))),
do.call(plot.landscape, modifyList(args, list(embedding = "X_phate", data.=sub))),
ncol=1)
plot_grid(p2, p1, ncol=2, rel_widths = c(1,10)) %>% print(.)
while (!is.null(dev.list())) dev.off()
rm(sub, sc, args, p1, p2)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel D - VIA model over 3D PHATE #
# ----------------------------------------------------------------------------------------------------------------- #
df <- data.frame(pseudotime=data$uns$trajectories$via$pseudotime,
entropy=apply(py_to_r(data$uns$trajectories$via$branch.probs), 1, function(i) -sum(i*log2(i)) %>% ifelse(is.nan(.), 0, .)),
py_to_r(data$uns$trajectories$via$branch.probs))
pdf(file.path(panel.path, "s8D.pdf"), width=embed.width, height=embed.height*1.5)
plot.landscape.3D(df, ncol=2, theta = 135, phi = 30, smoothened = F, legend.position = c(3,3), enforce.same.color.scale = F, size=1.5)
while (!is.null(dev.list())) dev.off()
rm(df)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel E - Palantir model over PHATE #
# ----------------------------------------------------------------------------------------------------------------- #
df <- data.frame(pseudotime=data$uns$trajectories$palantir$pseudotime,
entropy=apply(py_to_r(data$uns$trajectories$palantir$branch.probs), 1, function(i) -sum(i*log2(i)) %>% ifelse(is.nan(.), 0, .)),
py_to_r(data$uns$trajectories$palantir$branch.probs))
pdf(file.path(panel.path, "s8E.pdf"), width=embed.width*.85, height=embed.height)
plot.landscape(df, ncol=2, smoothened = F, legend.position = c(3,3), enforce.same.color.scale = F, size=1.5)
while (!is.null(dev.list())) dev.off()
rm(df)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel F - Palantir VIA comparison #
# ----------------------------------------------------------------------------------------------------------------- #
m <- lapply(data$uns$trajectories, function(t) list(prob = as.matrix(py_to_r(t$branch.probs)), ps = t$pseudotime))
sig <- outer(colnames(m$palantir$prob), colnames(m$via$prob),
Vectorize(function(p,v) cor.test(m$palantir$prob[,p], m$via$prob[,v], use="pairwise.complete.obs")[["p.value"]])) %>%
p.adjust(method = "BH") %>%
cut(., c(-.1, .0001, .001, .01, .05, Inf), c("****","***", "**", "*", "")) %>% matrix(ncol=ncol(m$via$prob))
pdf(file.path(panel.path, "s8F1.pdf"), width=embed.width, height=embed.height)
Heatmap(cor(m$palantir$prob, m$via$prob, use = "pairwise.complete.obs"),
col = circlize::colorRamp2(seq(-1,1, length.out=11), green2purple(11)),
cell_fun = function(j, i, x, y, w, h, fill) grid.text(sig[i,j], x,y),
cluster_rows = F, cluster_columns = F)
while (!is.null(dev.list())) dev.off()
pdf(file.path(panel.path, "s8F2.pdf"), width=embed.width, height=embed.height)
ggplot(data.frame(p=m$palantir$ps, v=m$via$ps) %>% arrange(p),aes(p,v)) +
geom_point(size=1) +
geom_abline() +
scale_x_continuous(expand = c(.01,.01)) +
scale_y_continuous(expand = c(.01,.01)) +
ggpubr::stat_cor(aes(label = paste("Pearson", after_stat(r.label), after_stat(p.label), sep = "~`,`~")), label.y = .95) +
ggpubr::stat_cor(aes(label = paste("Spearman", after_stat(r.label), after_stat(p.label), sep = "~`,`~")), method="spearman", label.y = .9) +
theme_classic() +
labs(x="Pseudotime: Palantir", y="Pseudotime: VIA")
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel G - Entropy over pseudotime #
# ----------------------------------------------------------------------------------------------------------------- #
df <- data.frame(pseudotime=data$uns$trajectories$palantir$pseudotime,
entropy=apply(py_to_r(data$uns$trajectories$palantir$branch.probs), 1, function(i) -sum(i*log2(i)) %>% ifelse(is.nan(.), 0, .)),
py_to_r(data$uns$trajectories$palantir$branch.probs))
pdf(file.path(panel.path, "s8G.pdf"), width=embed.width.small, height=embed.height)
ggplot(df, aes(pseudotime, entropy, color=prAD-ABA)) +
geom_ribbon(aes(x,ymax=ymax,ymin=ymin),
data.frame(x=c(0, .1), ymax=rep(1-.01, 2), ymin=rep(0,2)),
fill="black", alpha=.05, inherit.aes = F) +
geom_vline(xintercept = .1, linetype="dashed") +
geom_point(size=1.5) +
scale_color_gradientn(colors = green2purple.less.white(21),
seq(-1,1, length.out=21), na.value = "lightgrey", aesthetics = "color") +
scale_x_continuous(expand = c(0,0),
labels=function(x) recode(x, `0`="0", `1`="1", .default = as.character(x))) +
scale_y_continuous(expand = expansion(add = .01),
labels=function(x) recode(x, `0`="0", `1`="1", .default = as.character(x))) +
theme_classic() +
theme(legend.position=c(3,3))
while (!is.null(dev.list())) dev.off()
rm(df)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel H - Trait Dynamics - With points #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "s8H.pdf"), width=embed.width.small, height=embed.height)
plot_grid(plotlist = lapply(names(AD.traits), function(trait)
plot.dynamics.wrapper(data$uns$trajectories$palantir$dynamics, c(trait), cols=AD.traits.colors[trait],
overlap.pseudotime=.1, ncol=2, strip.position="top", scales="free", size=1,
label = F, legend.position = c(2.5,0), include.points=TRUE) +
labs(x=NULL, y=trait, title=NULL)),
ncol=1) %>% print()
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel I - Replication Palantir model #
# ----------------------------------------------------------------------------------------------------------------- #
bulk <- anndata::read_h5ad("4. BEYOND/data/Celmod.subpopulation.proportion.h5ad")
df <- data.frame(pseudotime=bulk$uns$trajectories$pseudotime,
entropy=apply(py_to_r(bulk$uns$trajectories$branch.probs), 1, function(i) -sum(i*log2(i)) %>% ifelse(is.nan(.), 0, .)),
py_to_r(bulk$uns$trajectories$branch.probs))
pdf(file.path(panel.path, "s8I.clusters.pdf"), width=embed.width, height=embed.height)
plot.landscape(c("clusters"),"X_umap", legend.position = c(3,3), cols = scales::hue_pal(), data. = bulk)
while (!is.null(dev.list())) dev.off()
pdf(file.path(panel.path, "s8I.palantir.pdf"), width=embed.width*3, height=embed.height)
plot.landscape(df %>% dplyr::select(-entropy), "X_umap", ncol=3, smoothened = F, legend.position = c(3,3), enforce.same.color.scale = F, data. = bulk)
while (!is.null(dev.list())) dev.off()
pdf(file.path(panel.path, "s8J.entropy.drop.pdf"), width=embed.width.small, height=embed.height)
ggplot(df, aes(pseudotime, entropy, color=prAD.like-ABA.like)) +
geom_ribbon(aes(x,ymax=ymax,ymin=ymin),
data.frame(x=c(0, .2), ymax=rep(1-.01, 2), ymin=rep(0,2)),
fill="black", alpha=.05, inherit.aes = F) +
geom_vline(xintercept = .2, linetype="dashed") +
geom_point(size=1.5) +
scale_color_gradientn(colors = green2purple.less.white(21),
seq(-1,1, length.out=21), na.value = "lightgrey", aesthetics = "color") +
scale_x_continuous(expand = c(0,0),
labels=function(x) recode(x, `0`="0", `1`="1", .default = as.character(x))) +
scale_y_continuous(expand = expansion(add = .01),
labels=function(x) recode(x, `0`="0", `1`="1", .default = as.character(x))) +
theme_classic() +
theme(legend.position=c(3,3))
while (!is.null(dev.list())) dev.off()
rm(df)
图6
source("5. Manuscript code/utils.R")
#####################################################################################################################
# Figure 6 - Communities Structure #
#####################################################################################################################
# ----------------------------------------------------------------------------------------------------------------- #
# Panel A - Dynamics - states of interest #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "6A.pdf"), width=embed.width, height=embed.height.small)
for(states in state.dynamics.sets) {
print(plot.dynamics(names(states), cols = states, overlap.pseudotime=.1, label=TRUE) +
theme(strip.text = element_blank(),
legend.position="none",
axis.line = element_line(),
axis.text.y = element_text()) +
labs(x=NULL, y=NULL, title=NULL))
}
while (!is.null(dev.list())) dev.off()
rm(states)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel B - Dynamics & adjacency matrices #
# ----------------------------------------------------------------------------------------------------------------- #
library(dendextend)
communities <- data$uns$communities
membership <- split(data$var_names, data$var$community)
dynamics <- communities$similarities$dynamics %>% `dimnames<-`(list(data$var_names, communities$dynamics.colnames))
corr <- communities$similarities$correlation %>% `dimnames<-`(list(data$var_names, data$var_names))
dyn.adjacency <- communities$similarities$dynamics.adjacency %>% `dimnames<-`(list(data$var_names, data$var_names))
# States to annotate
mark.states <- c("Oli.2","Oli.3","Ast.1","Mic.2","OPC.1","Ast.3","Oli.6","Mic.12","Mic.13","Ast.10","Oli.7","Ast.5","OPC.3","Mic.6","Mic.7")
# Specify dynamics column-order: zero pseudotime in the middle
dynamics <- dynamics[,c(rev(colnames(dynamics)[grepl("ABA", colnames(dynamics))]),
colnames(dynamics)[grepl("prAD", colnames(dynamics))])]
# Hierarchically split community's membership, creating dynamics heatmap within each community/sub-community
hm.comms <- data$var %>% split(., as.character(.$community)) %>%
lapply(., function(inner) {
if(any(!is.na(inner$sub.community)))
lst <- split(rownames(inner), as.character(inner$sub.community))
else
lst <- list(rownames(inner))
lapply(lst, function(states) {
mtx <- dyn.adjacency[states,states] + corr[states,states]
dend <- dendsort::dendsort(hclust(dist(mtx))) %>% as.dendrogram() %>% set("labels_to_character")
prepare(Heatmap(dynamics[states,],
column_split = factor(gsub("_.*", "", colnames(dynamics)), levels=c("ABA","prAD")),
cluster_rows = dend,
cluster_row_slices = F,
cluster_columns = F,
show_column_names = F,
show_row_names = T,
show_row_dend = T,
row_dend_width = unit(10,"pt"),
col = circlize::colorRamp2(seq(-4,4,length.out=21),
colorRampPalette(c("darkorchid4","white","#E65100"))(21)),
column_title = NULL,
row_names_side = "left",
column_gap = unit(1, "pt"),
right_annotation = rowAnnotation(states = anno_mark(which(rownames(mtx) %in% mark.states), intersect(rownames(mtx), mark.states)))))
}) %>% unlist(., recursive=FALSE)
}) %>% unlist(., recursive=FALSE)
hm.comms <- hm.comms %>% `[`(c("C3", "C1.C1.1", "C1.C1.2","C2.C2.2","C2.C2.3","C2.C2.1"))
# Create annotations of traits and pseudotime
df <- data$uns$trajectories$palantir$dynamics$pred.vals %>% py_to_r(.) %>%
filter(feature %in% names(AD.traits)) %>%
mutate(col=paste(trajectory, x, sep = "_"))
traits <- df %>% reshape2::dcast(col~feature, value.var="fit") %>% column_to_rownames("col")
hm.traits <- lapply(names(AD.traits), function(trait) {
mtx <- t(traits[colnames(dynamics),trait])
Heatmap(mtx,
col = circlize::colorRamp2(seq(min(mtx), max(mtx), length.out=21),
colorRampPalette(c("white", AD.traits.colors[[trait]]))(21)),
name = AD.traits[[trait]],
cluster_columns = F,
height = unit(.25,"cm"),
right_annotation = rowAnnotation(n=anno_mark(1, trait)))
}) %>% `names<-`(names(AD.traits))
pseudotime <- df %>% dplyr::select(col, x) %>% unique() %>% dcast(1~col, value.var = "x") %>% column_to_rownames("1") %>% `[`(,colnames(dynamics)) %>% as.matrix()
pseudotime <- Heatmap(pseudotime,
col = viridis::turbo(21),
name = "pseudotime",
show_row_names = F,
show_column_names = F,
cluster_columns = F,
height = unit(.25,"cm"),
right_annotation = rowAnnotation(n=anno_mark(1, "pseudotime")))
# Save figure of all heatmaps
pdf(file.path(panel.path, "6B.1.pdf"), width=embed.width*1.5, height=embed.height*2)
draw(modifyList(modifyList(hm.comms, hm.traits), list(ps=pseudotime))
%>% base::Reduce("%v%", .), ht_gap = unit(1, "pt"))
while (!is.null(dev.list())) dev.off()
# Retrieve state order to be used in composite adjacency matrix - Fig 5c
state.order <- do.call(rbind, lapply(hm.comms, function(hm) data.frame(comm = hm@name, state=rownames(hm@matrix)[row_order(hm)])))
rm(dynamics, hm.comms, df, traits, hm.traits, pseudotime)
# Plot dynamics adjacency and state-state correlations in lower/upper triangles
dyn.adjacency <- dyn.adjacency[state.order$state, state.order$state]
corr <- corr[state.order$state, state.order$state]
marks <- sapply(mark.states, function(s) which(rownames(dyn.adjacency) == s))
matrices <- list(dyn.adjacency, corr)
colors <- list(circlize::colorRamp2(seq(0,1,length.out=21), colorRampPalette(c("white","salmon","red","firebrick4"))(21)),
circlize::colorRamp2(seq(-1,1,length.out=21), green2purple.less.white(21)))
pdf(file.path(path, "6B.2.pdf"), width=embed.width*2, height=embed.height*2)
for (i in 1:2) {
Heatmap(
matrices[[i]],
row_split = state.order$comm,
column_split = state.order$comm,
cluster_rows = F,
cluster_columns = F,
col = colors[[i]],
cell_fun = function(j, i, x, y, w, h, col) {
if(i==j) grid.rect(x,y,w,h, gp = gpar(fill="black", col=NA))
},
column_title = NULL,
row_title = NULL,
show_heatmap_legend = F,
show_row_names = F,
show_column_names = F,
row_gap = unit(2, "pt"),
column_gap = unit(2, "pt"),
right_annotation = rowAnnotation(states = anno_mark(marks, names(marks)))
) %>% draw()
}
while (!is.null(dev.list())) dev.off()
rm(dyn.adjacency, corr, breaks, marks, mark.states, state.order, communities, membership, i, matrices, colors)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel C - Communities dynamics #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "6C.pdf"), width=embed.width, height=embed.height/2)
plot.dynamics(c("C1", "C2.2","C2.3","C3"), dynamics = data$uns$communities$dynamics, cols = comm.cols, legend.position = "none", overlap.pseudotime = .1) + labs(x=NULL, y=NULL)
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel D - Communities trait associations #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "6D.pdf"), width=embed.width*.5, height=embed.height)
plot.trait.associations(py_to_r(data$uns$communities$trait.association) %>%
rename(state=covariate),
params = names(AD.traits),
show.only.significant = F,
column_labels = AD.traits,
column_names_rot = 45,
column_names_centered = T,
cluster_rows=F,
row_names_side = "left",
use_raster=F,
raster_quality = 10,
border=T)
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel E - Community pathways #
# ----------------------------------------------------------------------------------------------------------------- #
states <- data$var %>%
mutate(grouping.by = recode(grouping.by, "Excitatory Neurons" = "excitatory","Inhibitory Neurons" = "inhibitory",
"Oligodendrocytes" = "oligodendrocytes", "Astrocyte" = "astrocytes","Microglia" = "microglia", "OPCs" = "opcs",
"Vascular Niche" = "endo") %>% as.character()) %>%
rownames_to_column() %>%
unstack(rowname~grouping.by)
pdf(file.path(panel.path, "6F.pdf"), width=embed.width, height=embed.height)
lapply(list(c("Mic.12","Mic.13"), c("OPC.1","Ast.3"), c("Ast.9","Ast.10","Oli.13")), function(vals)
do.call(rbind, lapply(names(states), function(ct)
h5read(data.extended, file.path(mapping[[ct]], "pa")) %>%
filter(state %in% vals & direction == "upregulated") %>%
dplyr::select(state, Description))) %>%
split(., .$state) %>%
lapply(., function(x) x$Description) %>%
`[`(rev(vals)) %>%
make_comb_mat %>%
t %>%
UpSet(set_order = vals,
comb_order = order(comb_degree(.)),
border=TRUE,
width=unit(2,"cm"),
top_annotation = NULL,
right_annotation = upset_right_annotation(., gp = gpar(fill="grey80"))) %>%
draw %>%
grid.grabExpr) %>%
plot_grid(plotlist = ., ncol = 1)
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel E - Community pathways #
# ----------------------------------------------------------------------------------------------------------------- #
clustered.pathways <- openxlsx::read.xlsx("5. Manuscript code/data/community.pathways.xlsx", sheet = 1)
clustering.args <- openxlsx::read.xlsx("5. Manuscript code/data/community.pathways.xlsx", sheet = 2, rowNames = T)
pathway.annotations <- openxlsx::read.xlsx("5. Manuscript code/data/figures.data.xlsx", sheet = "community.pathways")
settings <- expand.grid(rownames(clustering.args), c("upregulated")) %>%
`colnames<-`(c("comm","dirc")) %>%
filter(comm %in% c("C4.2","C4.3"))
pdf(file.path(path, "6F.pdf"), width=embed.width*2, height=embed.height*2)
for(i in 1:nrow(settings)) {
message(i)
comm = as.character(settings[i, "comm"])
dirc = settings[i, "dirc"]
# Load community clustered pathways as well as "raw" state-specific pathways
states <- data$var %>%
mutate(grouping.by = recode(grouping.by,
"Excitatory Neurons" = "excitatory","Inhibitory Neurons" = "inhibitory",
"Oligodendrocytes" = "oligodendrocytes", "Astrocyte" = "astrocytes",
"Microglia" = "microglia", "OPCs" = "opcs","Vascular Niche" = "endo") %>% as.character()) %>%
filter(community == comm | sub.community == comm) %>%
`[`(c("Mic.12","Mic.13","Ast.3","OPC.1","Ast.10","Oli.7","Ast.9","Mic.11"),) %>%
rownames_to_column()
states <- split(states, states$grouping.by) %>% lapply(., function(x) x$rowname)
pathways <- do.call(rbind, lapply(names(states), function(ct)
h5read(data.extended, file.path(mapping[[ct]], "pa")) %>% filter(state %in% states[[ct]])))
pathways <- clustered.pathways %>%
dplyr::select(-gene, -geneID) %>%
tidyr::separate_rows(state, sep="/") %>%
merge(., pathways,
by.x = c("state","direction","ID", "Description"),
by.y = c("state","direction","ID", "Description"),
suffixes = c(".grouped",""))
# Prepare state~pathways data for plotting
df <- pathways %>% filter(direction == dirc) %>%
dplyr::select("state","direction","Description","p.adjust","membership") %>%
reshape2::dcast(membership+direction+Description~state, value.var = "p.adjust", fun.aggregate = mean)
# Determine clustered pathways ordering in heatmap
ord <- df[,-(1:3)] %>% mutate_all(~replace(., is.na(.), 2)) %>% as.matrix %>%
Heatmap(show_row_dend = F, show_column_dend = F, row_split = df$membership) %>% prepare()
row.ord <- row_order(ord)
col.ord <- column_order(ord)
# Plot clustered heatmap with pathway annotations
row.splitting <- stack(row.ord) %>% arrange(values) %>% pull(ind)
text <- pathway.annotations %>% filter(community == comm & direction == dirc) %>% tidyr::separate_rows(pathways, sep="/") %>% unstack(pathways~cluster)
anno <- anno_textbox(align_to = row.splitting,
text = text,
gp = gpar(fontsize=12, col="black"),
background_gp = gpar(fill="grey95", col="grey80"),
max_width=unit(6, "cm"),
text_space = unit(8, "pt"))
cols <- green2purple(3)
if(dirc == "downregulated")
cols <- cols[1:2]
else
cols <- rev(cols[2:3])
draw(Heatmap(df[stack(row.ord)[[1]],-(1:3)][,col.ord] %>% as.matrix,
col = cols,
row_split = row.splitting,
cluster_rows = F, cluster_columns = F, show_row_names = F,
border=T, row_gap = unit(0,"pt"),
na_col = "grey90",
right_annotation = rowAnnotation(pathways=anno),
height = unit(10,"cm"),
width = unit(.4*length(col.ord), "cm")))
}
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel G - Mic.13-Ast.10 colocalization #
# ----------------------------------------------------------------------------------------------------------------- #
cols <- list(`Early/ABA`=green2purple(5)[2], prAD=green2purple(5)[4])
df <- readRDS("3. Other analyses/data/ST.validation.rds")$mic13.ast10.colocalization %>%
mutate(trajectory = case_when(trajectory %in% c("Early","ABA") ~ "Early/ABA", .default = trajectory))
pdf(file.path(panel.path, "6F.pdf"), width=embed.width.small*1.2, height=embed.height.small)
ggplot(df, aes(trajectory, cor, fill=trajectory)) +
geom_boxplot(outlier.size = 0) +
geom_point(aes(shape=is.sig)) +
scale_fill_manual(values=cols) +
scale_shape_manual(values = list("TRUE"=16, "FALSE"=1))+
theme_classic()
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel H - Ast.10-Ast.5 exclusivity #
# ----------------------------------------------------------------------------------------------------------------- #
cols <- list(ABA=green2purple(5)[2], prAD=green2purple(5)[4], Early = "grey70")
df <- readRDS("3. Other analyses/data/ST.validation.rds")$ast10.ast5.colocalization
pdf(file.path(panel.path, "6H.1.pdf"), width=embed.width.small, height=embed.height)
df %>% pivot_longer(cols = c("Ast.10", "Ast.5"), names_to="state") %>%
select(trajectory, state, value) %>%
ggplot(aes(trajectory, value, fill=trajectory)) +
geom_violin() +
geom_point() +
facet_wrap(~state, ncol=1, scales = "free") +
theme_classic() +
theme(strip.background = element_blank(),
legend.position = "none") +
scale_fill_manual(values = cols)
while (!is.null(dev.list())) dev.off()
pdf(file.path(panel.path, "6H.2.pdf"), width=embed.width, height=embed.height)
ggplot(df %>% filter(trajectory != "Early"), aes(trajectory, `10.vs.5`, fill=trajectory)) +
geom_boxplot(outlier.size = 0) +
geom_point(aes(shape=is.sig)) +
scale_fill_manual(values=cols) +
scale_shape_manual(values = list("TRUE"=16, "FALSE"=1))+
theme_classic()
while (!is.null(dev.list())) dev.off()
#####################################################################################################################
# Supp Figure 6 - Communities Structure #
#####################################################################################################################
# ----------------------------------------------------------------------------------------------------------------- #
# Panel A - Dynamics - states of interest - including points #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "s9A.pdf"), width=embed.width, height=embed.height.small)
for(states in state.dynamics.sets) {
print(plot.dynamics(names(states), cols = states, overlap.pseudotime=.1, label=TRUE, include.points = TRUE) +
theme(strip.text = element_blank(),
legend.position="none",
axis.line = element_line(),
axis.text.y = element_text()) +
labs(x=NULL, y=NULL, title=NULL))
}
while (!is.null(dev.list())) dev.off()
rm(states)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel B - Communities node graphs #
# ----------------------------------------------------------------------------------------------------------------- #
library(ggraph)
library(tidygraph)
communities <- data$uns$communities
labelled <- c("Ast.1","Mic.2","Oli.3", "Mic.6","Mic.7","OPC.3","Ast.5","Mic.12","Mic.13","OPC.1","Oli.6","Ast.3","Oli.7","Ast.10")
graph <-
# Convert similarity matrices to undirected flat edge dataframes
lapply(c("correlation", "dynamics.adjacency"), function(n)
communities$similarities[[n]] %>%
`dimnames<-`(list(data$var_names, data$var_names)) %>%
melt %>%
filter(value != 0 & Var1 != Var2) %>%
mutate(type=n)) %>%
do.call(rbind, .) %>%
# Convert to tidygraph - with workaround for duplicated edges
as_tbl_graph(directed=FALSE) %>%
activate(edges) %>%
group_by(from, to, value, type) %>%
filter(row_number() == 1) %>%
ungroup() %>%
# Append community information
activate(nodes) %>%
mutate(community = data$var[name, "community"],
sub.community = data$var[name, "sub.community"],
grouping.by = data$var[name,"grouping.by"],
label = ifelse(name %in% labelled, name, NA_character_)) %>%
activate(edges) %>%
mutate(cross = .N()$community[from] != .N()$community[to]) %>%
activate(nodes)
pos <- list(C1=c(0,0), C2 = c(-2, -.5), C3=c(2, -.5))
la <- lapply(names(pos), function(comm)
graph %>% filter(community == comm) %>%
activate(edges) %>%
filter(abs(value) >= .1) %>%
create_layout(layout = "graphopt") %>%
mutate(across(c(x,y), ~scales::rescale(., c(-.5,.5))),
x = x+pos[[comm]][1], y = y+pos[[comm]][2])
) %>% do.call(rbind, .) %>%
column_to_rownames("name") %>%
`[`(graph %>% pull(name),)
pdf(file.path(panel.path, "s9B.pdf"),width=3*1.5, height=3)
for(configuration in list(list("correlation", .2, TRUE),
list("correlation", .2, FALSE),
list("dynamics.adjacency", .2, TRUE),
list("dynamics.adjacency", .2, FALSE))) {
# Subset edges based on configuration
.sub <- graph %>% activate(edges) %>%
filter(type == configuration[[1]] &
abs(value) >= configuration[[2]] &
cross == configuration[[3]]) %>%
activate(nodes)
p <- ggraph(.sub, layout = "manual", x=la$x, y=la$y)
if (configuration[[3]]) # If plotting between community edges
p <- p + geom_edge_fan(aes(colour = value, edge_width=abs(value)/50), n=10, alpha=.6)
p <- p + ggforce::geom_mark_hull(
aes(x, y, group = community),
fill="grey90", concavity = 5, expand = unit(2, "mm"), alpha = .6)
if(!configuration[[3]]) # If plotting within community edges
p <- p + geom_edge_fan(aes(colour = value, edge_width=abs(value)/50), n=10, alpha=.6)
if(configuration[[1]] == "correlation")
p <- p + scale_edge_colour_gradient2(low=green2purple(2)[[1]], high = green2purple(2)[[2]], limit=c(-1,1))
else
p <- p + scale_edge_colour_gradient2(low="grey", high = "firebrick4", limit=c(0,1))
p <- p +
scale_edge_width_binned(range=c(.1, 1), breaks=c(.2,.4,.6,.8,1)) +
geom_node_point(aes(fill=grouping.by), size=2, shape=21) +
scale_fill_manual(values = cell.group.color) +
geom_node_text(aes(filter = !is.na(label), label = label), colour = "black", repel = TRUE) +
theme(panel.background = element_blank())
print(p)
}
dev.off()
rm(p, configuration, la, pos, graph, labelled, communities)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel C - Communities dynamics #
# ----------------------------------------------------------------------------------------------------------------- #
pdf(file.path(panel.path, "s9C.pdf"), width=embed.width, height=embed.height*1.5)
plot_grid(plot.dynamics(c("C1", "C2","C3"), dynamics = data$uns$communities$dynamics, cols = comm.cols, include.points = TRUE, legend.position = c(3,3), overlap.pseudotime = .1) + labs(x=NULL, y=NULL),
plot.dynamics(c("C1.1","C1.2"), dynamics = data$uns$communities$dynamics, cols = comm.cols, include.points = TRUE,legend.position = c(3,3), overlap.pseudotime = .1) + labs(x=NULL, y=NULL),
plot.dynamics(c("C2.1","C2.2","C2.3"), dynamics = data$uns$communities$dynamics, cols = comm.cols, include.points = TRUE,legend.position = c(3,3), overlap.pseudotime = .1) + labs(x=NULL, y=NULL),
ncol=1)
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel D - Replication state dynamics #
# ----------------------------------------------------------------------------------------------------------------- #
bulk <- anndata::read_h5ad("4. BEYOND/data/Celmod.subpopulation.proportion.h5ad")
pdf(file.path(panel.path, "s9D.pdf"), width=embed.width, height=embed.height.small)
for(states in state.dynamics.sets) {
print(plot.dynamics(names(states), cols = states, data. = bulk, dynamics = bulk$uns$trajectories$dynamics, overlap.pseudotime=.2, label=TRUE, include.points = TRUE) +
theme(strip.text = element_blank(),
legend.position="none",
axis.line = element_line(),
axis.text.y = element_text()) +
labs(x=NULL, y=NULL, title=NULL))}
while (!is.null(dev.list())) dev.off()
rm(states, bulk)
# ----------------------------------------------------------------------------------------------------------------- #
# Panel E - ST participants in landscape #
# ----------------------------------------------------------------------------------------------------------------- #
cols <- list(ABA=green2purple(2)[1], prAD=green2purple(2)[2], Early = "grey30")
df <- data$obsm$X_core_phate %>% `colnames<-`(c("PHATE_1","PHATE_2")) %>% filter(!is.na(PHATE_1)) %>%
merge(data$X[,c("Mic.13","Ast.10","Ast.5")], by.x="row.names", by.y="row.names") %>%
merge(data.frame(data$uns$trajectories$palantir$branch.probs %>% py_to_r, pseudotime = data$uns$trajectories$palantir$pseudotime), by.x="Row.names", by.y="row.names") %>%
merge(readRDS("3. Other analyses/data/ST.validation.state.signatures.rds") %>% select(trajectory, sample) %>% unique, by.x="Row.names", by.y="sample", all.x=TRUE) %>%
arrange(!is.na(trajectory))
pdf(file.path(panel.path, "s9E.pdf"), width=embed.width, height=embed.height)
ggplot(df, aes(PHATE_1, PHATE_2, color=trajectory)) +
geom_point() +
ggrepel::geom_label_repel(aes(label=Row.names),data = df %>% filter(!is.na(trajectory))) +
scale_color_manual(values = cols, na.value = "lightgrey") +
theme_classic() +
theme(axis.text = element_blank(),
axis.ticks = element_blank(),
legend.position = "none") +
labs(x=NULL, y=NULL)
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel F - Mic.13-Ast.10 spatial correlation #
# ----------------------------------------------------------------------------------------------------------------- #
cols <- list(ABA=green2purple(5)[2], prAD=green2purple(5)[4], Early = "grey70")
df <- readRDS("3. Other analyses/data/ST.validation.state.signatures.rds") %>%
arrange(trajectory, sample) %>% mutate(sample=paste(trajectory, sample))
pdf(file.path(panel.path, "s9F.pdf"), width=embed.height.small*length(unique(df$sample)), height=embed.height.small)
ggplot(df , aes(Mic.13, Ast.10, group="1")) +
geom_point(size=.25, color="grey") +
geom_smooth(method="lm", color="black") +
facet_wrap(~sample, scales = "free", nrow=1) +
theme_classic() +
theme(strip.background = element_blank())
while (!is.null(dev.list())) dev.off()
# ----------------------------------------------------------------------------------------------------------------- #
# Panel G - Ast.10-Ast.5 spatial incompatibility #
# ----------------------------------------------------------------------------------------------------------------- #
cols <- list(Ast.5=green2purple(5)[2], Ast.10=green2purple(5)[4])
df <- readRDS("3. Other analyses/data/ST.validation.state.signatures.rds") %>%
arrange(trajectory, sample) %>% mutate(sample=paste(trajectory, sample))
pdf(file.path(panel.path, "s9G.pdf"), width=embed.height.small*.8*length(unique(df$sample)), height=embed.height.small)
df %>% pivot_longer(cols = c("Ast.10","Ast.5"), names_to = "state") %>%
arrange(trajectory) %>%
ggplot(., aes(state, value, fill=state)) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
facet_wrap(~sample, scales = "free", nrow=1) +
theme_classic() +
theme(strip.background = element_blank(),
legend.position = "none") +
scale_fill_manual(values = cols)
while (!is.null(dev.list())) dev.off()