复现CFEP(五): 附图数据和代码

介绍

图4代码,数据通过网盘下载

  • 百度链接: https://pan.baidu.com/s/1nAPCdfxNeGnoPR_f3urLSg

  • 提取码: 8xhs

Pre-requisites

These packages are necessary to make the plots that make up Supplementary Figure 1.

library("ggplot2")
library("lemon")
library("tidyverse")
library("cowplot")
library("grid")

library("gridExtra")
library("ggpubr")

library(SummarizedExperiment)![请添加图片描述](https://i-blog.csdnimg.cn/direct/611dd46db7534d39900319a677d27929.jpeg)

library(GenomicRanges)
library(textshape)
library(here)

Supplementary Figure 1:

loc <- "../data/motifs_cfdna_144_167.rds"
t <- readRDS(loc)
t1_144 <- t[which((t$group == "seq144") & (t$motif_length == 1)),]
t1_144$pos <-  t1_144$pos - 10
t1_144[which(t1_144$pos >= 0),]$pos <- t1_144[which(t1_144$pos >= 0),]$pos + 1
t1_144[which(t1_144$pos >= 145),]$pos <- t1_144[which(t1_144$pos >= 145),]$pos + 1
t1_144 <- t1_144 %>%
  group_by(pos) %>%
  mutate(relFreq = Freq / sum(Freq))
plot <- ggplot(t1_144, aes(x=pos,y=relFreq, fill=Var1)) +
  geom_bar(stat = "identity") +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        legend.title = element_blank(),
        axis.title.y = element_text(margin = margin(r = 20, l=20)),
        axis.title.x = element_text(margin = margin(t = 20, b=20))) +
  scale_fill_manual(values = c("#77C8DD", "#004766", "#77dd77", "#00660a")) +
  scale_x_continuous(breaks = c(1,144), name = "Position in the cfDNA fragment") +
  scale_y_continuous(name = "Relative frequency")
ggsave("../output/Supplementary_Fig_1.jpg", plot = plot, device = "jpeg",width = 20, height=12,units = "in", dpi=300)

请添加图片描述

Supplementary Figure 2:

t <- readRDS("../data/position_in_read.rds")
t_normalizer <- unique(t[,c(5,6)])
t_normalizer$normal <- t_normalizer$somatic_all / mean(t_normalizer$somatic_all)
t$normal <- t_normalizer[match(t$case, t_normalizer$case),]$normal
t$n <- t$n / t$normal
t_b_small <- tibble()
for (i in c(0.1, 0.2, 0.3, 0.4, 0.5)) {
  t_b_small_temp <- t[which(t$beta <= i),] %>% 
    group_by(diff, case, motif) %>%
    summarise(n = sum(n))
  t_b_small_temp$beta <- i
  t_b_small <- rbind(t_b_small, t_b_small_temp)
}
#t_b_small$beta <- paste0("<= ", t_b_small$beta)
t_b_large <- tibble()
for (i in c(0.5, 0.6, 0.7, 0.8, 0.9)) {
  t_b_large_temp <- t[which(t$beta >= i),] %>% 
    group_by(diff, case, motif) %>%
    summarise(n = sum(n))
  t_b_large_temp$beta <- 1 - i
  t_b_large <- rbind(t_b_large, t_b_large_temp)
}
#t_b_large$beta <- paste0(">= ", t_b_large$beta)


t_b_small$group <- "Unmethylated"
t_b_large$group <- "Methylated"

t_b <- rbind(t_b_small, t_b_large)
t_b$beta <- factor(t_b$beta, levels = c(0.1, 0.2, 0.3, 0.4, 0.5))


for (i in unique(t_b$beta)) {
  t_b[which((t_b$beta == i) & (t_b$motif == "CG") & (t_b$group == "Unmethylated")),]$n <- t_b[which((t_b$beta == i) & (t_b$motif == "CG") & (t_b$group == "Unmethylated")),]$n * (mean(t_b[which((t_b$beta == i) & (t_b$motif == "CG") & (t_b$group == "Methylated")),]$n) / mean(t_b[which((t_b$beta == i) & (t_b$motif == "CG") & (t_b$group == "Unmethylated")),]$n))
    t_b[which((t_b$beta == i) & (t_b$motif == "CCG") & (t_b$group == "Unmethylated")),]$n <- t_b[which((t_b$beta == i) & (t_b$motif == "CCG") & (t_b$group == "Unmethylated")),]$n * (mean(t_b[which((t_b$beta == i) & (t_b$motif == "CCG") & (t_b$group == "Methylated")),]$n) / mean(t_b[which((t_b$beta == i) & (t_b$motif == "CCG") & (t_b$group == "Unmethylated")),]$n))
}

t_b.summary <- t_b %>%
  group_by(diff, beta, motif, group) %>%
  summarise(
    sd = sd(n, na.rm = TRUE),
    n = mean(n)
  )
t_b.summary$beta <- factor(t_b.summary$beta, levels = c(0.1, 0.2, 0.3, 0.4, 0.5))
t_b$group_beta <- paste0(t_b$group, "_", t_b$beta)
t_b.summary$group_beta <- paste0(t_b.summary$group, "_", t_b.summary$beta)
t_b$beta <- as.numeric(as.character(t_b$beta))
t_b$beta <- paste0("Methylated: beta >= ", 1 - t_b$beta, ";\nUnmethylated: beta <= ", t_b$beta)
t_b.summary$beta <- as.numeric(as.character(t_b.summary$beta))
t_b.summary$beta <- paste0("Methylated: beta >= ", 1 - t_b.summary$beta, ";\nUnmethylated: beta <= ", t_b.summary$beta)

plot_cg <- ggplot(t_b[which((t_b$diff >= 1) & (t_b$diff <= 40) & (t_b$motif == "CG")),], aes(x=diff, y=n, colour=group_beta, fill=group_beta)) +
  geom_point(position=position_jitter(h=0.01, w=0.4), alpha = 1, size = 0.1) +
  geom_bar(stat = "identity", data = t_b.summary[which((t_b.summary$diff>= 1) & (t_b.summary$diff <= 40) & (t_b.summary$motif == "CG")),], colour="black", alpha=0.7) +
   geom_errorbar(
    aes(ymin = n-sd, ymax = n+sd),
    data = t_b.summary[which((t_b.summary$diff>= 1) & (t_b.summary$diff <= 40) & (t_b.summary$motif == "CG")),], width = 0.2, colour="black") +
  facet_grid(rows = vars(beta),
             cols = vars(group),
             scales = "free_y",
             space = "fixed") +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.title.y = element_text(margin = margin(l = 20, r = 20)),
        axis.title.x = element_text(margin = margin(t = 20, b = 40)),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "top",
        legend.position = "none",
        strip.text.y.right = element_text(angle = 0)) +
  scale_fill_manual(values=c("#004766", "#255672", "#3d667e", "#53768A", "#698696", "#77c8dd", "#93cfe3", "#acd7e7", "#c2deeb", "#d7e6ee")) +
  scale_colour_manual(values=c("#004766", "#255672", "#3d667e", "#53768A", "#698696", "#77c8dd", "#93cfe3", "#acd7e7", "#c2deeb", "#d7e6ee")) +
  ylab("") +
  xlab("Position in fragment")

ggsave("../output/Supplementary_Fig_2.jpg", plot = plot_cg, device = "jpeg",width = 20, height=12,units = "in", dpi=300)

#CCG's locations in reads (not included, but shows a pattern that is expe)
#plot_ccg <- ggplot(t_b[which((t_b$diff >= 1) & (t_b$diff <= 40) & (t_b$motif == "CCG")),], aes(x=diff, y=n, colour=group_beta, fill=group_beta)) +
#  geom_point(position=position_jitter(h=0.01, w=0.4), alpha = 1, size = 0.1) +
#  geom_bar(stat = "identity", data = t_b.summary[which((t_b.summary$diff>= 1) & (t_b.summary$diff <= 40) & (t_b.summary$motif == "CCG")),], colour="black", alpha=0.7) +
#   geom_errorbar(
#    aes(ymin = n-sd, ymax = n+sd),
#    data = t_b.summary[which((t_b.summary$diff>= 1) & (t_b.summary$diff <= 40) & (t_b.summary$motif == "CCG")),], width = 0.2, colour="black") +
#  facet_grid(rows = vars(beta),
#             cols = vars(group),
#             scales = "free_y",
#             space = "fixed") +
#  theme_classic(base_size = 20) +
#  theme(text = element_text(size=16),
#        axis.title.y = element_text(margin = margin(l = 20, r = 20)),
#        axis.title.x = element_text(margin = margin(t = 20, b = 40)),
#        strip.background=element_rect(color = NA),
#        panel.spacing=unit(2, "lines"),
#        strip.placement = "top",
#        legend.position = "none",
#        strip.text.y.right = element_text(angle = 0)) +
#  scale_fill_manual(values=c("#004766", "#255672", "#3d667e", "#53768A", "#698696", "#77c8dd", "#93cfe3", "#acd7e7", "#c2deeb", "#d7e6ee")) +
#  scale_colour_manual(values=c("#004766", "#255672", "#3d667e", "#53768A", "#698696", "#77c8dd", "#93cfe3", "#acd7e7", "#c2deeb", "#d7e6ee")) +
#  ylab("") +
#  xlab("Position in fragment")

#ggsave("../output/Supplementary_Fig_2_NOTINCLUDED_CCG.jpg", plot = plot_ccg, device = "jpeg",width = 20, height=12,units = "in", dpi=300)

请添加图片描述

Supplementary Figure 3A:

m <- readRDS("../data/diff_startfrags_3bp.rds")
acg_m <- data.frame("motif" = "ACG",
                     "counts" = m[1,] + m[2,],
                     "rel_counts" = ((m[1,] + m[2,]) / sum((m[1,] + m[2,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Methylated")
ccg_m <- data.frame("motif" = "CCG",
                     "counts" = m[3,] + m[4,],
                     "rel_counts" = ((m[3,] + m[4,]) / sum((m[3,] + m[4,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Methylated")
gcg_m <- data.frame("motif" = "GCG",
                     "counts" = m[5,] + m[6,],
                     "rel_counts" = ((m[5,] + m[6,]) / sum((m[5,] + m[6,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Methylated")
tcg_m <- data.frame("motif" = "TCG",
                     "counts" = m[7,] + m[8,],
                     "rel_counts" = ((m[7,] + m[8,]) / sum((m[7,] + m[8,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Methylated")
acg_u <- data.frame("motif" = "ACG",
                     "counts" = m[9,] + m[10,],
                     "rel_counts" = ((m[9,] + m[10,]) / sum((m[9,] + m[10,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Unmethylated")
ccg_u <- data.frame("motif" = "CCG",
                     "counts" = m[11,] + m[12,],
                     "rel_counts" = ((m[11,] + m[12,]) / sum((m[11,] + m[12,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Unmethylated")
gcg_u <- data.frame("motif" = "GCG",
                     "counts" = m[13,] + m[14,],
                     "rel_counts" = ((m[13,] + m[14,]) / sum((m[13,] + m[14,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Unmethylated")
tcg_u <- data.frame("motif" = "TCG",
                     "counts" = m[15,] + m[16,],
                     "rel_counts" = ((m[15,] + m[16,]) / sum((m[15,] + m[16,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Unmethylated")
acg_d <- data.frame("motif" = "ACG",
                     "counts" = (m[9,] + m[10,]) - (m[1,] + m[2,]),
                     "rel_counts" = - (((m[9,] + m[10,]) / sum((m[9,] + m[10,]))) * 51) + (((m[1,] + m[2,]) / sum((m[1,] + m[2,]))) * 51),
                     "pos" = c(-25:25),
                     "meth" = "Difference")
ccg_d <- data.frame("motif" = "CCG",
                     "counts" = (m[11,] + m[12,]) - (m[3,] + m[4,]),
                     "rel_counts" = - (((m[11,] + m[12,]) / sum((m[11,] + m[12,]))) * 51) + (((m[3,] + m[4,]) / sum((m[3,] + m[4,]))) * 51),
                     "pos" = c(-25:25),
                     "meth" = "Difference")
gcg_d <- data.frame("motif" = "GCG",
                     "counts" = (m[13,] + m[14,]) - (m[5,] + m[6,]),
                     "rel_counts" = - (((m[13,] + m[14,]) / sum((m[13,] + m[14,]))) * 51) + (((m[5,] + m[6,]) / sum((m[5,] + m[6,]))) * 51),
                     "pos" = c(-25:25),
                     "meth" = "Difference")
tcg_d <- data.frame("motif" = "TCG",
                     "counts" = (m[15,] + m[16,]) - (m[7,] + m[8,]),
                     "rel_counts" = - (((m[15,] + m[16,]) / sum((m[15,] + m[16,]))) * 51) + (((m[7,] + m[8,]) / sum((m[7,] + m[8,]))) * 51),
                     "pos" = c(-25:25),
                     "meth" = "Difference")

all <- rbind(acg_m, acg_u, acg_d,
             ccg_m, ccg_u, ccg_d,
             gcg_m, gcg_u, gcg_d,
             tcg_m, tcg_u, tcg_d)

all$meth <- factor(all$meth, levels=c("Unmethylated", "Methylated", "Difference"))
all$scale <- all$pos
all[which(all$scale %in% c(0,1)),]$scale <- "C"
all[which(all$scale == 2),]$scale <- "G"
all[which((all$motif == "ACG") & (all$scale == -1)),]$scale <- "A"
all[which((all$motif == "CCG") & (all$scale == -1)),]$scale <- "C"
all[which((all$motif == "GCG") & (all$scale == -1)),]$scale <- "G"
all[which((all$motif == "TCG") & (all$scale == -1)),]$scale <- "T"
all <- all[which(all$pos %in% c(-19:21)),]
all$pos <- factor(all$pos, levels = c(-19:21))
all$motif <- factor(all$motif, levels = c("CCG", "GCG","ACG", "TCG"))

p <- ggplot(all, aes(x=pos, y=rel_counts, fill=meth)) +
  geom_bar(stat="identity") +
  scale_fill_manual(values=c("#77C8DD", "#004766", "#c3775d")) +
  facet_grid(meth~motif, scales = "free_y") + 
  coord_capped_cart(bottom='both') +
  theme_classic(base_size = 20) +
  theme(#axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        text = element_text(size=16),
        axis.text.x = element_text(size=11),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        panel.spacing = unit(2, "lines"),
        axis.title.y = element_text(margin = margin(r = 20, l = 20)),
        axis.title.x = element_text(margin = margin(t = 20, b = 20)),
        strip.text.x = element_text(size = 11, hjust = 0.5)) +
  scale_y_continuous(name = "Amount of cfDNA fragments starting (normalized)") +
  scale_x_discrete(name ="Position around motif",
                  breaks=c(-19,-9,0,1,2,11,21),
                  labels=c("-20", "-10", "A", "C", "G", "10", "20")
                  ) +
  geom_vline(aes(xintercept=19.5), linetype=2) +
  geom_vline(aes(xintercept=22.5), linetype=2)

gt <- ggplotGrob(p)
gt$grobs[[18]]$children[[2]]$grobs[[2]]$children[[1]]$label <- 
  c("-20","-10","C","C","G","10","20")
gt$grobs[[19]]$children[[2]]$grobs[[2]]$children[[1]]$label <- 
  c("-20","-10","G","C","G","10","20")
gt$grobs[[20]]$children[[2]]$grobs[[2]]$children[[1]]$label <- 
  c("-20","-10","A","C","G","10","20")
gt$grobs[[21]]$children[[2]]$grobs[[2]]$children[[1]]$label <- 
  c("-20","-10","T","C","G","10","20")

ga <- ggplotify::as.ggplot(gt)

#ggsave(paste("../output/Supplementary_Fig_3_3bp.jpg"), plot =ga, device = "jpeg",width = 20, height=12,units = "in", dpi=300)

在这里插入图片描述

Supplementary Figure 3B:

m <- readRDS("../data/diff_startfrags_4bp.rds")
accg_m <- data.frame("motif" = "ACCG",
                     "counts" = m[1,] + m[2,],
                     "rel_counts" = ((m[1,] + m[2,]) / sum((m[1,] + m[2,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Methylated")
cccg_m <- data.frame("motif" = "CCCG",
                     "counts" = m[3,] + m[4,],
                     "rel_counts" = ((m[3,] + m[4,]) / sum((m[3,] + m[4,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Methylated")
gccg_m <- data.frame("motif" = "GCCG",
                     "counts" = m[5,] + m[6,],
                     "rel_counts" = ((m[5,] + m[6,]) / sum((m[5,] + m[6,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Methylated")
tccg_m <- data.frame("motif" = "TCCG",
                     "counts" = m[7,] + m[8,],
                     "rel_counts" = ((m[7,] + m[8,]) / sum((m[7,] + m[8,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Methylated")
accg_u <- data.frame("motif" = "ACCG",
                     "counts" = m[9,] + m[10,],
                     "rel_counts" = ((m[9,] + m[10,]) / sum((m[9,] + m[10,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Unmethylated")
cccg_u <- data.frame("motif" = "CCCG",
                     "counts" = m[11,] + m[12,],
                     "rel_counts" = ((m[11,] + m[12,]) / sum((m[11,] + m[12,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Unmethylated")
gccg_u <- data.frame("motif" = "GCCG",
                     "counts" = m[13,] + m[14,],
                     "rel_counts" = ((m[13,] + m[14,]) / sum((m[13,] + m[14,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Unmethylated")
tccg_u <- data.frame("motif" = "TCCG",
                     "counts" = m[15,] + m[16,],
                     "rel_counts" = ((m[15,] + m[16,]) / sum((m[15,] + m[16,]))) * 51,
                     "pos" = c(-25:25),
                     "meth" = "Unmethylated")
accg_d <- data.frame("motif" = "ACCG",
                     "counts" = (m[9,] + m[10,]) - (m[1,] + m[2,]),
                     "rel_counts" = - (((m[9,] + m[10,]) / sum((m[9,] + m[10,]))) * 51) + (((m[1,] + m[2,]) / sum((m[1,] + m[2,]))) * 51),
                     "pos" = c(-25:25),
                     "meth" = "Difference")
cccg_d <- data.frame("motif" = "CCCG",
                     "counts" = (m[11,] + m[12,]) - (m[3,] + m[4,]),
                     "rel_counts" = - (((m[11,] + m[12,]) / sum((m[11,] + m[12,]))) * 51) + (((m[3,] + m[4,]) / sum((m[3,] + m[4,]))) * 51),
                     "pos" = c(-25:25),
                     "meth" = "Difference")
gccg_d <- data.frame("motif" = "GCCG",
                     "counts" = (m[13,] + m[14,]) - (m[5,] + m[6,]),
                     "rel_counts" = - (((m[13,] + m[14,]) / sum((m[13,] + m[14,]))) * 51) + (((m[5,] + m[6,]) / sum((m[5,] + m[6,]))) * 51),
                     "pos" = c(-25:25),
                     "meth" = "Difference")
tccg_d <- data.frame("motif" = "TCCG",
                     "counts" = (m[15,] + m[16,]) - (m[7,] + m[8,]),
                     "rel_counts" = - (((m[15,] + m[16,]) / sum((m[15,] + m[16,]))) * 51) + (((m[7,] + m[8,]) / sum((m[7,] + m[8,]))) * 51),
                     "pos" = c(-25:25),
                     "meth" = "Difference")

all <- rbind(accg_m, accg_u, accg_d,
             cccg_m, cccg_u, cccg_d,
             gccg_m, gccg_u, gccg_d,
             tccg_m, tccg_u, tccg_d)

all$meth <- factor(all$meth, levels=c("Unmethylated", "Methylated", "Difference"))
all$scale <- all$pos
all[which(all$scale %in% c(-25:-21, -19:-11, -9:-2, 3:9, 11:19, 21:25)),]$scale <- ""
all[which(all$scale %in% c(0,1)),]$scale <- "C"
all[which(all$scale == 2),]$scale <- "G"
all[which((all$motif == "ACCG") & (all$scale == -1)),]$scale <- "A"
all[which((all$motif == "CCCG") & (all$scale == -1)),]$scale <- "C"
all[which((all$motif == "GCCG") & (all$scale == -1)),]$scale <- "G"
all[which((all$motif == "TCCG") & (all$scale == -1)),]$scale <- "T"
all <- all[which(all$pos %in% c(-20:20)),]
all$pos <- factor(all$pos, levels = c(-20:20))
all$motif <- factor(all$motif, levels = c("CCCG", "GCCG","ACCG", "TCCG"))

p <- ggplot(all, aes(x=pos, y=rel_counts, fill=meth)) +
  geom_bar(stat="identity") +
  scale_fill_manual(values=c("#77C8DD", "#004766", "#c3775d")) +
  facet_grid(meth~motif, scales = "free_y") + 
  coord_capped_cart(bottom='both') +
  theme_classic(base_size = 20) +
  theme(#axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        text = element_text(size=16),
        axis.text.x = element_text(size=11),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        panel.spacing = unit(2, "lines"),
        axis.title.y = element_text(margin = margin(r = 20, l = 20)),
        axis.title.x = element_text(margin = margin(t = 20, b = 20)),
        strip.text.x = element_text(size = 11, hjust = 0.513)) +
  scale_y_continuous(name = "Amount of cfDNA fragments starting (normalized)") +
  scale_x_discrete(name ="Position around motif",
                  breaks=c(-20,-10,-1,0,1,2,10,20),
                  labels=c("-20", "-10", "A", "C", "C", "G", "10", "20")
                  ) +
  geom_vline(aes(xintercept=19.5), linetype=2) +
  geom_vline(aes(xintercept=23.5), linetype=2)

gt <- ggplotGrob(p)
gt$grobs[[18]]$children[[2]]$grobs[[2]]$children[[1]]$label <- c("-20","-10","C","C","C","G","10","20")
gt$grobs[[19]]$children[[2]]$grobs[[2]]$children[[1]]$label <- c("-20","-10","G","C","C","G","10","20")
gt$grobs[[20]]$children[[2]]$grobs[[2]]$children[[1]]$label <- c("-20","-10","A","C","C","G","10","20")
gt$grobs[[21]]$children[[2]]$grobs[[2]]$children[[1]]$label <- c("-20","-10","T","C","C","G","10","20")

gb <- ggplotify::as.ggplot(gt)

#ggsave(paste("../output/Supplementary_Fig_3_4bp.jpg"), plot =gb, device = "jpeg",width = 20, height=12,units = "in", dpi=300)

在这里插入图片描述

Supplementary Figure 3: combine Supplementary Figure 3A, Supplementary Figure 3B

g <- plot_grid(ga,gb, rel_widths = c(1,1), labels=c("a", "b"), ncol=1,label_size = 20)
#ggsave(paste("../output/Fig_3EF.pdf"), plot = small_size, device = "pdf",width = 20, height=12,units = "in", dpi=600, bg = "white", scale = 0.78)
ggsave(paste("../output/Supplementary_Fig_3.jpg"), plot = g, device = "jpg",width = 20, height=24,units = "in", dpi=600, bg = "white", scale = 1)

在这里插入图片描述

Supplementary Figure 4: an updated version of Figure 2b and 2c based on Loyfer data

t <- readRDS("../data/Loyfer_50_ratios.rds")
t[which(nchar(t$seq) == 3),]$seq <- paste0(substr(t[which(nchar(t$seq) == 3),]$seq, 1, 1), " | ", substr(t[which(nchar(t$seq) == 3),]$seq, 2, 3))
t[which(nchar(t$seq) == 4),]$seq <- paste0(substr(t[which(nchar(t$seq) == 4),]$seq, 1, 1), " | ", substr(t[which(nchar(t$seq) == 4),]$seq, 2, 4))
t$seq <- factor(t$seq, levels = c("C | CG", "G | CG", "A | CG", "T | CG",
                                  "C | CCG", "G | CCG", "A | CCG", "T | CCG"))
#t$beta <- round(t$beta, digits = 1)
t <- t %>%
  group_by(seq, beta) %>%
  summarise(start = sum(start), over = sum(over))
t$ratio <- t$start / t$over
t_add <- t %>%
  group_by(seq) %>%
  summarise(beta = 1, start = 0, over = 1, ratio = 0)
t <- rbind(t, t_add)
plot2b <- ggplot(t[which(nchar(as.character(t$seq)) == 6),], aes(x=factor(beta), y=ratio, fill=beta)) +
  geom_bar(stat="identity", position = "dodge") +
  facet_wrap(~seq, nrow=1) +
  theme_classic(base_size = 20) +
  #xlab("Methylation (beta-value)") +
  #ylab(expression(atop("Fraction of cfDNA fragments", paste("starting or ending at CGs")))) +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CGs") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.0075),
                     breaks = c(0.0075,0.005,0.0025,0)) +
  scale_x_discrete(labels=c("0", "1"),
                   breaks = c(0, 1)) +
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        #axis.text.x = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 5)),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_fill_gradient(low="#77C8DD", high="#004766")

plot2c <- ggplot(t[which(nchar(as.character(t$seq)) == 7),], aes(x=factor(beta), y=ratio, fill=beta)) +
  geom_bar(stat="identity", position = "dodge") +
  facet_wrap(~seq, nrow=1) +
  theme_classic(base_size = 20) +
  xlab("Methylation (beta-value)") +
  #ylab(expression(atop("Fraction of cfDNA fragments", paste("starting or ending at CCGs")))) +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CCGs") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.008),
                     breaks = c(0.0075,0.005,0.0025,0)) +
  scale_x_discrete(labels=c("0", "1"),
                   breaks = c(0,1)) +
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        axis.title.x = element_text(margin = margin(t = 20, b=20)),
        #axis.title.y = element_text(margin = margin(r = 5)),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_fill_gradient(low="#77C8DD", high="#004766")

#title <- textGrob("window: +/- 50 bps (total: 101 bps)", gp = gpar(col = "black", fontsize = 19))
plot2bc_50 <- plot_grid(plot2b, plot2c, ncol = 1, align="v", axis="bt", rel_heights = c(0.9, 1), labels = c("a", "b"), label_size = 20)
#plot2BC <- plot_grid(plot2b, plot2c, ncol = 1, align="v", axis="bt", rel_heights = c(5, 5), labels = c("b", "c"), label_size = 20)
ggsave("../output/Supplementary_Fig_4.jpg", plot = plot2bc_50, device = "jpeg",width = 20, height=12,units = "in", dpi=300)

请添加图片描述

Supplementary Figure 5:

t <- readRDS("../data/healthy_cpggroups.rds")
t$nchar <- nchar(t$motif)
t <- t %>%
  group_by(group, beta, nchar) %>%
  summarise(nstart = sum(start_all), nover = sum(over_all))
colnames(t) <- c("group", "beta", "nchar", "start_all", "over_all")
t$group <- sub("hg19_cpg_", "", t$group)
t$group <- sub("inter", "Open sea", t$group)
t$group <- sub("islands", "Islands", t$group)
t$group <- sub("shores", "Shores", t$group)
t$group <- sub("shelves", "Shelves", t$group)
t$group <- factor(t$group, levels = c("Islands", "Shores", "Shelves", "Open sea"))
#t[which(nchar(t$motif) == 3),]$motif <- paste0(substr(t[which(nchar(t$motif) == 3),]$motif, 1,1), " | ", substr(t[which(nchar(t$motif) == 3),]$motif, 2,3))
#t[which(nchar(t$motif) == 4),]$motif <- paste0(substr(t[which(nchar(t$motif) == 4),]$motif, 1,1), " | ", substr(t[which(nchar(t$motif) == 4),]$motif, 2,4))
t$motif <- "cg"
t[which(t$nchar == 3),]$motif <- "A/T/C/G | CG"
t[which(t$nchar == 4),]$motif <- "A/T/C/G | CCG"
t$motif <- factor(t$motif, levels = c("A/T/C/G | CG", "A/T/C/G | CCG"))
t$ratio <- t$start_all / t$over_all
plot1 <- ggplot(t[which(nchar(as.character(t$motif)) == 12),], aes(x=beta, y=ratio)) +
  geom_bar(stat="identity", position = "dodge", aes(fill=beta)) +
  facet_grid(motif~group) +
  theme_classic(base_size = 20) + 
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        axis.title.y = element_text(margin = margin(r = 20, l=20)),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none") +
  scale_fill_gradient(low="#77C8DD", high="#004766") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.01),
                     breaks = c(0.01, 0.0075,0.005,0.0025,0)) +
  scale_x_continuous(labels=c("0", "1"),
                   breaks = c(0, 1)) +
  #xlab("Methylation (beta-value)") +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CGs")
plot2 <- ggplot(t[which(nchar(as.character(t$motif)) == 13),], aes(x=beta, y=ratio)) +
  geom_bar(stat="identity", position = "dodge", aes(fill=beta)) +
  facet_grid(motif~group) +
  theme_classic(base_size = 20) + 
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        axis.title.y = element_text(margin = margin(r = 20, l=20)),
        axis.title.x = element_text(margin = margin(t = 20, b=20)),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none") +
  scale_fill_gradient(low="#77C8DD", high="#004766") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.01),
                     breaks = c(0.01, 0.0075,0.005,0.0025,0)) +
  scale_x_continuous(labels=c("0", "1"),
                   breaks = c(0, 1)) +
  xlab("Methylation (beta-value)") +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CCGs")
plot <- plot_grid(plot1, plot2, nrow=2, rel_heights = c(0.9,1))
ggsave(paste("../output/Supplementary_Fig_5.jpg"), plot = plot, device = "jpeg",width = 20, height=12,units = "in", dpi=300)

t[which(t$motif == "A/T/C/G | CG"),] %>%
  group_by(group) %>%
  summarise(min = min(ratio), max = max(ratio), ratio = max(ratio)/min(ratio))

在这里插入图片描述

Supplementary Figure 6: an updated version of Supplementary Figure 5, based on Loyfer data.

t <- readRDS("../data/Loyfer_50_ratios.rds")
t[which(nchar(t$seq) == 3),]$seq <- paste0(substr(t[which(nchar(t$seq) == 3),]$seq, 1, 1), " | ", substr(t[which(nchar(t$seq) == 3),]$seq, 2, 3))
t[which(nchar(t$seq) == 4),]$seq <- paste0(substr(t[which(nchar(t$seq) == 4),]$seq, 1, 1), " | ", substr(t[which(nchar(t$seq) == 4),]$seq, 2, 4))
t$seq <- factor(t$seq, levels = c("C | CG", "G | CG", "A | CG", "T | CG",
                                  "C | CCG", "G | CCG", "A | CCG", "T | CCG"))
#t$beta <- round(t$beta, digits = 1)
t$char <- nchar(as.character(t$seq))
t <- t %>%
  group_by(beta, group, char) %>%
  summarise(start = sum(start), over = sum(over))
t$ratio <- t$start / t$over
t_add <- t %>%
  group_by(char, group) %>%
  summarise(beta = 1, start = 0, over = 1, ratio = 0)
t <- rbind(t, t_add)
t$group2 <- "NA"
t[which(t$char == 6),]$group2 <- "A/T/C/G | CG"
t[which(t$char == 7),]$group2 <- "A/T/C/G | CCG"
t$group2 <- factor(t$group2, levels = c("A/T/C/G | CG", "A/T/C/G | CCG"))
t$group <- sub("hg19_cpg_", "", t$group)
t$group <- sub("inter", "Open sea", t$group)
t$group <- sub("islands", "Islands", t$group)
t$group <- sub("shores", "Shores", t$group)
t$group <- sub("shelves", "Shelves", t$group)
t$group <- factor(t$group, levels = c("Islands", "Shores", "Shelves", "Open sea"))
plot1 <- ggplot(t[which(nchar(as.character(t$group2)) == 12),], aes(x=factor(beta), y=ratio, fill=beta)) +
  geom_bar(stat="identity", position = "dodge") +
  facet_grid(group2~group) +
  theme_classic(base_size = 20) +
  #xlab("Methylation (beta-value)") +

  ylab("Fraction of cfDNA fragments\nstarting or ending at CGs") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.0075),
                     breaks = c(0.0075,0.005,0.0025,0)) +
  scale_x_discrete(labels=c("0", "1"),
                   breaks = c(0, 1)) +
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        #axis.text.x = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 5)),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_fill_gradient(low="#77C8DD", high="#004766")
plot2 <- ggplot(t[which(nchar(as.character(t$group2)) == 13),], aes(x=factor(beta), y=ratio, fill=beta)) +
  geom_bar(stat="identity", position = "dodge") +
  facet_grid(group2~group) +
  theme_classic(base_size = 20) +
  xlab("Methylation (beta-value)") +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CCGs") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.0075),
                     breaks = c(0.0075,0.005,0.0025,0)) +
  scale_x_discrete(labels=c("0", "1"),
                   breaks = c(0, 1)) +
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        #axis.text.x = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 5)),
        axis.title.x = element_text(margin = margin(t = 20, b=20)),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_fill_gradient(low="#77C8DD", high="#004766")
plot <- plot_grid(plot1, plot2, nrow=2, rel_heights = c(0.90,1))

ggsave("../output/Supplementary_Fig_6.jpg", plot = plot, device = "jpeg",width = 20, height=12,units = "in", dpi=300)

在这里插入图片描述

Supplementary Figure 7A: window = +/- 50bps

t <- readRDS("../data/Moss_ratios.rds")
t <- t[-which(t$chr %in% c("chrX", "chrY")),]

acg_fwd <- t[which(substr(t$seq, 2, 4) == "ACG"),c(2,3,4,5,8,9)]
acg_bwd <- t[which(substr(t$seq, 3, 5) == "CGT"),c(2,3,4,5,10,11)]
colnames(acg_bwd) <- colnames(acg_fwd)
acg <- rbind(acg_fwd, acg_bwd)
acg$group <- "A | CG"
ccg_fwd <- t[which(substr(t$seq, 2, 4) == "CCG"),c(2,3,4,5,8,9)]
ccg_bwd <- t[which(substr(t$seq, 3, 5) == "CGG"),c(2,3,4,5,10,11)]
colnames(ccg_bwd) <- colnames(ccg_fwd)
ccg <- rbind(ccg_fwd, ccg_bwd)
ccg$group <- "C | CG"
gcg_fwd <- t[which(substr(t$seq, 2, 4) == "GCG"),c(2,3,4,5,8,9)]
gcg_bwd <- t[which(substr(t$seq, 3, 5) == "CGC"),c(2,3,4,5,10,11)]
colnames(gcg_bwd) <- colnames(gcg_fwd)
gcg <- rbind(gcg_fwd, gcg_bwd)
gcg$group <- "G | CG"
tcg_fwd <- t[which(substr(t$seq, 2, 4) == "TCG"),c(2,3,4,5,8,9)]
tcg_bwd <- t[which(substr(t$seq, 3, 5) == "CGA"),c(2,3,4,5,10,11)]
colnames(tcg_bwd) <- colnames(gcg_fwd)
tcg <- rbind(tcg_fwd, tcg_bwd)
tcg$group <- "T | CG"
t <- rbind(acg, ccg, gcg, tcg)

all10bars <- t
all10bars$subgroup <- round(all10bars$beta, digits = 1)
all10bars$group <- factor(all10bars$group, levels=c("C | CG", "G | CG", "A | CG", "T | CG"))

all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_cg) / sum(start_cg_over)) %>%
  group_by(group) %>%
  summarise(min = min(ratio), max = max(ratio), ratio = max(ratio) / min(ratio))

plot2b <- all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_cg) / sum(start_cg_over)) %>%
  ggplot(aes(x=factor(subgroup), y=ratio, fill=subgroup)) +
  geom_bar(stat="identity", position = "dodge") +
  facet_wrap(~group, nrow=1) +
  theme_classic(base_size = 20) +
  #xlab("Methylation (beta-value)") +
  #ylab(expression(atop("Fraction of cfDNA fragments", paste("starting or ending at CGs")))) +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CGs") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.0105),
                     breaks = c(0.01,0.0075,0.005,0.0025,0)) +
  scale_x_discrete(labels=c("0", "1"),
                   breaks = c(0,1)) +
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        #axis.text.x = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 5)),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_fill_gradient(low="#77C8DD", high="#004766")


t <- readRDS("../data/Moss_ratios.rds")
t <- t[-which(t$chr %in% c("chrX", "chrY")),]

accg_fwd <- t[which(substr(t$seq, 1, 4) == "ACCG"),c(2,3,4,5,6,7)]
accg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGT"),c(2,3,4,5,12,13)]
colnames(accg_bwd) <- colnames(accg_fwd)
accg <- rbind(accg_fwd, accg_bwd)
accg$group <- "A | CCG"
cccg_fwd <- t[which(substr(t$seq, 1, 4) == "CCCG"),c(2,3,4,5,6,7)]
cccg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGG"),c(2,3,4,5,12,13)]
colnames(cccg_bwd) <- colnames(cccg_fwd)
cccg <- rbind(cccg_fwd, cccg_bwd)
cccg$group <- "C | CCG"
gccg_fwd <- t[which(substr(t$seq, 1, 4) == "GCCG"),c(2,3,4,5,6,7)]
gccg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGC"),c(2,3,4,5,12,13)]
colnames(gccg_bwd) <- colnames(gccg_fwd)
gccg <- rbind(gccg_fwd, gccg_bwd)
gccg$group <- "G | CCG"
tccg_fwd <- t[which(substr(t$seq, 1, 4) == "TCCG"),c(2,3,4,5,6,7)]
tccg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGA"),c(2,3,4,5,12,13)]
colnames(tccg_bwd) <- colnames(gccg_fwd)
tccg <- rbind(tccg_fwd, tccg_bwd)
tccg$group <- "T | CCG"
t <- rbind(accg, cccg, gccg, tccg)

all10bars <- t
all10bars$subgroup <- round(all10bars$beta, digits = 1)
all10bars$group <- factor(all10bars$group, levels=c("C | CCG", "G | CCG", "A | CCG", "T | CCG"))

all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_ncg) / sum(start_ncg_over)) %>%
  group_by(group) %>%
  summarise(min = min(ratio), max = max(ratio), ratio = max(ratio) / min(ratio))

plot2c <- all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_ncg) / sum(start_ncg_over)) %>%
  ggplot(aes(x=factor(subgroup), y=ratio, fill=subgroup)) +
  geom_bar(stat="identity", position = "dodge") +
  facet_wrap(~group, nrow=1) +
  theme_classic(base_size = 20) +
  #xlab("Methylation (beta-value)") +
  #ylab(expression(atop("Fraction of cfDNA fragments", paste("starting or ending at CGs")))) +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CGs") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.0125),
                     breaks = c(0.0125, 0.01,0.0075,0.005,0.0025,0)) +
  scale_x_discrete(labels=c("0", "1"),
                   breaks = c(0,1)) +
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        #axis.text.x = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 5)),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_fill_gradient(low="#77C8DD", high="#004766")

title <- textGrob("window: +/- 50 bps (total: 101 bps)", gp = gpar(col = "black", fontsize = 19))
plot2bc_50 <- plot_grid(title, plot2b, plot2c, ncol = 1, align="v", axis="bt", rel_heights = c(1, 5, 5), labels = c("a", "", ""), label_size = 20)
plot2BC <- plot_grid(plot2b, plot2c, ncol = 1, align="v", axis="bt", rel_heights = c(5, 5), labels = c("", ""), label_size = 20)
#ggsave("../output/figure2bc_window50bp.jpg", plot = plot2bc_50, device = "jpeg",width = 20, height=12,units = "in", dpi=300)

在这里插入图片描述

Supplementary Figure 7B: window = +/- 75bps

t <- readRDS("../data/Moss_75_ratios.rds")
t <- t[-which(t$chr %in% c("chrX", "chrY")),]

acg_fwd <- t[which(substr(t$seq, 2, 4) == "ACG"),c(2,3,4,5,8,9)]
acg_bwd <- t[which(substr(t$seq, 3, 5) == "CGT"),c(2,3,4,5,10,11)]
colnames(acg_bwd) <- colnames(acg_fwd)
acg <- rbind(acg_fwd, acg_bwd)
acg$group <- "A | CG"
ccg_fwd <- t[which(substr(t$seq, 2, 4) == "CCG"),c(2,3,4,5,8,9)]
ccg_bwd <- t[which(substr(t$seq, 3, 5) == "CGG"),c(2,3,4,5,10,11)]
colnames(ccg_bwd) <- colnames(ccg_fwd)
ccg <- rbind(ccg_fwd, ccg_bwd)
ccg$group <- "C | CG"
gcg_fwd <- t[which(substr(t$seq, 2, 4) == "GCG"),c(2,3,4,5,8,9)]
gcg_bwd <- t[which(substr(t$seq, 3, 5) == "CGC"),c(2,3,4,5,10,11)]
colnames(gcg_bwd) <- colnames(gcg_fwd)
gcg <- rbind(gcg_fwd, gcg_bwd)
gcg$group <- "G | CG"
tcg_fwd <- t[which(substr(t$seq, 2, 4) == "TCG"),c(2,3,4,5,8,9)]
tcg_bwd <- t[which(substr(t$seq, 3, 5) == "CGA"),c(2,3,4,5,10,11)]
colnames(tcg_bwd) <- colnames(gcg_fwd)
tcg <- rbind(tcg_fwd, tcg_bwd)
tcg$group <- "T | CG"
t <- rbind(acg, ccg, gcg, tcg)

all10bars <- t
all10bars$subgroup <- round(all10bars$beta, digits = 1)
all10bars$group <- factor(all10bars$group, levels=c("C | CG", "G | CG", "A | CG", "T | CG"))

all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_cg) / sum(start_cg_over)) %>%
  group_by(group) %>%
  summarise(min = min(ratio), max = max(ratio), ratio = max(ratio) / min(ratio))

plot2b_75 <- all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_cg) / sum(start_cg_over)) %>%
  ggplot(aes(x=factor(subgroup), y=ratio, fill=subgroup)) +
  geom_bar(stat="identity", position = "dodge") +
  facet_wrap(~group, nrow=1) +
  theme_classic(base_size = 20) +
  #xlab("Methylation (beta-value)") +
  #ylab(expression(atop("Fraction of cfDNA fragments", paste("starting or ending at CGs")))) +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CGs") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.009),
                     breaks = c(0.0075,0.005,0.0025,0)) +
  scale_x_discrete(labels=c("0", "1"),
                   breaks = c(0,1)) +
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        #axis.text.x = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 5)),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_fill_gradient(low="#77C8DD", high="#004766")


t <- readRDS("../data/Moss_75_ratios.rds")
t <- t[-which(t$chr %in% c("chrX", "chrY")),]

accg_fwd <- t[which(substr(t$seq, 1, 4) == "ACCG"),c(2,3,4,5,6,7)]
accg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGT"),c(2,3,4,5,12,13)]
colnames(accg_bwd) <- colnames(accg_fwd)
accg <- rbind(accg_fwd, accg_bwd)
accg$group <- "A | CCG"
cccg_fwd <- t[which(substr(t$seq, 1, 4) == "CCCG"),c(2,3,4,5,6,7)]
cccg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGG"),c(2,3,4,5,12,13)]
colnames(cccg_bwd) <- colnames(cccg_fwd)
cccg <- rbind(cccg_fwd, cccg_bwd)
cccg$group <- "C | CCG"
gccg_fwd <- t[which(substr(t$seq, 1, 4) == "GCCG"),c(2,3,4,5,6,7)]
gccg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGC"),c(2,3,4,5,12,13)]
colnames(gccg_bwd) <- colnames(gccg_fwd)
gccg <- rbind(gccg_fwd, gccg_bwd)
gccg$group <- "G | CCG"
tccg_fwd <- t[which(substr(t$seq, 1, 4) == "TCCG"),c(2,3,4,5,6,7)]
tccg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGA"),c(2,3,4,5,12,13)]
colnames(tccg_bwd) <- colnames(gccg_fwd)
tccg <- rbind(tccg_fwd, tccg_bwd)
tccg$group <- "T | CCG"
t <- rbind(accg, cccg, gccg, tccg)

all10bars <- t
all10bars$subgroup <- round(all10bars$beta, digits = 1)
all10bars$group <- factor(all10bars$group, levels=c("C | CCG", "G | CCG", "A | CCG", "T | CCG"))

all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_ncg) / sum(start_ncg_over)) %>%
  group_by(group) %>%
  summarise(min = min(ratio), max = max(ratio), ratio = max(ratio) / min(ratio))

plot2c_75 <- all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_ncg) / sum(start_ncg_over)) %>%
  ggplot(aes(x=factor(subgroup), y=ratio, fill=subgroup)) +
  geom_bar(stat="identity", position = "dodge") +
  facet_wrap(~group, nrow=1) +
  theme_classic(base_size = 20) +
  #xlab("Methylation (beta-value)") +
  #ylab(expression(atop("Fraction of cfDNA fragments", paste("starting or ending at CGs")))) +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CGs") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.011),
                     breaks = c(0.01,0.0075,0.005,0.0025,0)) +
  scale_x_discrete(labels=c("0", "1"),
                   breaks = c(0,1)) +
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        #axis.text.x = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 5)),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_fill_gradient(low="#77C8DD", high="#004766")

title <- textGrob("window: +/- 75 bps (total: 151 bps)", gp = gpar(col = "black", fontsize = 19))
plot2bc_75 <- plot_grid(title, plot2b_75, plot2c_75, ncol = 1, align="v", axis="bt", rel_heights = c(1, 5, 5), labels = c("b", "", ""), label_size = 20)
#ggsave("../output/figure2bc_window75bp.jpg", plot = plot2bc_75, device = "jpeg",width = 20, height=12,units = "in", dpi=300)

在这里插入图片描述

Supplementary Figure 7C: window = +/- 100bps

t <- readRDS("../data/Moss_100_ratios.rds")
t <- t[-which(t$chr %in% c("chrX", "chrY")),]

acg_fwd <- t[which(substr(t$seq, 2, 4) == "ACG"),c(2,3,4,5,8,9)]
acg_bwd <- t[which(substr(t$seq, 3, 5) == "CGT"),c(2,3,4,5,10,11)]
colnames(acg_bwd) <- colnames(acg_fwd)
acg <- rbind(acg_fwd, acg_bwd)
acg$group <- "A | CG"
ccg_fwd <- t[which(substr(t$seq, 2, 4) == "CCG"),c(2,3,4,5,8,9)]
ccg_bwd <- t[which(substr(t$seq, 3, 5) == "CGG"),c(2,3,4,5,10,11)]
colnames(ccg_bwd) <- colnames(ccg_fwd)
ccg <- rbind(ccg_fwd, ccg_bwd)
ccg$group <- "C | CG"
gcg_fwd <- t[which(substr(t$seq, 2, 4) == "GCG"),c(2,3,4,5,8,9)]
gcg_bwd <- t[which(substr(t$seq, 3, 5) == "CGC"),c(2,3,4,5,10,11)]
colnames(gcg_bwd) <- colnames(gcg_fwd)
gcg <- rbind(gcg_fwd, gcg_bwd)
gcg$group <- "G | CG"
tcg_fwd <- t[which(substr(t$seq, 2, 4) == "TCG"),c(2,3,4,5,8,9)]
tcg_bwd <- t[which(substr(t$seq, 3, 5) == "CGA"),c(2,3,4,5,10,11)]
colnames(tcg_bwd) <- colnames(gcg_fwd)
tcg <- rbind(tcg_fwd, tcg_bwd)
tcg$group <- "T | CG"
t <- rbind(acg, ccg, gcg, tcg)

all10bars <- t
all10bars$subgroup <- round(all10bars$beta, digits = 1)
all10bars$group <- factor(all10bars$group, levels=c("C | CG", "G | CG", "A | CG", "T | CG"))

all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_cg) / sum(start_cg_over)) %>%
  group_by(group) %>%
  summarise(min = min(ratio), max = max(ratio), ratio = max(ratio) / min(ratio))

plot2b_100 <- all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_cg) / sum(start_cg_over)) %>%
  ggplot(aes(x=factor(subgroup), y=ratio, fill=subgroup)) +
  geom_bar(stat="identity", position = "dodge") +
  facet_wrap(~group, nrow=1) +
  theme_classic(base_size = 20) +
  #xlab("Methylation (beta-value)") +
  #ylab(expression(atop("Fraction of cfDNA fragments", paste("starting or ending at CGs")))) +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CGs") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.0075),
                     breaks = c(0.0075,0.005,0.0025,0)) +
  scale_x_discrete(labels=c("0", "1"),
                   breaks = c(0,1)) +
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        #axis.text.x = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 5)),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_fill_gradient(low="#77C8DD", high="#004766")


t <- readRDS("../data/Moss_100_ratios.rds")
t <- t[-which(t$chr %in% c("chrX", "chrY")),]

accg_fwd <- t[which(substr(t$seq, 1, 4) == "ACCG"),c(2,3,4,5,6,7)]
accg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGT"),c(2,3,4,5,12,13)]
colnames(accg_bwd) <- colnames(accg_fwd)
accg <- rbind(accg_fwd, accg_bwd)
accg$group <- "A | CCG"
cccg_fwd <- t[which(substr(t$seq, 1, 4) == "CCCG"),c(2,3,4,5,6,7)]
cccg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGG"),c(2,3,4,5,12,13)]
colnames(cccg_bwd) <- colnames(cccg_fwd)
cccg <- rbind(cccg_fwd, cccg_bwd)
cccg$group <- "C | CCG"
gccg_fwd <- t[which(substr(t$seq, 1, 4) == "GCCG"),c(2,3,4,5,6,7)]
gccg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGC"),c(2,3,4,5,12,13)]
colnames(gccg_bwd) <- colnames(gccg_fwd)
gccg <- rbind(gccg_fwd, gccg_bwd)
gccg$group <- "G | CCG"
tccg_fwd <- t[which(substr(t$seq, 1, 4) == "TCCG"),c(2,3,4,5,6,7)]
tccg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGA"),c(2,3,4,5,12,13)]
colnames(tccg_bwd) <- colnames(gccg_fwd)
tccg <- rbind(tccg_fwd, tccg_bwd)
tccg$group <- "T | CCG"
t <- rbind(accg, cccg, gccg, tccg)

all10bars <- t
all10bars$subgroup <- round(all10bars$beta, digits = 1)
all10bars$group <- factor(all10bars$group, levels=c("C | CCG", "G | CCG", "A | CCG", "T | CCG"))

all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_ncg) / sum(start_ncg_over)) %>%
  group_by(group) %>%
  summarise(min = min(ratio), max = max(ratio), ratio = max(ratio) / min(ratio))

plot2c_100 <- all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_ncg) / sum(start_ncg_over)) %>%
  ggplot(aes(x=factor(subgroup), y=ratio, fill=subgroup)) +
  geom_bar(stat="identity", position = "dodge") +
  facet_wrap(~group, nrow=1) +
  theme_classic(base_size = 20) +
  #xlab("Methylation (beta-value)") +
  #ylab(expression(atop("Fraction of cfDNA fragments", paste("starting or ending at CGs")))) +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CGs") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.0095),
                     breaks = c(0.0075,0.005,0.0025,0)) +
  scale_x_discrete(labels=c("0", "1"),
                   breaks = c(0,1)) +
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        #axis.text.x = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 5)),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_fill_gradient(low="#77C8DD", high="#004766")

title <- textGrob("window: +/- 100 bps (total: 201 bps)", gp = gpar(col = "black", fontsize = 19))
plot2bc_100 <- plot_grid(title, plot2b_100, plot2c_100, ncol = 1, align="v", axis="bt", rel_heights = c(1, 5, 5), labels = c("c", "", ""), label_size = 20)
#ggsave("../output/figure2bc_window100bp.jpg", plot = plot2bc_100, device = "jpeg",width = 20, height=12,units = "in", dpi=300)

在这里插入图片描述

Supplementary Figure 7D: window = +/- 125bps

t <- readRDS("../data/Moss_125_ratios.rds")
t <- t[-which(t$chr %in% c("chrX", "chrY")),]

acg_fwd <- t[which(substr(t$seq, 2, 4) == "ACG"),c(2,3,4,5,8,9)]
acg_bwd <- t[which(substr(t$seq, 3, 5) == "CGT"),c(2,3,4,5,10,11)]
colnames(acg_bwd) <- colnames(acg_fwd)
acg <- rbind(acg_fwd, acg_bwd)
acg$group <- "A | CG"
ccg_fwd <- t[which(substr(t$seq, 2, 4) == "CCG"),c(2,3,4,5,8,9)]
ccg_bwd <- t[which(substr(t$seq, 3, 5) == "CGG"),c(2,3,4,5,10,11)]
colnames(ccg_bwd) <- colnames(ccg_fwd)
ccg <- rbind(ccg_fwd, ccg_bwd)
ccg$group <- "C | CG"
gcg_fwd <- t[which(substr(t$seq, 2, 4) == "GCG"),c(2,3,4,5,8,9)]
gcg_bwd <- t[which(substr(t$seq, 3, 5) == "CGC"),c(2,3,4,5,10,11)]
colnames(gcg_bwd) <- colnames(gcg_fwd)
gcg <- rbind(gcg_fwd, gcg_bwd)
gcg$group <- "G | CG"
tcg_fwd <- t[which(substr(t$seq, 2, 4) == "TCG"),c(2,3,4,5,8,9)]
tcg_bwd <- t[which(substr(t$seq, 3, 5) == "CGA"),c(2,3,4,5,10,11)]
colnames(tcg_bwd) <- colnames(gcg_fwd)
tcg <- rbind(tcg_fwd, tcg_bwd)
tcg$group <- "T | CG"
t <- rbind(acg, ccg, gcg, tcg)

all10bars <- t
all10bars$subgroup <- round(all10bars$beta, digits = 1)
all10bars$group <- factor(all10bars$group, levels=c("C | CG", "G | CG", "A | CG", "T | CG"))

all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_cg) / sum(start_cg_over)) %>%
  group_by(group) %>%
  summarise(min = min(ratio), max = max(ratio), ratio = max(ratio) / min(ratio))

plot2b_125 <- all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_cg) / sum(start_cg_over)) %>%
  ggplot(aes(x=factor(subgroup), y=ratio, fill=subgroup)) +
  geom_bar(stat="identity", position = "dodge") +
  facet_wrap(~group, nrow=1) +
  theme_classic(base_size = 20) +
  #xlab("Methylation (beta-value)") +
  #ylab(expression(atop("Fraction of cfDNA fragments", paste("starting or ending at CGs")))) +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CGs") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.0067),
                     breaks = c(0.005,0.0025,0)) +
  scale_x_discrete(labels=c("0", "1"),
                   breaks = c(0,1)) +
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        #axis.text.x = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 5)),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_fill_gradient(low="#77C8DD", high="#004766")


t <- readRDS("../data/Moss_125_ratios.rds")
t <- t[-which(t$chr %in% c("chrX", "chrY")),]

accg_fwd <- t[which(substr(t$seq, 1, 4) == "ACCG"),c(2,3,4,5,6,7)]
accg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGT"),c(2,3,4,5,12,13)]
colnames(accg_bwd) <- colnames(accg_fwd)
accg <- rbind(accg_fwd, accg_bwd)
accg$group <- "A | CCG"
cccg_fwd <- t[which(substr(t$seq, 1, 4) == "CCCG"),c(2,3,4,5,6,7)]
cccg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGG"),c(2,3,4,5,12,13)]
colnames(cccg_bwd) <- colnames(cccg_fwd)
cccg <- rbind(cccg_fwd, cccg_bwd)
cccg$group <- "C | CCG"
gccg_fwd <- t[which(substr(t$seq, 1, 4) == "GCCG"),c(2,3,4,5,6,7)]
gccg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGC"),c(2,3,4,5,12,13)]
colnames(gccg_bwd) <- colnames(gccg_fwd)
gccg <- rbind(gccg_fwd, gccg_bwd)
gccg$group <- "G | CCG"
tccg_fwd <- t[which(substr(t$seq, 1, 4) == "TCCG"),c(2,3,4,5,6,7)]
tccg_bwd <- t[which(substr(t$seq, 3, 6) == "CGGA"),c(2,3,4,5,12,13)]
colnames(tccg_bwd) <- colnames(gccg_fwd)
tccg <- rbind(tccg_fwd, tccg_bwd)
tccg$group <- "T | CCG"
t <- rbind(accg, cccg, gccg, tccg)

all10bars <- t
all10bars$subgroup <- round(all10bars$beta, digits = 1)
all10bars$group <- factor(all10bars$group, levels=c("C | CCG", "G | CCG", "A | CCG", "T | CCG"))

all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_ncg) / sum(start_ncg_over)) %>%
  group_by(group) %>%
  summarise(min = min(ratio), max = max(ratio), ratio = max(ratio) / min(ratio))

plot2c_125 <- all10bars %>%
  group_by(group, subgroup) %>%
  summarise(ratio = sum(start_ncg) / sum(start_ncg_over)) %>%
  ggplot(aes(x=factor(subgroup), y=ratio, fill=subgroup)) +
  geom_bar(stat="identity", position = "dodge") +
  facet_wrap(~group, nrow=1) +
  theme_classic(base_size = 20) +
  #xlab("Methylation (beta-value)") +
  #ylab(expression(atop("Fraction of cfDNA fragments", paste("starting or ending at CGs")))) +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CGs") + 
  scale_y_continuous(labels = scales::percent, limits = c(0,0.008),
                     breaks = c(0.0075,0.005,0.0025,0)) +
  scale_x_discrete(labels=c("0", "1"),
                   breaks = c(0,1)) +
  theme(text = element_text(size=16),
        axis.text.x = element_text(angle = 0, hjust=0.5),
        #axis.text.x = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 5)),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA),
        panel.spacing=unit(2, "lines"),
        strip.placement = "outside",
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_fill_gradient(low="#77C8DD", high="#004766")

title <- textGrob("window: +/- 125 bps (total: 251 bps)", gp = gpar(col = "black", fontsize = 19))
plot2bc_125 <- plot_grid(title, plot2b_125, plot2c_125, ncol = 1, align="v", axis="bt", rel_heights = c(1, 5, 5), labels = c("d", "", ""), label_size = 20)
#ggsave("../output/figure2bc_window125bp.jpg", plot = plot2bc_125, device = "jpeg",width = 20, height=12,units = "in", dpi=300)

在这里插入图片描述

Supplementary Figure 7: combining Supplementary Figure 7A, Supplementary Figure 7B, Supplementary Figure 7C, Supplementary Figure 7D

supplot4 <- plot_grid(plot2bc_50, plot2bc_75, plot2bc_100, plot2bc_125, ncol = 2, nrow=2, align="v", axis="bt", rel_heights = c(1, 1), rel_widths= c(1,1), label_size = 20)
ggsave("../output/Supplementary_Fig_7.jpg", plot = supplot4, device = "jpeg",width = 20, height=12,units = "in", dpi=300, scale =1.5)

在这里插入图片描述

Supplementary Figure 8

sum <- readRDS("../data/som_chrx.rds")
sum$group <- sub("open sea", "Open sea", sum$group)
sum$group <- sub("islands", "Islands", sum$group)
sum$group <- sub("shores", "Shores", sum$group)
sum$group <- sub("shelves", "Shelves", sum$group)
sum$group <- factor(sum$group, levels=c("Islands", "Shores", "Shelves", "Open sea"))
sum[which(nchar(sum$motif) == 4),]$motif <- sub("CCG", " | CCG", sum[which(nchar(sum$motif) == 4),]$motif)
sum[which(nchar(sum$motif) == 3),]$motif <- sub("CG", " | CG", sum[which(nchar(sum$motif) == 3),]$motif)
sum$motif <- factor(sum$motif, levels=c("C | CCG", "G | CCG", "A | CCG", "T | CCG", "C | CG", "G | CG", "A | CG", "T | CG"))
sum$sex <- NA
sum[which(sum$rel_chrx <= 0.04),]$sex <- "male"
sum[which(sum$rel_chrx >= 0.04),]$sex <- "female"
sum$sex <- factor(sum$sex, levels = c("male", "female"))

plot1_shores_chrx <- ggplot(data=sum[which((nchar(as.character(sum$motif)) == 7) & (sum$group == "Shores") & (sum$chromosome == "chrX")),], aes(x = sex, y=ratio, fill=sex)) +
  geom_jitter(size=0.4, alpha=1, aes(colour=sex)) +
  geom_boxplot(outlier.shape = NA, aes(alpha = 0.1)) +
  scale_y_continuous(labels = scales::percent,
                     limits = c(0,0.01),
                     breaks=c(0.01,0.0075,0.005,0.0025,0)) +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CCGs") + 
  facet_grid(~motif) +
  theme_classic(base_size = 20) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        text = element_text(size=16),
        #axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))
        ) +
  scale_fill_manual(values = c("#99CCFF", "#FF99BB")) +
  scale_color_manual(values = c("#99CCFF", "#FF99BB")) +
  stat_compare_means(method = "t.test", size = 3, aes(label = paste0("p = ", after_stat(p.format))))


plot2_shores_chrx <- ggplot(data=sum[which((nchar(as.character(sum$motif)) == 6) & (sum$group == "Shores") & (sum$chromosome == "chrX")),], aes(x = sex, y=ratio, fill=sex)) +
  geom_jitter(size=0.4, alpha=1, aes(colour=sex)) +
  geom_boxplot(outlier.shape = NA, aes(alpha = 0.1)) +
  scale_y_continuous(labels = scales::percent,
                     breaks = c(0.0125,0.01,0.0075,0.005,0.0025,0),
                     limits = c(0,0.0125)) +
  ylab("Fraction of cfDNA fragments\nstarting or ending at CGs") + 
  facet_grid(~motif) +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        #axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x.bottom = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20))
        ) +
  scale_fill_manual(values = c("#99CCFF", "#FF99BB")) +
  scale_color_manual(values = c("#99CCFF", "#FF99BB")) +
  stat_compare_means(method = "t.test", size = 3, aes(label = paste0("p = ", after_stat(p.format))))

plot1_shores_auto <- ggplot(data=sum[which((nchar(as.character(sum$motif)) == 7) & (sum$group == "Shores") & (sum$chromosome == "somatic")),], aes(x = sex, y=ratio, fill=sex)) +
  geom_jitter(size=0.4, alpha=1, aes(colour=sex)) +
  geom_boxplot(outlier.shape = NA, aes(alpha = 0.1)) +
  scale_y_continuous(labels = scales::percent,
                     limits = c(0,0.01),
                     breaks=c(0.01,0.0075,0.005,0.0025,0)) +
  #ylab("Fraction of cfDNA fragments\nstarting or ending at CCGs") + 
  facet_grid(~motif) +
  theme_classic(base_size = 20) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        text = element_text(size=16),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y.left = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        #axis.title.y = element_text(margin = margin(r = 20, l = 20))
        ) +
  scale_fill_manual(values = c("#99CCFF", "#FF99BB")) +
  scale_color_manual(values = c("#99CCFF", "#FF99BB")) +
  stat_compare_means(method = "t.test", size = 3, aes(label = paste0("p = ", after_stat(p.format))))


plot2_shores_auto <- ggplot(data=sum[which((nchar(as.character(sum$motif)) == 6) & (sum$group == "Shores") & (sum$chromosome == "somatic")),], aes(x = sex, y=ratio, fill=sex)) +
  geom_jitter(size=0.4, alpha=1, aes(colour=sex)) +
  geom_boxplot(outlier.shape = NA, aes(alpha = 0.1)) +
  scale_y_continuous(labels = scales::percent,
                     breaks = c(0.0125,0.01,0.0075,0.005,0.0025,0),
                     limits = c(0,0.0125)) +
  #ylab("Fraction of chrX cfDNA fragments\nstarting or ending at CGs") + 
  facet_grid(~motif) +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x.bottom = element_blank(),
        axis.text.y.left = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        #axis.title.y = element_text(margin = margin(r = 20, l = 20))
        ) +
  scale_fill_manual(values = c("#99CCFF", "#FF99BB")) +
  scale_color_manual(values = c("#99CCFF", "#FF99BB")) +
  stat_compare_means(method = "t.test", size = 3, aes(label = paste0("p = ", after_stat(p.format))))

plot1_shelves_chrx <- ggplot(data=sum[which((nchar(as.character(sum$motif)) == 7) & (sum$group == "Shelves") & (sum$chromosome == "chrX")) ,], aes(x = sex, y=ratio, fill=sex)) +
  geom_jitter(size=0.4, alpha=1, aes(colour=sex)) +
  geom_boxplot(outlier.shape = NA, aes(alpha = 0.1)) +
  scale_y_continuous(labels = scales::percent,
                     limits = c(0,0.01),
                     breaks=c(0.01,0.0075,0.005,0.0025,0)) +
  #ylab("Fraction of chrX cfDNA fragments\nstarting or ending at CCGs") + 
  facet_grid(~motif) +
  theme_classic(base_size = 20) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        text = element_text(size=16),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        #axis.title.y = element_text(margin = margin(r = 20, l = 20))
        ) +
  scale_fill_manual(values = c("#99CCFF", "#FF99BB")) +
  scale_color_manual(values = c("#99CCFF", "#FF99BB")) +
  stat_compare_means(method = "t.test", size = 3, aes(label = paste0("p = ", after_stat(p.format))))

plot2_shelves_chrx <- ggplot(data=sum[which((nchar(as.character(sum$motif)) == 6) & (sum$group == "Shelves") & (sum$chromosome == "chrX")),], aes(x = sex, y=ratio, fill=sex)) +
  geom_jitter(size=0.4, alpha=1, aes(colour=sex)) +
  geom_boxplot(outlier.shape = NA, aes(alpha = 0.1)) +
  scale_y_continuous(labels = scales::percent,
                     breaks = c(0.0125,0.01,0.0075,0.005,0.0025,0),
                     limits = c(0,0.0125)) +
  #ylab("Fraction of chrX cfDNA fragments\nstarting or ending at CGs") + 
  facet_grid(~motif) +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x.bottom = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        #axis.title.y = element_text(margin = margin(r = 20, l = 20))
        ) +
  scale_fill_manual(values = c("#99CCFF", "#FF99BB")) +
  scale_color_manual(values = c("#99CCFF", "#FF99BB")) +
  stat_compare_means(method = "t.test", size = 3, aes(label = paste0("p = ", after_stat(p.format))))

plot1_shelves_auto <- ggplot(data=sum[which((nchar(as.character(sum$motif)) == 7) & (sum$group == "Shelves") & (sum$chromosome == "somatic")) ,], aes(x = sex, y=ratio, fill=sex)) +
  geom_jitter(size=0.4, alpha=1, aes(colour=sex)) +
  geom_boxplot(outlier.shape = NA, aes(alpha = 0.1)) +
  scale_y_continuous(labels = scales::percent,
                     limits = c(0,0.01),
                     breaks=c(0.01,0.0075,0.005,0.0025,0)) +
  #ylab("Fraction of chrX cfDNA fragments\nstarting or ending at CCGs") + 
  facet_grid(~motif) +
  theme_classic(base_size = 20) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        text = element_text(size=16),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        axis.text.y.left = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 20, l = 20))
        ) +
  scale_fill_manual(values = c("#99CCFF", "#FF99BB")) +
  scale_color_manual(values = c("#99CCFF", "#FF99BB")) +
  stat_compare_means(method = "t.test", size = 3, aes(label = paste0("p = ", after_stat(p.format))))

plot2_shelves_auto <- ggplot(data=sum[which((nchar(as.character(sum$motif)) == 6) & (sum$group == "Shelves") & (sum$chromosome == "somatic")),], aes(x = sex, y=ratio, fill=sex)) +
  geom_jitter(size=0.4, alpha=1, aes(colour=sex)) +
  geom_boxplot(outlier.shape = NA, aes(alpha = 0.1)) +
  scale_y_continuous(labels = scales::percent,
                     breaks = c(0.0125,0.01,0.0075,0.005,0.0025,0),
                     limits = c(0,0.0125)) +
  #ylab("Fraction of chrX cfDNA fragments\nstarting or ending at CGs") + 
  facet_grid(~motif) +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x.bottom = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        axis.text.y.left = element_blank(),
        #axis.title.y = element_text(margin = margin(r = 20, l = 20))
        ) +
  scale_fill_manual(values = c("#99CCFF", "#FF99BB")) +
  scale_color_manual(values = c("#99CCFF", "#FF99BB")) +
  stat_compare_means(method = "t.test", size = 3, aes(label = paste0("p = ", after_stat(p.format))))

plot1_inter_chrx <- ggplot(data=sum[which((nchar(as.character(sum$motif)) == 7) & (sum$group == "Open sea") & (sum$chromosome == "chrX")) ,], aes(x = sex, y=ratio, fill=sex)) +
  geom_jitter(size=0.4, alpha=1, aes(colour=sex)) +
  geom_boxplot(outlier.shape = NA, aes(alpha = 0.1)) +
  scale_y_continuous(labels = scales::percent,
                     limits = c(0,0.01),
                     breaks=c(0.01,0.0075,0.005,0.0025,0)) +
  #ylab("Fraction of chrX cfDNA fragments\nstarting or ending at CCGs") + 
  facet_grid(~motif) +
  theme_classic(base_size = 20) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        text = element_text(size=16),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        #axis.title.y = element_text(margin = margin(r = 20, l = 20))
        ) +
  scale_fill_manual(values = c("#99CCFF", "#FF99BB")) +
  scale_color_manual(values = c("#99CCFF", "#FF99BB")) +
  stat_compare_means(method = "t.test", size = 3, aes(label = paste0("p = ", after_stat(p.format))))

plot2_inter_chrx <- ggplot(data=sum[which((nchar(as.character(sum$motif)) == 6) & (sum$group == "Open sea") & (sum$chromosome == "chrX")),], aes(x = sex, y=ratio, fill=sex)) +
  geom_jitter(size=0.4, alpha=1, aes(colour=sex)) +
  geom_boxplot(outlier.shape = NA, aes(alpha = 0.1)) +
  scale_y_continuous(labels = scales::percent,
                     breaks = c(0.0125,0.01,0.0075,0.005,0.0025,0),
                     limits = c(0,0.0125)) +
  #ylab("Fraction of chrX cfDNA fragments\nstarting or ending at CGs") + 
  facet_grid(~motif) +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x.bottom = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        #axis.title.y = element_text(margin = margin(r = 20, l = 20))
        ) +
  scale_fill_manual(values = c("#99CCFF", "#FF99BB")) +
  scale_color_manual(values = c("#99CCFF", "#FF99BB")) +
  stat_compare_means(method = "t.test", size = 3, aes(label = paste0("p = ", after_stat(p.format))))

plot1_inter_auto <- ggplot(data=sum[which((nchar(as.character(sum$motif)) == 7) & (sum$group == "Open sea") & (sum$chromosome == "somatic")) ,], aes(x = sex, y=ratio, fill=sex)) +
  geom_jitter(size=0.4, alpha=1, aes(colour=sex)) +
  geom_boxplot(outlier.shape = NA, aes(alpha = 0.1)) +
  scale_y_continuous(labels = scales::percent,
                     limits = c(0,0.01),
                     breaks=c(0.01,0.0075,0.005,0.0025,0)) +
  #ylab("Fraction of chrX cfDNA fragments\nstarting or ending at CCGs") + 
  facet_grid(~motif) +
  theme_classic(base_size = 20) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        text = element_text(size=16),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        axis.text.y.left = element_blank(),
        legend.position = "none",
        #axis.title.y = element_text(margin = margin(r = 20, l = 20))
        ) +
  scale_fill_manual(values = c("#99CCFF", "#FF99BB")) +
  scale_color_manual(values = c("#99CCFF", "#FF99BB")) +
  stat_compare_means(method = "t.test", size = 3, aes(label = paste0("p = ", after_stat(p.format))))

plot2_inter_auto <- ggplot(data=sum[which((nchar(as.character(sum$motif)) == 6) & (sum$group == "Open sea") & (sum$chromosome == "somatic")),], aes(x = sex, y=ratio, fill=sex)) +
  geom_jitter(size=0.4, alpha=1, aes(colour=sex)) +
  geom_boxplot(outlier.shape = NA, aes(alpha = 0.1)) +
  scale_y_continuous(labels = scales::percent,
                     breaks = c(0.0125,0.01,0.0075,0.005,0.0025,0),
                     limits = c(0,0.0125)) +
  #ylab("Fraction of chrX cfDNA fragments\nstarting or ending at CGs") + 
  facet_grid(~motif) +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x.bottom = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        axis.text.y.left = element_blank(),
        legend.position = "none",
        #axis.title.y = element_text(margin = margin(r = 20, l = 20))
        ) +
  scale_fill_manual(values = c("#99CCFF", "#FF99BB")) +
  scale_color_manual(values = c("#99CCFF", "#FF99BB")) +
  stat_compare_means(method = "t.test", size = 3, aes(label = paste0("p = ", after_stat(p.format))))

title_chrx_left <- ggdraw() + 
  draw_label(
    "Chromosome X",
    size = 13,
    hjust = -0.15,
    vjust = 2
  )
title_chrx <- ggdraw() + 
  draw_label(
    "Chromosome X",
    size = 13,
    hjust = 0.25,
    vjust = 2
  )
title_auto <- ggdraw() + 
  draw_label(
    "Autosomes",
    size = 13,
    hjust = 0.4,
    vjust = 2
  ) 
plot_shores_chrx <- cowplot::plot_grid(title_chrx_left, plot2_shores_chrx, plot1_shores_chrx, ncol=1, align="hv", axis = "bt", rel_heights = c(0.1, 1.1, 1.1)) + theme(plot.background = element_rect(fill = "white", colour = NA))
plot_shores_auto <- cowplot::plot_grid(title_auto, plot2_shores_auto, plot1_shores_auto, ncol=1, align="hv", axis = "bt", rel_heights = c(0.1, 1.1, 1.1)) + theme(plot.background = element_rect(fill = "white", colour = NA))
plot_shores <- cowplot::plot_grid(plot_shores_chrx, plot_shores_auto, ncol=2, align="hv", axis = "bt", rel_widths = c(1.2,0.85)) + theme(plot.background = element_rect(fill = "white", colour = NA))
title_shores <- ggdraw() + 
  draw_label(
    "Shores",
    size = 16,
    hjust = -0.7,
    vjust = 0
  ) 
plot_shores <- cowplot::plot_grid(title_shores, plot_shores, nrow=2, rel_heights = c(0.05,1)) + theme(plot.background = element_rect(fill = "white", colour = NA))

plot_shelves_chrx <- cowplot::plot_grid(title_chrx, plot2_shelves_chrx, plot1_shelves_chrx, ncol=1, align="hv", axis = "bt", rel_heights = c(0.1, 1.1, 1.1)) + theme(plot.background = element_rect(fill = "white", colour = NA))
plot_shelves_auto <- cowplot::plot_grid(title_auto, plot2_shelves_auto, plot1_shelves_auto, ncol=1, align="hv", axis = "bt", rel_heights = c(0.1, 1.1, 1.1)) + theme(plot.background = element_rect(fill = "white", colour = NA))
plot_shelves <- cowplot::plot_grid(plot_shelves_chrx, plot_shelves_auto, ncol=2, align="hv", axis = "bt", rel_widths = c(1.2,1)) + theme(plot.background = element_rect(fill = "white", colour = NA))
title_shelves <- ggdraw() + 
  draw_label(
    "Shelves",
    size = 16,
    hjust = 0,
    vjust = 0
  ) 
plot_shelves <- cowplot::plot_grid(title_shelves, plot_shelves, nrow=2, rel_heights = c(0.05,1)) + theme(plot.background = element_rect(fill = "white", colour = NA))


plot_inter_chrx <- cowplot::plot_grid(title_chrx, plot2_inter_chrx, plot1_inter_chrx, ncol=1, align="hv", axis = "bt", rel_heights = c(0.1, 1.1, 1.1)) + theme(plot.background = element_rect(fill = "white", colour = NA))
plot_inter_auto <- cowplot::plot_grid(title_auto, plot2_inter_auto, plot1_inter_auto, ncol=1, align="hv", axis = "bt", rel_heights = c(0.1, 1.1, 1.1)) + theme(plot.background = element_rect(fill = "white", colour = NA))
plot_inter <- cowplot::plot_grid(plot_inter_chrx, plot_inter_auto, ncol=2, align="hv", axis = "bt", rel_widths = c(1.2,1)) + theme(plot.background = element_rect(fill = "white", colour = NA))
title_inter <- ggdraw() + 
  draw_label(
    "Open sea",
    size = 16,
    hjust = 0,
    vjust = 0
  ) 
plot_inter <- cowplot::plot_grid(title_inter, plot_inter, nrow=2, rel_heights = c(0.05,1)) + theme(plot.background = element_rect(fill = "white", colour = NA))

plot <-  cowplot::plot_grid(plot_shores, plot_shelves, plot_inter, nrow=1, align="hv", rel_widths  = c(1.2, 1.06, 1.06)) + theme(plot.background = element_rect(fill = "white", colour = NA))

ggsave(paste("../output/Supplementary_Fig_8.jpg"), plot = plot, device = "jpeg",width = 25, height=12,units = "in", dpi=300)

在这里插入图片描述

Supplementary Figure 8

tab <- readRDS(here('data/tss_cpg_connect_filled.rds')) %>%
  column_to_rownames('trans') %>%
  mutate(Coverage = log2(cov + 1)) %>%
  mutate(RNA=log2(rna + 1)) %>%
  mutate(WPS=wps) %>%
  mutate(Beta=beta) %>%
  mutate(Size=size) %>%
  dplyr::filter(!is.na(WPS) & !is.na(Beta) & !is.na(RNA) & Size > 0) %>%
  mutate(Methylated=Beta > 0.5) %>%
  mutate(WPS=wps)

ggplot(tab, aes(x=Coverage, y=RNA)) +
  geom_point(size=0.5, color='grey') +
  geom_density2d(color='black') +
  theme_classic(base_size=20) +
  xlab("Mean Coverage (Log2 CPM)") +
  ylab("Mean Expression (Log2 CPM)")

ggplot(tab, aes(y=WPS, x=Coverage)) +
  geom_point(size=0.5, color='grey') +
  geom_density2d(color='black') +
  theme_classic(base_size=20) +
  ylab("Mean Positioning Score") +
  xlab("Mean Coverage (Log2 CPM)")

breaks <- c(0.6, .8, 1, 1.5, 3, 6, 9)
ggplot(tab, aes(y=Beta, x=cov)) +
  geom_point(size=0.5, color='grey') +
  geom_density2d(color='black') +
  theme_classic(base_size=20) +
  ylab("Mean Beta Value") +
  xlab("Mean Coverage (CPM)") +
  xlim(0, 2000)

请添加图片描述
请添加图片描述

Supplementary Figure 10

t <- readRDS("../data/cpg_500000_topbot1000.rds")
t$beta <- sub("cpg_highmet", "High methylation at CpG-islands", t$beta)
t$beta <- sub("cpg_lowmet", "Low methylation at CpG-islands", t$beta)
t$beta <- factor(t$beta, levels = c("Low methylation at CpG-islands", "High methylation at CpG-islands"))
cpg_cov <- ggplot(t, aes(x=relpos_500,y=cov)) +
  geom_line(aes(group = beta, color = beta), alpha = 1) +
  #geom_smooth(method = "loess", se = FALSE, span = 0.001, aes(group= beta, color = beta)) +
  scale_x_continuous(breaks = c(1,251,501), labels=format(c(-500000, 0, 500000), scientific = FALSE)) +
  ggtitle("Coverage around CpG-islands") +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.title.x = element_text(margin = margin(t = 20, b = 0)),
        legend.position = c(0.75,0.25),
        legend.title = element_blank(),
        axis.title.y = element_text(margin = margin(r = 20, l = 0))) +
  xlab("") +
  ylab("Cumulative coverage") +
  ylim(400, 1000) +
  scale_color_manual(values= c("#004766", "#77C8DD"))

cpg_size <- ggplot(t, aes(x=relpos_500,y=size)) +
  geom_line(aes(group = beta, color = beta), alpha = 1) +
  #geom_smooth(method = "loess", se = FALSE, span = 0.5, aes(group=beta, color = beta)) +
  scale_x_continuous(breaks = c(1,251,501), labels=format(c(-500000, 0, 500000), scientific = FALSE)) +
  ggtitle("cfDNA fragment size around CpG-islands") +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.title.x = element_text(margin = margin(t = 20, b = 0)),
        legend.position = c(0.75,0.25),
        legend.title = element_blank(),
        axis.title.y = element_text(margin = margin(r = 20, l = 0))) +
  ylim(166, 171) +
  xlab("") +
  ylab("Average cfDNA-size")  +
  scale_color_manual(values= c("#004766", "#77C8DD"))

t <- readRDS("../data/tss_500000_topbot1000.rds")
t$group <- sub("expressed", "High expression", t$group)
t$group <- sub("not High expression", "Low expression", t$group)
tss_cov <- ggplot(t, aes(x=relpos_500,y=cov)) +
  geom_line(aes(group = group, color = group), alpha = 1) +
  #geom_smooth(method = "loess", se = FALSE, span = 0.5, aes(group=group, color = group)) +
  scale_x_continuous(breaks = c(1,251,501), labels=format(c(-500000, 0, 500000), scientific = FALSE)) +
  ggtitle("Coverage around transcription start sites") +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.title.x = element_text(margin = margin(t = 20, b = 0)),
        legend.position = c(0.75,0.25),
        legend.title = element_blank(),
        axis.title.y = element_text(margin = margin(r = 20, l = 0))) +
  xlab("") +
  ylab("") +
  ylim(400, 1000) +
  scale_color_manual(values= c("#52c922","#1b7d0e"))

tss_size <- ggplot(t, aes(x=relpos_500,y=size)) +
  geom_line(aes(group = group, color = group), alpha=1) +
  #geom_smooth(method = "loess", se = FALSE, span = 0.5, aes(group=group, color = group)) +
  scale_x_continuous(breaks = c(1,251,501), labels=format(c(-500000, 0, 500000), scientific = FALSE)) +
  ggtitle("cfDNA fragment size around transcription start sites") +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.title.x = element_text(margin = margin(t = 20, b = 0)),
        legend.position = c(0.75,0.25),
        legend.title = element_blank(),
        axis.title.y = element_text(margin = margin(r = 20, l = 0))) +
  ylim(166, 171) +
  xlab("") +
  ylab("") +
  scale_color_manual(values= c("#52c922","#1b7d0e"))


t <- readRDS("../data/cpg_500000_topbot1000_beta2.rds")
t$beta_group <- sub("cpg_highmet", "High methylation at CpG-islands", t$beta_group)
t$beta_group <- sub("cpg_lowmet", "Low methylation at CpG-islands", t$beta_group)
t$beta_group <- factor(t$beta_group, levels = c("Low methylation at CpG-islands", "High methylation at CpG-islands"))
t[which(t$relpos_500 == 0),]$mean_beta <- NA
t <- t %>%
  group_by(beta_group, relpos_500) %>%
  summarise(beta = mean(mean_beta))
cpg_beta <- ggplot(t, aes(x=relpos_500,y=beta)) +
  geom_line(aes(group = beta_group, color = beta_group), alpha = 0.4) +
  geom_smooth(method = "loess", se = FALSE, span = 0.2, aes(group= beta_group, color = beta_group)) +
  scale_x_continuous(breaks = c(1,251,501), labels=format(c(-500000, 0, 500000), scientific = FALSE)) +
  coord_cartesian(ylim=c(0.75, 0.9)) +
  ggtitle("Methylation around CpG-islands") +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.title.x = element_text(margin = margin(t = 20, b = 0)),
        legend.position = c(0.75,0.25),
        legend.title = element_blank(),
        axis.title.y = element_text(margin = margin(r = 20, l = 0))) +
  xlab("Position relative to CpG-island") +
  ylab("Average beta-value") +
  #ylim(400, 1000) +
  scale_color_manual(values= c("#004766", "#77C8DD"))

t <- readRDS("../data/tss_500000_topbot1000_beta.rds")
t$exp_group <- sub("tss_highexp", "High expression", t$exp_group)
t$exp_group <- sub("tss_lowexp", "Low expression", t$exp_group)
t$exp_group <- factor(t$exp_group, levels = c("High expression", "Low expression"))
t <- t %>%
  group_by(exp_group, relpos_500) %>%
  summarise(beta = mean(mean_beta))
tss_beta <- ggplot(t, aes(x=relpos_500,y=beta)) +
  #geom_line()
  geom_line(aes(group = exp_group, color = exp_group), alpha = 1) +
  #geom_smooth(method = "loess", se = FALSE, span = 0.5, aes(group= beta, color = beta)) +
  scale_x_continuous(breaks = c(1,251,501), labels=format(c(-500000, 0, 500000), scientific = FALSE)) +
  ggtitle("Methylation around transcription start sites") +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.title.x = element_text(margin = margin(t = 20, b = 0)),
        legend.position = c(0.75,0.25),
        legend.title = element_blank(),
        axis.title.y = element_text(margin = margin(r = 20, l = 0))) +
  xlab("Position relative to TSS") +
  ylab("Average beta-value") +
  #ylim(400, 1000) +
  scale_color_manual(values= c("#52c922","#1b7d0e"))

up <- plot_grid(cpg_cov, tss_cov, nrow = 1, rel_heights = c(1,1), align = "v", labels = c('a', 'b'))
down <- plot_grid(cpg_size, tss_size, nrow = 1, rel_heights = c(1,1), align = "v", labels = c('c', 'd'))
downer <- plot_grid(cpg_beta, NULL, nrow = 1, rel_heights = c(1,1), align = "v", labels = c('e', ''))
all <- plot_grid(up, down, downer, ncol=1, rel_heights = c(1,1,1), align="h")
ggsave("../output/Supplementary_Fig_10.jpg", plot = all, device = "jpg",width = 180, height=108,units = "mm", dpi=600, bg = "white", scale = 3)

在这里插入图片描述

Supplementary Figure 10

Required Libraries

library(tidyverse)
library(fgsea)
library(ggplot2)
library(ggpubr)
library(data.table)
library(ComplexHeatmap)
library(here)

Custom functions read in here. See .Rmd for details.

# Make custom GSEA plot
custom_enrichment <- function(ranks, genes, ylims, title=''){
    library(ggplot2)
    prep_df <- function(peaks, ids){
        tar_ranges <- 2:(length(peaks$tops) - 1)
        x <- rep(ids, each=2)
        y <- rbind(peaks$bots[tar_ranges], peaks$top[tar_ranges]) %>%
            c()
        data.frame(x=c(0, length(ranks), x), y=c(0, 0, y))
    }
    
    # Prep data
    ranks <- sort(ranks, decreasing=T)
    ids <- genes %>%
        intersect(names(ranks)) %>%
        match(names(ranks)) %>%
        sort()
    peaks <- custom_get_peaks(ranks, ids)
    df <- prep_df(peaks, ids)

    # Generate plot with real data
    p <- ggplot(df, aes(x=x, y=y)) + 
        geom_line(color="dark blue") + 
        geom_hline(yintercept=max(df$y), colour="red", linetype="dashed") + 
        geom_hline(yintercept=min(df$y), colour="red", linetype="dashed") + 
        geom_hline(yintercept=0, colour="black") + 
        theme_classic(base_size=20) +
        geom_segment(data=df[-1:-2, ], mapping=aes(x=x, y=ylims[1]+.01, xend=x, yend=ylims[1]+0.05), size=0.2) +
        theme(plot.title = element_text(size=15)) +
        labs(x="Gene Rank", y="Enrichment Score", title=title) + 
        ylim(ylims[1], ylims[2])

    # Add lines for random permutations
    for(i in 1:100){
        new_ids <- sample(1:length(ranks), length(ids)) %>%
            sort()
        peaks <- custom_get_peaks(ranks, new_ids)
        df <- prep_df(peaks, ids)
        p$layers <- c(geom_line(data=df, color='light grey', size=.1), p$layers)
    }
    p
}

custom_calc_gsea_stats <- function(ranks, ids, nperm=0){
    # Get peaks and leading edge genes
    peaks <- custom_get_peaks(ranks, ids)
    if(peaks$stat > 0){
        mid <- which(peaks$tops == peaks$stat) %>%
            na.omit() %>%
            head(1)
        le <- names(peaks$tops)[1:mid] %>%
            na.omit()
    } else {
        mid <- which(peaks$bots == peaks$stat) %>%
            na.omit() %>%
            tail(1)
        le <- names(peaks$bots)[length(peaks$bots):mid] %>%
            na.omit()
    }
    outs <- list(Leading.Edge=le, Enrichment.Score=peaks$stat)

    # Calculate pval if nperm defined
    if(nperm != 0){
        stats <- sapply(1:nperm, function(x){
            new_ids <- sample(1:length(ranks), length(ids)) %>%
                sort()
            custom_get_peaks(ranks, new_ids)$stat
        })
        n_greater <- sum(abs(stats) > abs(peaks$stat))
        if(n_greater == 0) n_greater <- 1
        outs$P <- n_greater / nperm
    }
    outs
}

custom_get_peaks <- function(ranks, ids){
    # Sort ids
    ids <- sort(ids)

    # Get correct step size for enrichment scores
    step_up <- 1 / length(ids)
    step_dn <- 1 / (length(ranks))

    # Calculate enrichment scores before and after each hit
    tops <- 0
    bots <- 0
    prev_id <- 1
    for(id in ids){
        bots <- c(bots, tail(tops, 1) + step_dn * (prev_id - id))
        tops <- c(tops, tail(bots, 1) + step_up)
        prev_id <- id
    }
    tops <- c(tops, 0)
    bots <- c(bots, 0)
    
    # Calc stat as the min/max of bot/top
    names(tops) <- names(bots) <- c(NA, names(ranks)[ids], NA)
    stat <- ifelse(abs(min(bots)) > max(tops), min(bots), max(tops))

    # Return
    list(tops=tops, bots=bots, stat=stat)
}

custom_gsea <- function(ranks, sets){
    # Prep to run
    df <- c()
    ranks <- sort(ranks, decreasing=T)

    # Step through each set
    for(set in names(sets)){
        # Get index of genes in pathway
        ids <- sets[[set]] %>%
            intersect(names(ranks)) %>%
            match(names(ranks)) %>%
            sort()

        # Calc relevant GSEA info and add to data frame
        gsea <- custom_calc_gsea_stats(ranks, ids, 1e4)
        df <- rbind(df, tibble(Gene.Set=set, P.val=gsea$P, 
            Enrichment.Score=gsea$Enrichment.Score, 
            Leading.Edge=list(gsea$Leading.Edge)))
    }

    # Finalize analyses and return
    df$P.adj <- p.adjust(df$P.val, 'BH')
    df
}

# Make GSEA plots for all ranks for given pathways
compare_gsea_plots <- function(path, sets, all_ranks, ylims){
    plots <- lapply(1:length(all_ranks), function(i){
        custom_enrichment(all_ranks[[i]], sets[[path]], ylims,
            paste(names(all_ranks)[i]))
    })
    title <- gsub('_', ' ', path, fixed=T) %>%
        str_to_title()
    ggarrange(plotlist=plots, ncol=2, nrow=2) %>%
        annotate_figure(top=text_grob(title, size=20)) %>%
        print()
    return()
}

Generate GSEA Enrichment plots

# Read in analysis data
# GSEA results for each modality
comp_table <- read.table(here('data/giraffe/gsea_table.csv'), sep=',', header=T)

# Gene ranks calculated by modality
all_ranks <- readRDS(here('data/giraffe/ranks.RDS'))

# Read in gene sets
sets <- c(
    gmtPathways(here('data/giraffe/gene_sets/c2.cp.kegg.v7.5.1.symbols.gmt')),
    gmtPathways(here('data/giraffe/gene_sets/h.all.v7.5.1.symbols.gmt'))
)
len <- sapply(sets, length)
sets <- sets[len > 150]

# Plot target gene set
comp_table <- arrange(comp_table, CpG.Coverage.P.val)
i <- 1
ymax <- max(comp_table[i, c(3, 6, 9, 12)])
ymin <- min(comp_table[i, c(3, 6, 9, 12)])
compare_gsea_plots(comp_table$Gene.Set[i], sets, all_ranks, c(ymin-.1, ymax))

请添加图片描述

Supplementary Figure 12A

t <- readRDS("../data/tss_cpg_connect_filled.rds")
t <- t[complete.cases(t),]
t$group <- "NA"
t$group2 <- "NA"
t_1000exp <- t[order(-t$rna),][c(1:1000),]
t_1000exp$group <- "High\nexpression"
t_1000exp$group2 <- "All genes"
t_1000unexp <- t[order(t$rna),][c(1:1000),]
t_1000unexp$group <- "Low\nexpression"
t_1000unexp$group2 <- "All genes"
t_1000exp_meth <- t[which(t$beta >= 0.7),][order(-t[which(t$beta >= 0.7),]$rna),][c(1:1000),]
t_1000exp_meth$group <- "High\nexpression"
t_1000exp_meth$group2 <- "Methylated genes"
t_1000unexp_meth <- t[which(t$beta >= 0.7),][order(t[which(t$beta >= 0.7),]$rna),][c(1:1000),]
t_1000unexp_meth$group <- "Low\nexpression"
t_1000unexp_meth$group2 <- "Methylated genes"
t_1000exp_unmeth <- t[which(t$beta <= 0.3),][order(-t[which(t$beta <= 0.3),]$rna),][c(1:1000),]
t_1000exp_unmeth$group <- "High\nexpression"
t_1000exp_unmeth$group2 <- "Unmethylated genes"
t_1000unexp_unmeth <- t[which(t$beta <= 0.3),][order(t[which(t$beta <= 0.3),]$rna),][c(1:1000),]
t_1000unexp_unmeth$group <- "Low\nexpression"
t_1000unexp_unmeth$group2 <- "Unmethylated genes"

t_cov <- rbind(t_1000exp, t_1000unexp,
               t_1000exp_meth, t_1000unexp_meth,
               t_1000exp_unmeth, t_1000unexp_unmeth)
t_cov$group2 <- factor(t_cov$group2, levels = c("All genes", "Unmethylated genes", "Methylated genes"))

alg <- ggplot(t_cov, aes(y=cov, x=group, fill=group)) +
  geom_jitter(size=0.4, alpha=1, width = 0.1, aes(colour=group)) +
  geom_boxplot(outlier.shape = NA, width = 0.2, aes(alpha = 0.1)) +
  facet_wrap(.~group2,scales='free') +
  theme_classic(base_size = 20) +
  theme(axis.text.x = element_text(),
        text = element_text(size=16),
        axis.title.x = element_text(margin = margin(r = 20, l = 20)),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        axis.title.y = element_text(margin = margin(r = 20, l = 20)),
        plot.title = element_text(hjust = 0.5,  size=16)) +
  scale_fill_manual(values = c("#77dd77", "#00660a")) +
  scale_color_manual(values = c("#77dd77", "#00660a")) +
  scale_y_continuous(limits = c(0,2000)) +
  xlab("") +
  ylab(paste0("cumulative cfDNA coverage\naround transcription start sites"))

Supplementary Figure 12B

he <- readRDS("../data/Moss_ratios.rds")
hegr <- GRanges(paste0(he$chr, ":", he$pos))
hegr$beta <- he$beta
hegr$ratio <- (he$start_cg + he$end_cg) / (he$start_cg_over + he$end_cg_over)
hegr <- hegr[which(seqnames(hegr) %in% paste0("chr", c(1:22)))]
hegr <- hegr[order(hegr$beta)]
hegr$beta_group <- c(1:length(hegr)) / length(hegr)
hegr$beta_group <- round((hegr$beta_group + 0.05) * 10)


tss <- read.table(file = "../data/transcriptAnno-GRCh37.75.tsv")
tss <- tss[which(tss$V2 %in% c(1:22)),]
tss$V2 <- paste0("chr", tss$V2)
tissue_key <- read.table("../data/tissue_key.tsv",as.is=T,header=T,sep="\t",quote="\"")
conv <- read.table('../data/labels.txt',as.is=T,header=T,sep="\t",quote="\"")
rna <- read.table('../data/RNAtable.tsv.gz',as.is=T,header=T)
express <- conv[which(conv$Category %in% c("Myeloid")),]$RName
express_df <- rna[c("GeneID", express)]
express_df$mean <- rowMeans(express_df[,c(2:(length(express) + 1))], na.rm=T)
express_df <- express_df[order(-express_df$mean),]
tssgr <- GRanges(paste0(tss$V2, ":", tss$V3, "-", tss$V4, ":", tss$V5))
tssgr$ensembl <- tss$V1
tssgr$rna <- 0
tssgr$rna <- express_df[match(tssgr$ensembl, express_df$GeneID),]$mean

hegr$rna <- NA
hegr$rna <- tssgr[nearest(hegr, tssgr)]$rna
hegr <- hegr[order(hegr$rna)]
hegr$rna_group <- c(1:length(hegr)) / length(hegr)
hegr$rna_group <- round((hegr$rna_group + 0.05) * 10)

he <- as_tibble(as.data.frame(hegr))
he <- he[complete.cases(he),]

he_meth <- he %>%
  group_by(beta_group) %>%
  summarise(ratio = mean(ratio), beta = mean(beta), rna = mean(rna))
ggmeth_ratio <- ggplot(he_meth, aes(x=beta_group, y=ratio)) +
  geom_bar(stat="identity", aes(fill=beta_group)) +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        panel.spacing = unit(2, "lines"),
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_y_continuous(name = "Fraction of cfDNA fragments\nstarting or ending at CGs", limits = c(0,0.009)) +
  scale_fill_gradient(low="#77dd77", high="#00660a")

ggmeth_meth <- ggplot(he_meth, aes(x=beta_group, y=beta)) +
  geom_bar(stat="identity", aes(fill=beta_group)) +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        panel.spacing = unit(2, "lines"),
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_y_continuous(name = "Methylation (beta-value)", limits = c(0,1)) +
  scale_fill_gradient(low="#77C8DD", high="#004766")

ggmeth_rna <- ggplot(he_meth, aes(x=beta_group, y=rna)) +
  geom_bar(stat="identity", aes(fill=beta_group)) +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        panel.spacing = unit(2, "lines"),
        axis.title.y = element_text(margin = margin(r = 20, l = 20)),
        axis.title.x = element_text(margin = margin(t = 20, b = 20))) +
  scale_y_continuous(name = "Gene expression (TPM)", limits = c(0,150)) +
  scale_fill_gradient(low="#77C8DD", high="#004766") +
  xlab("order by methylation")
ggmeth_rna

he_rna <- he %>%
  group_by(rna_group) %>%
  summarise(ratio = mean(ratio), beta = mean(beta), rna = mean(rna))


ggrna_ratio <- ggplot(he_rna, aes(x=rna_group, y=ratio)) +
  geom_bar(stat="identity", aes(fill=rna_group)) +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        panel.spacing = unit(2, "lines"),
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_y_continuous(name = "Fraction of cfDNA fragments\nstarting or ending at CGs", limits = c(0,0.009)) +
  scale_fill_gradient(low="#77dd77", high="#00660a")

ggrna_meth <- ggplot(he_rna, aes(x=rna_group, y=beta)) +
  geom_bar(stat="identity", aes(fill=rna_group)) +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        panel.spacing = unit(2, "lines"),
        axis.title.y = element_text(margin = margin(r = 20, l = 20))) +
  scale_y_continuous(name = "Methylation (beta-value)", limits = c(0,1)) +
  scale_fill_gradient(low="#77C8DD", high="#004766")

ggrna_rna <- ggplot(he_rna, aes(x=rna_group, y=rna)) +
  geom_bar(stat="identity", aes(fill=rna_group)) +
  theme_classic(base_size = 20) +
  theme(text = element_text(size=16),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        strip.background=element_rect(color = NA,  fill=NA),
        legend.position = "none",
        panel.spacing = unit(2, "lines"),
        axis.title.y = element_text(margin = margin(r = 20, l = 20)),
        axis.title.x = element_text(margin = margin(t = 20, b = 20))) +
  scale_y_continuous(name = "Gene expression (TPM)", limits = c(0,150)) +
  scale_fill_gradient(low="#77C8DD", high="#004766") +
  xlab("order by expression")

plot_meth <- plot_grid(ggmeth_ratio, ggmeth_meth, ggmeth_rna , ncol=1, nrow=3, rel_heights = c(1,1,1.2), align = "v")
plot_rna <- plot_grid(ggrna_ratio, ggrna_meth, ggrna_rna, ncol=1, nrow=3, rel_heights = c(1,1,1.2), align = "v")
plot <- plot_grid(plot_meth, plot_rna, ncol=2, rel_widths= c(1,1), align="h", labels=c("",""), label_size = 20)

combi <- plot_grid(alg, plot, ncol=2, rel_widths=c(1,1), align="hv", labels=c("a","b"), label_size = 20)
ggsave("../output/Supplementary_Fig_12.jpg", plot = combi, device = "jpeg",width = 20, height=12,units = "in", dpi=600, scale = 1.2)

在这里插入图片描述

Supplementary Figure 13

Pre-requisites

These packages are necessary to make the plots that make up Supplementary Figure 13.

library("reshape2")
library("ggplot2")
library("cowplot")
library("tidyverse")
library("dplyr")
library("GenomicRanges")

Supplementary Figure 13A: CpG-islands WPS (mononucleosomal reads)

colorz <- c("#DD6627", "#77C8DD", "#004766")
matrix <- readRDS("../data/cpg_wps_smallfragments.rds")
cpg <- readRDS("../data/cpgIslands.hg19.20001.rds")
geneid_ordered <- order(cpg$beta)
cpg <- cpg[geneid_ordered]
remove <- which(cpg$amount <=2)
cpg <- cpg[-remove]
tracker <- tibble("index" = c(1:nrow(matrix)))
tracker <- tracker[-remove,]

matrix_ordered <- matrix
matrix_ordered <- matrix_ordered[-remove,]

included_genes <- which(rowSums(is.na(matrix_ordered)) <= 10)
tracker <- tracker[included_genes,]
cpg <- cpg[included_genes]
matrix_ordered <- matrix_ordered[which(rowSums(is.na(matrix_ordered)) <= 10),]
matrix_ordered_long <- melt(matrix_ordered)
colnames(matrix_ordered_long) <- c("gene_index", "relative_position", "wps")
matrix_ordered_long[which(matrix_ordered_long$relative_position <= 250),]$relative_position <- matrix_ordered_long[which(matrix_ordered_long$relative_position <= 250),]$relative_position - 251
matrix_ordered_long[which(matrix_ordered_long$relative_position > 250),]$relative_position <- matrix_ordered_long[which(matrix_ordered_long$relative_position > 250),]$relative_position - 250
matrix_ordered_long$relative_position <- matrix_ordered_long$relative_position * 10
matrix_ordered_long$gene_index <- matrix_ordered_long$gene_index * -1
matrix_ordered_long$gene_index <- matrix_ordered_long$gene_index - min(matrix_ordered_long$gene_index) +1
m <- as_tibble(matrix_ordered_long) %>%
    pivot_wider(id_cols=gene_index,
                names_from=relative_position,
                values_from=wps)
m2 <- m %>% 
  dplyr::select(-gene_index) %>%
  as.matrix()
sfun <- function(x, k){
    s <- runmean(Rle(x), k, endrule="constant")
    as.numeric(s)
}
m3 <- apply(m2, 2, sfun, k=5)
m[, -1] <- m3
x2 <- m %>%
    pivot_longer(cols=-1, names_to="relative_position",
                 values_to="wps") %>%
    mutate(relative_position=as.integer(relative_position))  %>%
    mutate(method="smoothed")
pos <- sort(c(seq(-2500, 2500, 1000), -10))
brks <- match(pos, sort(unique(x2$relative_position)))
pos[pos == -10] <- 0
downsample <- sample(unique(x2$gene_index), 1000)
downsample <- unique(x2$gene_index)
width <- unit(10, "cm")
x2[which(is.na(x2$wps)),]$wps <- mean(x2[-which(is.na(x2$wps)),]$wps)
#tibfinal <- tibble()
#for(i in unique(x2$relative_position)) {
#  tib <- tibble("diff" = (mean(x2[which(x2$relative_position %in% c(i)),][c(1:1000),]$wps) - mean(x2[which(x2$relative_position %in% c(i)),][c((length(unique(x2$gene_index))-1000):(length(unique(x2$gene_index)))),]$wps)),
#         "pos" = i)
#  tibfinal <- rbind(tibfinal, tib)
#}
p1 <- x2[which(x2$gene_index %in% downsample),] %>%
    mutate(wps=ifelse(wps > 250, 250, wps),
           wps=ifelse(wps < -250, -250, wps),
           gene_index=as.integer(factor(gene_index)),
           x=as.integer(factor(relative_position))) %>%
    ggplot(aes(x, gene_index)) +
    geom_tile(aes(fill=wps), size=0) +
    scale_fill_gradientn(colours = c("#004766", "#77C8DD", "#DD6627")) +
    scale_x_continuous(breaks=brks,
                       labels=pos, 
                       expand = c(0,0)) +
    scale_y_continuous(expand = c(0,0)) +
    labs(x="Position relative to the middle of the CpG-island") +
    theme_classic(base_size=10) +
    theme(text = element_text(size=20),
          plot.title=element_blank(),
          panel.border = element_rect(colour = "black", fill=NA, size=0.5),
          axis.ticks.y = element_blank(),
          axis.text.y = element_blank(),
          axis.title.y = element_blank(),
          legend.position="bottom",
          legend.box="horizontal",
          legend.spacing.x = unit(0, "cm"),
          legend.key.width = unit(0.217, "snpc"),
          plot.margin = margin(0,0.7,0.7,0.7, "cm")) + 
  guides(fill = guide_colourbar(title.position="bottom", title.hjust = 0.5, title.vjust = 3, barheight = 0.5, frame.colour = "black", frame.linewidth = 0.5, title="cfDNA nucleosomal positioning score"))
values <- data.frame("var_x" = 1,
                     "var_y" = max(matrix_ordered_long$gene_index):1,
                     "value" = cpg$beta)

values$value <- as.numeric(values$value)
p2 <- ggplot(values, aes(x = var_y, y = value)) +
  geom_bar(stat="identity", fill = "black") +
  labs(x="CpG-islands in order of methylation (beta-value)", y="", title="") +
  theme_classic(base_size=10) + 
  theme(text = element_text(size=20),
        axis.title.y.right =  element_text(angle=90, vjust = 2),
        axis.line.x.bottom = element_blank(),
        axis.line.y.right = element_blank(),
        plot.title=element_blank(),
        axis.ticks.y = element_blank(),
        axis.ticks.x.bottom = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x.bottom = element_blank(),
        legend.position="bottom",
        plot.margin = margin(0,0,0,0, "cm")) + 
  coord_flip() +
  guides(x.sec = "axis", y.sec = "axis") +
  scale_y_continuous(expand=c(0,0), limits = c(0, 1), breaks = seq(0, 1, by=1), labels=c("0.000000" = "0", "1" = "1")) +
  scale_x_continuous(expand=c(0,0), position = "top")


p3 <- x2[which(x2$gene_index %in% downsample),] %>%
  group_by(relative_position) %>%
  summarise(nwps = sum(wps)) %>%
  ggplot(aes(relative_position, nwps)) +
  labs(x="", y="", title="Cumulative cfDNA nucleosomal positioning score") +
  geom_line() +
  theme_classic(base_size=10) +
  theme(text = element_text(size=20),
        #axis.title.x.bottom = element_text(angle=0, vjust = 10),
        plot.title=element_text(hjust=0.5, vjust=0, size = 16),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        legend.position="bottom",
        legend.box="horizontal",
        legend.spacing.x = unit(0, "cm"),
        plot.margin = margin(0.2,0.7,0,0.7, "cm")) +
  scale_x_continuous(breaks=brks,
                       labels=pos, 
                       expand = c(0,0)) +
  ggtitle("Cumulative cfDNA nucleosomal positioning score")


p1_legend <- get_legend(p1)
p1 <- p1 + theme(legend.position='none')
#p2_legend <- get_legend(p2)
#p2 <- p2 + theme(legend.position='none')
#plot_combi <- plot_grid(plot_grid(p1, p2, ncol=2, nrow=1,align = "h", rel_widths = c(10,2)),
#          plot_grid(NULL, p1_legend, p2_legend, NULL, ncol = 1, nrow = 4, align="v", axis="b"), #rel_widths = c(10,1))
plot_combi1 <- plot_grid(p3, NULL, ncol=2, nrow=1, rel_widths = c(10,1), align = "h")
plot_combi2 <- plot_grid(p1, p2, ncol=2, nrow=1, rel_widths = c(10,1), align = "h")
plot_combi3 <- plot_grid(p1_legend, NULL, ncol=2, nrow=1, rel_widths = c(10,1), align = "h")
plot_combi_cpg_wps <- plot_grid(plot_combi1, plot_combi2, plot_combi3, NULL, ncol=1, rel_heights = c(2,15,1,0.5))
title <- ggdraw() + 
  draw_label(
    "Nucleosome positioning over CpG-islands",
    hjust = 0.5,
    size = 20
  ) +
  theme(
    # add margin on the left of the drawing canvas,
    # so title is aligned with left edge of first plot
    plot.margin = margin(0, 0, 0, 7)
  )
title <- plot_grid(title, NULL, ncol=2, nrow=1, rel_widths = c(10,1), align = "h")
plot_combi_cpg_wps_title <- plot_grid(title, plot_combi_cpg_wps, ncol = 1, rel_heights = c(1,15))

在这里插入图片描述

Supplementary Figure 13B: TSS WPS (mononucleosomal reads)

matrix <- readRDS("../data/tss_wps_smallfragments.rds")
tissue_key <- read.table("../data/tissue_key.tsv",as.is=T,header=T,sep="\t",quote="\"")
conv <- read.table('../data/labels.txt',as.is=T,header=T,sep="\t",quote="\"")
rna <- read.table('../data/RNAtable.tsv.gz',as.is=T,header=T)
express <- conv[which(conv$Category %in% c("Myeloid")),]$RName
express_df <- rna[c("GeneID", express)]
express_df$mean <- rowMeans(express_df[,c(2:(length(express) + 1))], na.rm=T)
express_df <- express_df[order(-express_df$mean),]
geneid_ordered <- express_df$GeneID

tss <- read.table(file = "../data/transcriptAnno-GRCh37.75.tss20001.tsv")
colnames(tss) <- c("transcript", "seqnames", "start", "end", "strand")
tss$seqnames <- paste0("chr", tss$seqnames)
tss_gr <- GRanges(tss)
tss_gr <- tss_gr[which(seqnames(tss_gr) %in% paste0("chr", c(1:22)))]
seqlevels(tss_gr) <- paste0("chr", c(1:22))

order <- match(geneid_ordered, tss_gr$transcript)
order <- order [!is.na(order)]
#matrix_ordered <- matrix[order,]
matrix_ordered <- matrix
included_genes <- tss_gr[order]$transcript
included_genes <- included_genes[which(rowSums(matrix_ordered) <= 22500)]
matrix_ordered <- matrix_ordered[which(rowSums(matrix_ordered) <= 22500),]

matrix_ordered_long <- melt(matrix_ordered)
colnames(matrix_ordered_long) <- c("gene_index", "relative_position", "wps")
matrix_ordered_long[which(matrix_ordered_long$relative_position <= 250),]$relative_position <- matrix_ordered_long[which(matrix_ordered_long$relative_position <= 250),]$relative_position - 251
matrix_ordered_long[which(matrix_ordered_long$relative_position > 250),]$relative_position <- matrix_ordered_long[which(matrix_ordered_long$relative_position > 250),]$relative_position - 250
matrix_ordered_long$relative_position <- matrix_ordered_long$relative_position * 10
matrix_ordered_long$gene_index <- matrix_ordered_long$gene_index * -1
matrix_ordered_long$gene_index <- matrix_ordered_long$gene_index - min(matrix_ordered_long$gene_index) +1
m <- as_tibble(matrix_ordered_long) %>%
    pivot_wider(id_cols=gene_index,
                names_from=relative_position,
                values_from=wps)
m2 <- m %>% 
  dplyr::select(-gene_index) %>%
  as.matrix()
sfun <- function(x, k){
    s <- runmean(Rle(x), k, endrule="constant")
    as.numeric(s)
}
m3 <- apply(m2, 2, sfun, k=5)
m[, -1] <- m3
x2 <- m %>%
    pivot_longer(cols=-1, names_to="relative_position",
                 values_to="wps") %>%
    mutate(relative_position=as.integer(relative_position))  %>%
    mutate(method="smoothed")
pos <- sort(c(seq(-2500, 2500, 1000), -10))
brks <- match(pos, sort(unique(x2$relative_position)))
pos[pos == -10] <- 0
downsample <- sample(unique(x2$gene_index), 1000)
downsample <- unique(x2$gene_index)
width <- unit(10, "cm")
p1 <- x2[which(x2$gene_index %in% downsample),] %>%
#p1 <- x2 %>%
    mutate(wps=ifelse(wps > 250, 250, wps),
           wps=ifelse(wps < -250, 250, wps),
           gene_index=as.integer(factor(gene_index)),
           x=as.integer(factor(relative_position))) %>%
    ggplot(aes(x, gene_index)) +
    geom_tile(aes(fill=wps), size=0) +
    scale_fill_gradientn(colours = c("#004766", "#77C8DD", "#DD6627")) +
    scale_x_continuous(breaks=brks,
                       labels=pos, 
                       expand = c(0,0)) +
    scale_y_continuous(expand = c(0,0)) +
    labs(x="Position relative to TSS") +
    theme_classic(base_size=10) +
    theme(text = element_text(size=20),
          plot.title=element_blank(),
          panel.border = element_rect(colour = "black", fill=NA, size=0.5),
          axis.ticks.y = element_blank(),
          axis.text.y = element_blank(),
          axis.title.y = element_blank(),
          legend.position="bottom",
          legend.box="horizontal",
          legend.spacing.x = unit(0, "cm"),
          legend.key.width = unit(0.217, "snpc"),
          plot.margin = margin(0,0.7,0.7,0.7, "cm")) + 
  guides(fill = guide_colourbar(title.position="bottom", title.hjust = 0.5, title.vjust = 3, barheight = 0.5, frame.colour = "black", frame.linewidth = 0.5, title="cfDNA nucleosomal positioning score"))
values <- data.frame("var_x" = 1,
                     "var_y" = max(matrix_ordered_long$gene_index):1,
                     "value" = express_df[which(express_df$GeneID %in% included_genes),]$mean,
                     "gene" = express_df[which(express_df$GeneID %in% included_genes),]$GeneID)
matrix_ordered_long$transcript_id <- values[match(matrix_ordered_long$gene_index, values$var_y),]$gene
matrix_ordered_long$rna_expression <- values[match(matrix_ordered_long$gene_index, values$var_y),]$value
#saveRDS(matrix_ordered_long, "/Users/mnoe/Documents/expression_coverage.rds")
p2 <- ggplot(values[which(values$var_y %in% downsample),], aes(x = var_y, y = log(value + 1))) +
  geom_bar(stat="identity", fill = "black") +
  labs(x="Genes in order of expression (TPM)", y="", title="") +
  theme_classic(base_size=10) + 
  theme(text = element_text(size=20),
        axis.title.y.right =  element_text(angle=90, vjust = 4),
        axis.line.x.bottom = element_blank(),
        axis.line.y.right = element_blank(),
        plot.title=element_blank(),
        axis.ticks.y = element_blank(),
        axis.ticks.x.bottom = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x.bottom = element_blank(),
        legend.position="bottom",
        plot.margin = margin(0,0,0,0, "cm")) + 
  coord_flip() +
  guides(x.sec = "axis", y.sec = "axis") +
  scale_y_continuous(expand=c(0,0), limits = c(0, 7.835816), breaks = seq(0, 7.835816, by=7.835816), labels=c("0.000000" = "0", "7.835816" = "2528")) +
  scale_x_continuous(expand=c(0,0), position = "top")


p3 <- x2[which(x2$gene_index %in% downsample),] %>%
  group_by(relative_position) %>%
  summarise(nwps = sum(wps)) %>%
  ggplot(aes(relative_position, nwps)) +
  labs(x="", y="", title="Cumulative cfDNA nucleosomal positioning score") +
  geom_line() +
  theme_classic(base_size=10) +
  theme(text = element_text(size=20),
        #axis.title.x.bottom = element_text(angle=0, vjust = 10),
        plot.title=element_text(hjust=0.5, vjust=0, size = 16),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        legend.position="bottom",
        legend.box="horizontal",
        legend.spacing.x = unit(0, "cm"),
        plot.margin = margin(0.2,0.7,0,0.7, "cm")) +
  scale_x_continuous(breaks=brks,
                       labels=pos, 
                       expand = c(0,0)) +
  ggtitle("Cumulative cfDNA nucleosomal positioning score")


p1_legend <- get_legend(p1)
p1 <- p1 + theme(legend.position='none')
#p2_legend <- get_legend(p2)
#p2 <- p2 + theme(legend.position='none')
#plot_combi <- plot_grid(plot_grid(p1, p2, ncol=2, nrow=1,align = "h", rel_widths = c(10,2)),
#          plot_grid(NULL, p1_legend, p2_legend, NULL, ncol = 1, nrow = 4, align="v", axis="b"), #rel_widths = c(10,1))
plot_combi1 <- plot_grid(p3, NULL, ncol=2, nrow=1, rel_widths = c(10,1), align = "h")
plot_combi2 <- plot_grid(p1, p2, ncol=2, nrow=1, rel_widths = c(10,1), align = "h")
plot_combi3 <- plot_grid(p1_legend, NULL, ncol=2, nrow=1, rel_widths = c(10,1), align = "h")
plot_combi_tss_wps <- plot_grid(plot_combi1, plot_combi2, plot_combi3, NULL, ncol=1, rel_heights = c(2,15,1,0.5))
title <- ggdraw() + 
  draw_label(
    "Nucleosome positioning over transcription start sites",
    hjust = 0.5,
    size = 20
  ) +
  theme(
    # add margin on the left of the drawing canvas,
    # so title is aligned with left edge of first plot
    plot.margin = margin(0, 0, 0, 7)
  )
title <- plot_grid(title, NULL, ncol=2, nrow=1, rel_widths = c(10,1), align = "h")
plot_combi_tss_wps_title <- plot_grid(title, plot_combi_tss_wps, ncol = 1, rel_heights = c(1,15))

在这里插入图片描述

Supplementary Figure 13: combine Supplementary Figure 13A, Supplementary Figure 13B

small_size <- plot_grid(plot_combi_cpg_wps_title, NULL, plot_combi_tss_wps_title, rel_widths = c(1,0.1,1), labels=c("a", "", "b"), nrow=1,label_size = 20)
#ggsave(paste("../output_final/Fig_3EF.pdf"), plot = small_size, device = "pdf",width = 20, height=12,units = "in", dpi=600, bg = "white", scale = 0.78)
ggsave(paste("../output/Supplementary_Fig_13.jpg"), plot = small_size, device = "jpg",width = 20, height=12,units = "in", dpi=600, bg = "white", scale = 0.82)

在这里插入图片描述

Supplementary Figure 14

Required Libraries

library(tidyverse)
library(fgsea)
library(ggplot2)
library(ggpubr)
library(data.table)
library(ComplexHeatmap)
library(sjPlot)
library(here)

Run linear model fitting RNA, methylation, and WPS to coverage and size.

# Read in table methylation, expression, and sizes
ticks <- c(-36, -16, -4, 0, 4, 16, 36)
trans <- scales::trans_new('Signed.Sqrt', 
      function(x) sign(x) * sqrt(abs(x)), 
      function(x) sign(x) * (abs(x)^2),
      breaks=function(x) ticks)
tab <- readRDS(here('data/giraffe/tss_cpg_connect_filled.rds')) %>%
  column_to_rownames('trans') %>%
  mutate(Coverage = log10(cov + 1)) %>%
  mutate(RNA=scale(log10(rna + 1))) %>%
  mutate(WPS=scale(wps)) %>%
  mutate(Beta=beta) %>%
  mutate(Size=size) %>%
  filter(!is.na(WPS) & !is.na(Beta) & !is.na(RNA) & Size > 0) %>%
  mutate(Methylated=Beta > 0.5) %>%
  mutate(WPS=wps)

# Fit and plot coverage
full_model <- glm(Coverage ~ poly(WPS, 3) * RNA  + Beta * poly(WPS, 3) + RNA * Beta, data=tab)
plot_model(full_model) +
  scale_x_discrete(labels=c('RNA * Meth',
    'WPS\u00b3 * Meth', 'WPS\u00b2 * Meth', 'WPS * Meth',  
    'WPS\u00b3 * RNA', 'WPS\u00b2 * RNA', 'WPS * RNA', 
    'Meth', 'RNA', 'WPS\u00b3', 'WPS\u00b2', 'WPS')) +
  theme_classic(base_size=20) +
  geom_hline(yintercept=0, color='grey', linewidth=0.5) +
  scale_y_continuous(trans=trans, breaks=ticks) +
  ylab('Coefficient')

# Fit and plot size
full_model <- glm(Size ~ poly(WPS, 3) * RNA  + Beta * poly(WPS, 3) + RNA * Beta, data=tab)
plot_model(full_model) +
  scale_x_discrete(labels=c('RNA * Meth',
    'WPS\u00b3 * Meth', 'WPS\u00b2 * Meth', 'WPS * Meth',  
    'WPS\u00b3 * RNA', 'WPS\u00b2 * RNA', 'WPS * RNA', 
    'Meth', 'RNA', 'WPS\u00b3', 'WPS\u00b2', 'WPS')) +
  theme_classic(base_size=20) +
  geom_hline(yintercept=0, color='grey', linewidth=0.5) +
  scale_y_continuous(trans=trans, breaks=ticks) +
  ylab('Coefficient')

请添加图片描述
请添加图片描述

Supplementary Figure 15

Required Libraries

library(tidyverse)
library(data.table)
library(ggplot2)
library(ggpubr)
library(here)

Generate plots

# Read in data
beta_dat <- readRDS(here('data/giraffe/beta_dat.RDS'))
rna_dat <- readRDS(here('data/giraffe/rna_dat.RDS'))

# Prep for plotting
comps <- list(
    c('WB', 'BRCA'),
    c('WB', 'COAD'),
    c('WB', 'LIHC'),
    c('WB', 'LUAD'),
    c('WB', 'LUSC'),
    c('WB', 'OV'),
    c('WB', 'PAAD')
)

stats_fxn <- function(x, lab_height){
    size <- max(x)-min(x)
    data.frame(y=lab_height, label=length(x))
}

# Make plots
scale <- max(rna_dat$Expressed.Genes) - min(rna_dat$Expressed.Genes)
lab_height <- min(rna_dat$Expressed.Genes) - scale*.05
ggboxplot(rna_dat, x='Type', y='Expressed.Genes', fill='Type', 
        add='jitter') +
    xlab('') +
    ylab('Expressed Genes (count)') +
    theme(legend.position='none')

scale <- max(beta_dat$Methylated.Sites) - min(beta_dat$Methylated.Sites)
lab_height <- min(beta_dat$Methylated.Sites) - scale*.05
ggboxplot(beta_dat, x='Type', y='Methylated.Sites', fill='Type', 
        add='jitter') +
    xlab('') +
    ylab('Methylated Sites (count)') +
    theme(legend.position='none')

请添加图片描述
请添加图片描述

  • 10
    点赞
  • 20
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

生信学习者2

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值