文章SameStr(二):图2代码

在这里插入图片描述

title: “Publication Figure 2”

百度云盘链接: https://pan.baidu.com/s/15g7caZp354zIWktpnWzWhQ

提取码: 4sh7

Libraries

Standard Import

library(tidyverse)
library(cowplot)
library(scales)
library(ggpubr)

Special

library(lme4)
library(sjPlot)
library(sjstats)
library(sjlabelled)

Paths

bin_dir = '../bin/'
data_dir = '../data/'
results_dir = '../results/'

Custom Scripts / Theme

source(paste0(bin_dir, 'theme_settings.R'))
source (paste0(bin_dir, 'utilities.R'))
source(paste0(bin_dir, 'species_competition_functions.R'))
source(paste0(bin_dir, 'analysis_metadata.R'))
date <- format(Sys.time(), "%d%m%y")

Import Tables

Metadata

samples.metadata <- read_tsv(paste0(data_dir, 'samples.metadata.tsv'))
microbe.taxonomy <- read_tsv(paste0(data_dir, 'microbe.taxonomy.tsv'))
microbe.metadata <- microbe.taxonomy %>% 
  right_join(., read_tsv(paste0(data_dir, 'microbe.metadata.tsv'))) 

Taxonomy

MetaPhlAn2 tables
combine with taxonomy

mp_species.long <- microbe.taxonomy %>% 
  right_join(., read_tsv(paste0(data_dir, 'samples.mp_profiles.species.tsv')), 
             by = 'species') %>% 
  left_join(., samples.metadata, by = 'Name')

rCDI subset
annotate with metadata

mp_species.rcdi.long <- 
  mp_species.long %>%
  filter(Study %in% c(rcdi.studies)) 

Case Summary

SameStr Case-Summary table

sstr_cases <- read_tsv(paste0(data_dir, 'samples.case_summary.tsv')) %>% 
  left_join(., microbe.taxonomy, by = 'species')

Set Figure

fig = paste0('Fig_2.')

Species-level Source

Mean across Cases

Species-level plot of mean post-FMT rel. abund. by source, using the last available time point of successful FMT cases.

Format

sstr_cases.source_mean <-
  sstr_cases %>% 
  dplyr::filter(fmt_success, last_sample) %>% 
  
  dplyr::mutate(unclassified_sp = ifelse(grepl(species, pattern = '_unclassified'), 
                                  'unclassified', 'classified')) %>% 
  dplyr::group_by(Study_Type, Case_Name, Days_Since_FMT.post, unclassified_sp) %>% 
  summarize_existed(.) %>% 
  
  dplyr::group_by(Study_Type, unclassified_sp) %>% 
  pivot_wider(names_from = 'unclassified_sp', values_from = matches('^[nrf].'),
              names_sep = '.', 
              id_cols = c('Study_Type', 'Case_Name', 'Days_Since_FMT.post')) %>% 
  dplyr::select(Study_Type, Case_Name, Days_Since_FMT.post, starts_with('r.')) %>%
  
  dplyr::group_by(Study_Type, Case_Name, Days_Since_FMT.post) %>% 
  pivot_longer(names_to = 'source', values_to = 'rel_abund', cols = starts_with('r.')) %>% 
  dplyr::ungroup() %>% 
  separate(source, into = c(NA,'Sample','source','classified_sp')) %>% 
  
  dplyr::filter(!source %in% c('either', 'any')) %>%
  dplyr::mutate(tag.source = case_when(
    classified_sp == 'unclassified' ~ 'Unclassified', 
    (Sample == 'donor' & source == 'post') | 
    (Sample == 'post' & source == 'donor') ~ 'Donor-Specific', 
    (Sample == 'recipient' & source == 'post') | 
    (Sample == 'post' & source == 'recipient') ~ 'Recipient-Specific',
    source == 'before' ~ 'Shared R/D', 
    source == 'both' ~ 'Same Sp.',
    Sample == 'post' & source == 'unique' ~ 'New',
    T ~ str_to_title(source)
  )) %>%  
  dplyr::mutate(tag.source = as.factor(tag.source)) %>% 
  dplyr::mutate(Sample = str_to_title(str_extract(Sample, pattern = '.'))) %>% 
  
  dplyr::select(Study_Type, Sample, tag.source, rel_abund) %>% 
  dplyr::group_by(Study_Type, Sample, tag.source) %>%
  dplyr::summarize_all(.funs = funs(rel_abund.mean = round(mean(., na.rm = T), 2),
                             rel_abund.sd = round(sd(., na.rm = T), 2))) %>% 
  dplyr::ungroup()

Statistics

Data

sstr_cases.samples <-
  sstr_cases %>% 
  dplyr::filter(fmt_success, last_sample) %>%
  dplyr::select(Study_Type, Study, Case_Name, Sample.recipient, Sample.post, Sample.donor, fmt_success) %>% 
  dplyr::distinct() %>% 
  dplyr::filter(Study_Type == 'rCDI')
paste0('Recipient: ', length(unique(sstr_cases.samples$Sample.recipient)))
paste0('Post-FMT: ', length(unique(sstr_cases.samples$Sample.post)))
paste0('Donor: ', length(unique(sstr_cases.samples$Sample.donor)))

Last Post-FMT time point of successfully treated patients had species-level sources:

  • 54.41% Same Sp.
  • 24.95% Donor-Specific
  • 11.33% Unclassified
  • 7.65% Recipient-Specific
  • 1.52% New/Unique
    Unseen/Unclear: 11.33 + 1.51 = 12.84%
sstr_cases.source_mean %>% 
  pivot_wider(names_from = 'Sample', 
              values_from = c('rel_abund.mean', 'rel_abund.sd'), 
              names_sep = '.') %>% 
  dplyr::mutate_at(.vars = vars(starts_with('rel_abund')), 
            .funs = funs(replace_na(., 0))) %>% 
  dplyr::select(Study_Type, tag.source, ends_with('R'), ends_with('P'), ends_with('D')) %>% 
  dplyr::arrange(desc(Study_Type), desc(rel_abund.mean.P))

Pie Chart

Pie chart showing rel. abund. of sets of co-occurring species in recipients, donors, and post-FMT patients. Only a small fraction of post-FMT species has not been observed in the recipient or donor before.

coord_polar_fix <- coord_polar(theta = "y", start = 7.15)
coord_polar_fix$is_free <- function() TRUE

plot <-
  sstr_cases.source_mean %>% 
  dplyr::mutate(tag.source = fct_relevel(tag.source, 
                                  'Donor-Specific', 'New', 'Recipient-Specific',
                                  'Unique', 'Shared R/D', 
                                  'Same Sp.', 'Unclassified')) %>% 
  ggplot(aes(x = fct_relevel(Sample, 'R', 'P', 'D'), 
             y = rel_abund.mean, fill = tag.source)) +   
  geom_bar(stat = 'identity', position = 'fill', color = 'black') + # 
  facet_grid(rows = vars(fct_relevel(Study_Type, 'rCDI')), 
             cols = vars(fct_relevel(Sample, 'R', 'P', 'D')),
             scales = 'free', switch = 'y') + 
  scale_fill_manual(values = c(colors.discrete[c(1, 2, 3)], 'white', 'black', 
                               colors.discrete[c(5, 10)])) + 
  theme_cowplot() + 

  coord_polar_fix + 

  theme(aspect.ratio = 1, 
        axis.text = element_blank(),
        axis.title = element_blank(),
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        strip.background = element_blank(),
        panel.spacing = unit(-0.5, "lines"),
        strip.text = element_text(size = 14),
        legend.title = element_blank())

plot

在这里插入图片描述

Exporting plot

output_name = 'PostSource.Mean_SuccessfulCases.Species'

ggsave(plot + theme(legend.position = 'none'), 
       device = 'pdf', dpi = 300, width = 3, height = 2,
       filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))

Case-Wise

Species-level plot of per-Case post-FMT rel. abund. by source

Format

sstr_cases.source_case <-
  
  sstr_cases %>%
  dplyr::mutate(n = 1) %>% 
  
  dplyr::select(Study_Type, Case_Name, Days_Since_FMT.post, fmt_success.label, 
         kingdom, phylum, class, order, family, genus, species, 
         n, rel_abund.recipient, rel_abund.post, rel_abund.donor) %>% 
  
  dplyr::mutate(source = case_when(
    grepl(species, pattern = '_unclassified') ~ 'Unclassified Sp.', 
    (rel_abund.recipient > 0) & (rel_abund.donor > 0) & (rel_abund.post == 0) ~ 'Shared Before',
    (rel_abund.recipient > 0) & (rel_abund.donor > 0) & (rel_abund.post > 0) ~ 'Same Sp.',
    (rel_abund.recipient > 0) & (rel_abund.donor == 0) & (rel_abund.post > 0) ~ 'Recipient / Initial Sample',
    (rel_abund.recipient == 0) & (rel_abund.donor > 0) & (rel_abund.post > 0) ~ 'Donor',
    (rel_abund.recipient > 0) & (rel_abund.donor == 0) & (rel_abund.post == 0) ~ 'Unique Recipient',
    (rel_abund.recipient == 0) & (rel_abund.donor > 0) & (rel_abund.post == 0) ~ 'Unique Donor',
    (rel_abund.recipient == 0) & (rel_abund.donor == 0) & (rel_abund.post > 0) ~ 'Unique to Time Point',
    T ~ 'Other'
    )) %>% 
  dplyr::filter(!source %in% c('Unique Donor', 'Unique Recipient', 'Shared Before')) %>% 
  dplyr::filter(rel_abund.post > 0) %>% 

  dplyr::group_by(Study_Type, Case_Name, Days_Since_FMT.post, fmt_success.label) %>% 
  dplyr::mutate(f = n / sum(n, na.rm = T)) %>% 
  pivot_longer(names_to = 'metric', 
               values_to = 'value', 
               cols = c('rel_abund.post', 'n', 'f')) %>% 

  dplyr::group_by(Study_Type, Case_Name, Days_Since_FMT.post, fmt_success.label, source, metric) %>% 
  dplyr::summarize(value = sum(value), .groups = 'drop') %>% 

  dplyr::mutate(value = ifelse(metric == 'rel_abund.post',  value / 100, value),
         metric = case_when(
           metric == 'rel_abund.post' ~ 'Rel. Abund.', 
           metric == 'f' ~ 'Spp. Fraction', 
           metric == 'n' ~ 'Count of Spp.', 
           T ~ 'Other'))

Bar Charts, per Case

rCDI - Bar charts showing post-FMT rel. abund. of sets of co-occurring species in recipients, donors, and post-FMT patients.

source_order <- c('Donor', 'Recipient / Initial Sample', 'Unique to Time Point', 'Same Sp.')

plot.rcdi <-
  sstr_cases.source_case %>% 
  dplyr::filter(metric == "Rel. Abund.") %>% 
  dplyr::filter(Study_Type == 'rCDI') %>% 
  
  dplyr::mutate(ordering = -as.numeric(as.factor(Case_Name)) + (Days_Since_FMT.post/1000)) %>% 
  ggplot(aes(
    fct_reorder(paste0('D', Days_Since_FMT.post, ' | ', str_remove(Case_Name, pattern = 'Case_')), ordering),
    y = value, 
   fill = fct_relevel(source, source_order))) + 

   geom_bar(stat = 'identity', position = position_fill(), width = 1, col = 'black') +
  theme_cowplot() + 
  scale_y_continuous(labels = percent_format(),
                     breaks = c(1, .75, .5, .25, 0),
                     expand = c(0,0)) +
  scale_fill_manual(values = c(colors.discrete[c(1, 3, 2, 5, 10)]), 
                    
                    guide = guide_legend(reverse = TRUE)) + 
  labs(y = 'Rel. Abund.') + 
  theme(
        axis.title.x = element_blank(),
        axis.line.y = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 10),
        axis.ticks.x = element_blank(),
        legend.title=element_blank(),
        legend.position = 'top')
plot.rcdi

在这里插入图片描述

Control - Bar charts showing >=second time point rel. abund. of sets of co-occurring species in first and second time points of controls.

plot.control <- 
  sstr_cases.source_case %>% 
  dplyr::filter(metric == "Rel. Abund.") %>% 
  dplyr::filter(Study_Type == 'Control') %>% 
  dplyr::filter(Days_Since_FMT.post <= 373) %>% # ~ same time range as last rcdi
  
  dplyr::mutate(ordering = as.numeric(as.factor(Case_Name)) + (Days_Since_FMT.post/1000)) %>% 
  ggplot(aes(
    fct_reorder(paste0('D', Days_Since_FMT.post, ' | ', str_remove(Case_Name, pattern = 'Case_')), ordering),
    y = value, 
   fill = fct_relevel(source, source_order))) + 
  

  geom_bar(stat = 'identity', position = position_fill(), width = 1, col = 'black') +
  theme_cowplot() + 
  scale_y_continuous(labels = percent_format(),
                     breaks = c(1, .75, .5, .25, 0),
                     expand = c(0,0)) +
  scale_fill_manual(values = c(colors.discrete[c(3, 2, 10)]), 
                    
                    guide = guide_legend(reverse = TRUE)) + 
  labs(y = 'Rel. Abund.') + 
  theme(
        axis.title.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 7.5),
        legend.title=element_blank(),
        legend.position = 'top')
plot.control

Exporting plots

output_name = 'PostSource.Cases.rCDI.Species'
ggsave(plot.rcdi + theme(legend.position = 'none'), 
       device = 'pdf', dpi = 300, width = 9, height = 3.5,
       filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))
output_name = 'PostSource.Cases.Control.Species'
ggsave(plot.control + theme(legend.position = 'none'), 
       device = 'pdf', dpi = 300, width = 25, height = 3.5,
       filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))

Post-FMT Presence/Absence (Species)

Using GLMM to model post-FMT species presence/absence post-FMT.
We only have sparse data and single cases for later time points, limiting to <= 84 days

sstr_cases %>%
  dplyr::filter(Days_Since_FMT.post <= 84) %>%
  
  dplyr::filter(Study_Type %in% c('rCDI') ) %>% 
  dplyr::filter(kingdom == 'Bacteria') %>% 
  dplyr::filter(fmt_success) %>% 
  dplyr::distinct(Days_Since_FMT.post) %>% 
  summary(.)

Format

scale_gelman <- function(x) {
  return((x - mean(x)) / (2 * sd(x)))
}

sstr_cases.scaled <- 
  sstr_cases %>%

  dplyr::filter(Study_Type %in% c('rCDI') ) %>% 
  dplyr::filter(kingdom == 'Bacteria') %>% 
  dplyr::filter(fmt_success, Days_Since_FMT.post <= 84) %>%

  dplyr::mutate(source = ifelse(grepl(species, pattern = 'unclassified'),  'Unclassified', source)) %>%
  dplyr::mutate(source = case_when(
    analysis_level == 'species' & source == 'self' ~ 'Self Sp.',
    analysis_level == 'species' & source == 'donor' ~ 'Donor Sp.',
    analysis_level == 'species' & source == 'unique' ~ 'Unique Sp.',
    T ~ source
  )) %>% 
  
  dplyr::left_join(., microbe.metadata) %>%
  dplyr::mutate(habit.oral = ifelse(is.na(habit.oral), F, habit.oral)) %>% 


  dplyr::mutate(Donor = replace_na(rel_abund.donor > 0, F), 
         PostTreatment = replace_na(rel_abund.post > 0, F), 
         Pre = replace_na(rel_abund.recipient > 0, F),
         Both = Donor & Pre, 
         Any = Donor | Pre) %>% 
  
  mutate(Engrafted = `Donor/Post-FMT.mvs` > min_similarity & 
                     `Donor/Post-FMT.overlap` > min_overlap) %>%
  mutate(Engrafted = replace_na(Engrafted, F),
         Engrafted = ifelse(analysis_level == 'species' |
                            grepl(species, pattern = 'unclassified') |
                            !species %in% microbe.metadata$species, NA, Engrafted)) %>% 

  # filter only species that existed in any of recipient or donor
  dplyr::filter(Any) %>%
    
  dplyr::rename(OxyTol = 'oxygen.tolerant', 
         OxyClass = 'oxygen.class',
         DaysSinceFMT = 'Days_Since_FMT.post', 
         Case = 'Case_Name',
         AbundanceDonor = 'rel_abund.donor',
         AbundanceRecipient = 'rel_abund.recipient',
         AbundancePost = 'rel_abund.post') %>% 
  dplyr::mutate(Habitat = ifelse(habit.oral, 'Oral', 'Not-Oral'),
         OxyTol = ifelse(OxyTol, 'Tolerant', 'Not-Tolerant')) %>% 
  dplyr::mutate(Detected = case_when(
    Pre & !Donor ~ 'Recipient', 
    !Pre & Donor ~ 'Donor',
    Pre & Donor ~ 'Both')) %>% 
  dplyr::mutate(Specificity = ifelse(Detected == 'Both', F, T)) %>% 
  dplyr::mutate_at(.vars = vars(Case, 
                         Specificity, 
                         Detected, Engrafted,
                         phylum, class, order, family,
                         OxyTol, OxyClass, Habitat, PostTreatment,
                         ),
            .funs = funs(as.factor(.))) %>% 
  dplyr::mutate(
    Detected = fct_relevel(Detected, 'Recipient'), 
    OxyClass = fct_relevel(OxyClass, 'aerobe'),
    Habitat = fct_relevel(Habitat, 'Not-Oral'),
    OxyTol = fct_relevel(OxyTol, 'Not-Tolerant'),
    Phylum = fct_relevel(phylum, 'Firmicutes')
  ) %>% 
  dplyr::select(phylum:species, Engrafted, PostTreatment, 
         AbundancePost, Detected, AbundanceDonor, AbundanceRecipient, 
         Habitat, OxyTol, DaysSinceFMT, Case, OxyClass) %>% 
  dplyr::mutate(AbundanceDonor = pseudo_log_trans(1e-4, 10)$transform(AbundanceDonor),
         AbundanceRecipient = pseudo_log_trans(1e-4, 10)$transform(AbundanceRecipient),
         AbundanceRatio = AbundanceDonor-AbundanceRecipient,
         DaysSinceFMT = log10(DaysSinceFMT)
         )  %>% 
  dplyr::mutate_at(.vars = vars(starts_with('Abundance'), DaysSinceFMT),
            .funs = funs(scale_gelman(.)))

GLMM Model

Using cases as random effect to account for repeated measurements

fit.rcdi <- glmer(PostTreatment ~ 
                        Detected +
                        AbundanceDonor + 
                        AbundanceRecipient + 
                        AbundanceRecipient:AbundanceDonor + 
                        Detected:DaysSinceFMT + 
                        Detected:Habitat + 
                        Detected:OxyTol +
                        (1 | Case), 
             family=binomial(link='logit'), data=sstr_cases.scaled)
summary(fit.rcdi)

在这里插入图片描述

Forest Plot

a = 'Presence\nBefore FMT'
b = 'Relative\nAbundance'
c = 'Physiology'
d = 'Days\nSince FMT' 
e = 'Oral Habitat' 
f = 'Oxygen Tolerant' 

plot <-
  sjPlot::plot_model(fit.rcdi, se = T, 
                     vline.color = "black", 
                     wrap.labels = 1e4, # transform = NULL,
            sort.est = F, show.p = T, show.values = T, value.offset = 0.35, 
            colors = 'black', axis.title = 'Log of Odds Ratio (95% CI)',
            group.terms = c(a,a, b,b,b, d,d,d, e,e,e, f,f,f),
            title = 'Species Presence Post-FMT'
            ) + 
  theme_cowplot() + 
  theme(strip.background = element_blank(), 
    strip.text.y.left = element_text(angle = 0, hjust = 0), 
    plot.title = element_blank(),
    axis.text.y = element_text(size = 11)
    ) + 
  
  facet_grid(rows = vars(fct_relevel(group, a, b, d, e, f)),
             scales = 'free_y', space = 'free_y', switch = 'y') +
 
    scale_x_discrete(
      labels = c(
        "DetectedBoth" = "Found in Both*",
        "DetectedDonor" = "Donor-Specific*",

        'AbundanceDonor' = 'Donor',
        'AbundanceRecipient' = 'Recipient',
        'AbundanceDonor:AbundanceRecipient' = 'Ratio D/R',
        
        "DetectedRecipient:DaysSinceFMT" = "Recipient-Specific",
        "DetectedBoth:DaysSinceFMT" = "Found in Both",
        "DetectedDonor:DaysSinceFMT" = "Donor-Specific",
        "DetectedRecipient:HabitatOral" = "Recipient-Specific",
        "DetectedBoth:HabitatOral" = "Found in Both",
        "DetectedDonor:HabitatOral" = "Donor-Specific",

        "DetectedRecipient:OxyTolTolerant" = "Recipient-Specific",
        "DetectedBoth:OxyTolTolerant" = "Found in Both",
        "DetectedDonor:OxyTolTolerant" = "Donor-Specific"
        )
      ) +
   scale_y_continuous(expand = c(0.1,0)) 
plot

在这里插入图片描述

Exporting plots, tables

output_name = 'PostPresence.Species.GLMM'

# write model stats to html
sjPlot::tab_model(fit.rcdi, prefix.labels = "varname", transform = NULL,
                  file = paste0(results_dir, fig, output_name, '.Model.html')
                  )

  ggsave(plot + theme(legend.position = 'none'), 
       device = 'pdf', dpi = 300, width = 6, height = 5,
       filename = paste0(results_dir, fig, output_name, '.Plot.pdf'))
  • 19
    点赞
  • 27
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

生信学习者2

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值