空间转录组合作项目分析示例(四)

最新推荐文章于 2024-09-09 23:28:21 发布

追风少年ii

最新推荐文章于 2024-09-09 23:28:21 发布

阅读量211

点赞数

文章标签： python 空间数据分析 hotspot 大数据

本文链接：https://blog.csdn.net/weixin_53637133/article/details/142062572

版权

作者，Evil Genius

上一篇我们完成了空间注释之后，立马就要来到我们的下一步分析，细胞的空间位置关系。也是空间转录组必备的分析之一。

其中包括不同细胞类型之间的空间关系

以及相同细胞类型的空间位置关系

课程上分享的代码均可实现，但是我们不走回头路，分享就分享新的。

首先定义分析空间细胞类型共定位的函数

# # co-localization functions  ---------------------------------------------
# Count interactions between pairs 
colocal_mat <- function(x) {
  stopifnot(
    is.matrix(x), is.numeric(x),
    all(dim(x) > 0), ncol(x) > 1)
  
  if (is.null(colnames(x))) {
    colnames(x) <- seq_len(ncol(x))
  }
  df <- calc_pairs(x)
  df <- compute_interactions(x, df)
  return(df)
}

calc_pairs <- function(x) {
  x <- x > 0
  ab <- combn(colnames(x), 2)
  y <- apply(ab, 2, function(.) sum(matrixStats::rowAlls(x[, ., drop = FALSE])))
  df <- data.frame(t(ab), y)
  names(df) <- c("pair1", "pair2", "n")
  return(df)
}

compute_interactions <- function(x, df) {
  y <- colnames(x)
  df$a <- factor(df$pair1, levels = y)
  df$b <- factor(df$pair2, levels = rev(y))
  t <- colSums(x > 0)
  a <- match(df$pair1, y)
  b <- match(df$pair2, y)
  df$ta <- t[a]
  df$tb <- t[b]
  df$pa <- df$n / df$ta
  df$pb <- df$n / df$tb
  df$ab_mean <- (df$pa + df$pb) / 2
  df$ab_comb <- df$n / (df$ta + df$tb)
  df$ab_comb2 <- df$n / (df$ta * df$tb)
  df$pairs <- paste(df$a, df$b)
  return(df)
}

# Adjacency Helper Functions -------------------------------------------


.neighbors_table_func <- function(metadata, spot_class = "MPid", spot_name = "Key") {
  if(is.factor(metadata[[spot_class]])) {metadata[[spot_class]] <- as.character(metadata[[spot_class]])}
  neighbors_table <- sapply(metadata[[spot_name]], function(spot) {
    spots_row = metadata$array_row[metadata[[spot_name]] == spot]
    spots_col = metadata$array_col[metadata[[spot_name]] == spot]
    
    n1_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) - 1 & metadata$array_col == as.numeric(spots_col) - 1]
    if(length(n1_temp) == 0) {
      n1 = NA
    } else {
      n1 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n1_temp])
    }
    
    n2_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) - 1 & metadata$array_col == as.numeric(spots_col) + 1]
    if (length(n2_temp) == 0) {
      n2 = NA
    } else {
      n2 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n2_temp])
    }
    
    n3_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) & metadata$array_col == as.numeric(spots_col) - 2]
    if (length(n3_temp) == 0) {
      n3 = NA
    } else {
      n3 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n3_temp])
    }
    
    n4_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) & metadata$array_col == as.numeric(spots_col) + 2]
    if (length(n4_temp) == 0) {
      n4 = NA
    } else {        
      n4 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n4_temp])
    }
    
    n5_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) + 1 & metadata$array_col == as.numeric(spots_col) - 1]
    if (length(n5_temp) == 0) {
      n5 = NA
    } else {
      n5 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n5_temp])
    }
    
    n6_temp = metadata[[spot_name]][metadata$array_row == as.numeric(spots_row) + 1 & metadata$array_col == as.numeric(spots_col) + 1]
    if (length(n6_temp) == 0) {
      n6 = NA
    } else {
      n6 = as.character(metadata[[spot_class]][metadata[[spot_name]] == n6_temp])
    }
    
    return(c(n1, n2, n3, n4, n5, n6))
  })
  
  neighbors_table = t(neighbors_table)
  rownames(neighbors_table) = metadata[[spot_name]]
  return(neighbors_table)
}


.prog_connectivity_score <- function(program_neighbors, state) {
  state_neighbors_bin <- ifelse(program_neighbors == state, 0, 1)
  if(is.null(dim(state_neighbors_bin))) {
    prog_connect <- 1
  } else {
    prog_connect <- length(which(apply(state_neighbors_bin, 1, function(x) {sum(na.omit(x))}) > 0))
  }
  return(prog_connect)
}


.calc_adj_mat <- function(neighbored_state, spots_states, state_neighbors_table, spot_class = "MPid") {
  if(!(neighbored_state %in% spots_states[[spot_class]])) {
    return(0)
  } else {
    state_neighbors_bin <- ifelse(state_neighbors_table == neighbored_state, 1, 0)
    if(is.null(dim(state_neighbors_bin))) {
      state_neighbors_sum <- sum(na.omit(state_neighbors_bin))
    } else {
      state_neighbors_sum <- sum(apply(state_neighbors_bin, 1, function(x) {sum(na.omit(x))}))
    }
    return(state_neighbors_sum)
  }
}



# Adjacency Bootstrapping Method ---------------------------------------


# The metadata supplied to the function must contain spatial coordinate information!
calc_spatial_neighborhood <- function(metadata, 
                                      spot_class = "MPid", 
                                      spot_name = "Key", 
                                      samples = "all", 
                                      iter = 20) {
  
  # Check inputs
  if(is.factor(metadata[[spot_class]])) {
    all_labels <- na.omit(levels(metadata[[spot_class]]))
  } else if(is.character(metadata[[spot_class]])) {
    all_labels <- na.omit(unique(metadata[[spot_class]]))
    metadata[[spot_class]] <- as.factor(metadata[[spot_class]])
  } else {stop("spot_class should either be a factor or character vector!")}
  stopifnot("Error: metadata must contain a column specifying sample name." = 
              any(grepl("sample*", colnames(metadata), ignore.case = TRUE)))
  metadata$enumerate <- metadata[[grep("sample*", colnames(metadata), ignore.case = TRUE)]]
  if(length(samples) == 1 && samples == "all") {
    samples <- unique(metadata$enumerate)
  } else if((length(samples) > 1 || samples != "all") & all(samples %in% unique(metadata$enumerate))) {
    samples <- samples
  } else {stop("samples supplied do not match the samples present in the metadata!")}
  
  message("This proccess may take a while...")
  connectivity <- lapply(seq_along(samples), function(i) {
    message(paste0("Start proccessing sample: ", samples[[i]]))
    samp_meta <- metadata[metadata$enumerate == samples[[i]], ]
    
    ## Construct Neighbors tables
    # Randomized spot ids & position table
    rand_neighbors_table <- lapply(seq_len(iter), function(j) {
      shuffled_spots <- sample(samp_meta[[spot_name]], length(samp_meta[[spot_name]]), replace = FALSE)
      shuffled_meta <- samp_meta %>% dplyr::mutate({{spot_name}} := shuffled_spots)
      neighbors_table <- .neighbors_table_func(shuffled_meta, spot_class = spot_class, spot_name = spot_name)
      return(neighbors_table)
    })
    
    # Bootstrapped spots class identity
    boots_neighbors_table <- lapply(seq_len(iter), function(j) {
      new_spots <- unique(sample(samp_meta[[spot_name]], length(samp_meta[[spot_name]]), replace = TRUE))
      bootsteped_meta <- samp_meta[samp_meta[[spot_name]] %in% new_spots, ]
      neighbors_table <- .neighbors_table_func(bootsteped_meta, spot_class = spot_class, spot_name = spot_name)
      return(neighbors_table)
    })
    
    # Actual (observed) neighborhood table
    neighbors_table <- .neighbors_table_func(samp_meta, spot_class = spot_class, spot_name = spot_name)
    
    # Calculate connectivity scores (spatial coherence)
    programs_connectivity_score <- sapply(sort(all_labels), function(cluster) {
      if (!(cluster %in% samp_meta[[spot_class]])) {
        prog_score <- NaN
      } else {
        program_neighbors_table = neighbors_table[rownames(neighbors_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == cluster & !is.na(samp_meta[[spot_class]])], ]
        prog_score <- .prog_connectivity_score(program_neighbors_table, cluster)
      }
      return(prog_score)
    })
    
    
    ## Connectivity
    boots_adj_mat <- lapply(boots_neighbors_table, function(b_table) {
      obs_adj_mat <- sapply(sort(all_labels), function(cluster) {
        if (!(cluster %in% samp_meta[[spot_class]])) {
          zero_neigh <- rep(0, length(all_labels)) 
          names(zero_neigh) <- all_labels
          return(zero_neigh)
        } else {
          cluster_neighbors_table = b_table[rownames(b_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == cluster & !is.na(samp_meta[[spot_class]])], ]
          num_of_neighbores = sapply(sort(as.character(all_labels)), function(neighbored_cluster) {
            num <- .calc_adj_mat(neighbored_cluster, samp_meta, cluster_neighbors_table)
            return(num)
          })
          return(num_of_neighbores)
        }})
      
      diag(obs_adj_mat) <- 0
      weighted_adj_mat <- apply(obs_adj_mat, 2, function(x) {x / sum(x)})
      
      comp4mat <- programs_connectivity_score[colnames(weighted_adj_mat)]
      comp4mat[is.na(comp4mat)] <- 0
      weighted_denominator_v2 <- sapply(c(names(comp4mat)), function(prog){
        new_comp <- comp4mat
        new_comp[prog] <- 0
        new_comp <- new_comp / sum(new_comp)
        return(new_comp)
      })
      
      norm_adj_mat <- weighted_adj_mat / weighted_denominator_v2
      
      upper_mat <- (norm_adj_mat[upper.tri(norm_adj_mat)] + t(norm_adj_mat)[upper.tri(t(norm_adj_mat))]) / 2
      lower_mat <- rep(NaN, length(upper_mat))
      avg_mat <- norm_adj_mat
      avg_mat[upper.tri(avg_mat)] <- upper_mat
      avg_mat[lower.tri(avg_mat)] <- lower_mat
      
      rownames(avg_mat) <- colnames(avg_mat)
      avg_mat <- t(avg_mat)
      avg_mat <- as.data.frame(avg_mat)
      avg_mat$pair2 <- rownames(avg_mat)
      long <- reshape2::melt(data.table::setDT(avg_mat), id.vars = c("pair2"), variable.name = "pair1")
      return(long)
    })
    
    
    # Random connectivity 
    rand_adj_mat <- lapply(rand_neighbors_table, function(b_table) {
      obs_adj_mat <- sapply(sort(all_labels), function(cluster) {
        if (!(cluster %in% samp_meta[[spot_class]])) {
          return(rep(0, length(all_labels)))
        } else {
          cluster_neighbors_table = b_table[row.names(b_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == cluster & !is.na(samp_meta[[spot_class]])], ]
          num_of_neighbores = sapply(sort(as.character(all_labels)), function(neighbored_cluster) {
            num <- .calc_adj_mat(neighbored_cluster, samp_meta, cluster_neighbors_table)
            return(num)
          })
          return(num_of_neighbores)
        }})
      
      diag(obs_adj_mat) <- 0
      weighted_adj_mat <- apply(obs_adj_mat, 2, function(x) {x / sum(x)})
      
      comp4mat <- programs_connectivity_score[colnames(weighted_adj_mat)]
      comp4mat[is.na(comp4mat)] <- 0
      weighted_denominator_v2 <- sapply(c(names(comp4mat)), function(prog) {
        new_comp <- comp4mat
        new_comp[prog] <- 0
        new_comp <- new_comp / sum(new_comp)
        return(new_comp)
      })
      
      norm_adj_mat <- weighted_adj_mat / weighted_denominator_v2
      
      upper_mat <- (norm_adj_mat[upper.tri(norm_adj_mat)] + t(norm_adj_mat)[upper.tri(t(norm_adj_mat))]) / 2
      lower_mat <- rep(NaN, length(upper_mat))
      avg_mat <- norm_adj_mat
      avg_mat[upper.tri(avg_mat)] <- upper_mat
      avg_mat[lower.tri(avg_mat)] <- lower_mat
      
      rownames(avg_mat) <- colnames(avg_mat)
      avg_mat <- t(avg_mat)
      avg_mat <- as.data.frame(avg_mat)
      avg_mat$pair2 <- rownames(avg_mat)
      long <- reshape2::melt(data.table::setDT(avg_mat), id.vars = c("pair2"), variable.name = "pair1")
      return(long)
    })
    
    final_adj_mat <- data.frame(pair1 = boots_adj_mat[[1]]$pair1,
                                pair2 = boots_adj_mat[[1]]$pair2,
                                connectivity = apply(sapply(boots_adj_mat, function(x) {return(x$value)}), 1, function(k) {mean(na.omit(k))}),
                                effect_size = apply(sapply(boots_adj_mat, function(x) {return(x$value)}), 1, function(k) {mean(na.omit(k))}) / apply(sapply(rand_adj_mat, function(x) {return(x$value)}), 1, function(k) {mean(na.omit(k))}),
                                sd = apply(sapply(boots_adj_mat, function(x) {return(x$value)}), 1, function(k) {sd(na.omit(k))}))
    
    
    ## Add p-value 
    pval <- sapply(seq_len(nrow(final_adj_mat)), function(j1) {
      obs <- na.omit(as.numeric(sapply(seq_len(iter), function(j2) {
        return(boots_adj_mat[[j2]][j1, "value"])
      })))
      exp <- na.omit(as.numeric(sapply(seq_len(iter), function(j2) {
        return(rand_adj_mat[[j2]][j1, "value"])
      })))
      if (length(obs) == 0) {
        return(NA)
      } else {
        t.res <- t.test(obs, exp, alternative = "two.sided", var.equal = FALSE)
        return(t.res$p.value)
      }
    })
    
    final_adj_mat$pval <- pval
    return(final_adj_mat)
  })
  names(connectivity) <- samples
  return(connectivity)
}





# Adjacency Permutation Method ----------------------------------------------------

# The metadata supplied to the function must contain spatial coordinate information!
neighbor_spot_props <- function(metadata, 
                                zone = "All", 
                                #site = "All", 
                                samples = "All",
                                spot_class = "MPid", 
                                spot_name = "Key", 
                                #zone_by = "EpiStroma", 
                                n_cores = 10, 
                                n_perm = 1000, 
                                signif_val = 0.01, 
                                plot_perm_distr = TRUE, 
                                filter_signif = TRUE, 
                                zscore_thresh = 1) {
  # Load variables
  all_states <- as.character(unique(unlist(sapply(c(1:length(sample_ls)), function(i){
    mp_assign <- readRDS('test.rds')
    return(unique(mp_assign$spot_type_meta_new))
  }))))
  #samples_metadata <- readRDS(file = here("Analysis/Metadata/samples_metadata.rds"))
  neighbs_stats_ls <- list()
  signif_neighbs_ls <- list()
  distr_plots_ls <- list()
  
  # Filter metadata input by selected site and zone
  stopifnot("Error: metadata must contain a column specifying sample name." = 
              any(grepl("sample*", colnames(metadata), ignore.case = TRUE)))
  # if(site != "All") {
  #   stopifnot("Error: Site argument must be one of the following: 'Laryngeal' / 'Oral' / 'Oropharynx'." = 
  #               site %in% unique(samples_metadata$Site))
  #   metadata <- metadata[metadata$Sample %in% samples_metadata$Sample[samples_metadata$Site == site], ]
  # }
  if(zone != "All") {
    #stopifnot("Error: Argument `zone_by` must be either 'EpiStroma' (for separating Epithelial, Stromal or Mixed spots) or 'Zone' (for separating Epithelial zonation)." = 
    #           zone_by %in% c("EpiStroma", "Zone") & length(zone_by) == 1)
    # if(zone_by == "EpiStroma") {
    #   stopifnot("Error: Argument `zone` should specify on which tumor region neighboring states will be computed - 'Epithelial' / 'Stroma' / 'Mixed'." = 
    #               zone %in% unique(metadata$EpiStroma) & length(zone) == 1)
    #   metadata <- metadata[metadata$EpiStroma == zone, ]
    # }
    # if(zone_by == "Zone") {
    #   stopifnot("Error: The variable `Zone` is not found in the metadata. Run classify_zones function first." = 
    #               any(colnames(metadata) %in% "Zone"))
    #  stopifnot("Error: Argument `zone` should specify on which Epithelial zone neighboring states will be computed - 'Zone_1' / 'Zone_2' / 'Zone_3'." = 
    #               zone %in% unique(metadata$Zone) & length(zone) == 1)
    metadata <- metadata[metadata$Zone == zone, ]
    # }
  }
  metadata$enumerate <- metadata[[grep("sample*", colnames(metadata), ignore.case = TRUE)]]
  if(length(samples) == 1 && samples == "All") {
    samples <- unique(metadata$enumerate)
  } else if((length(samples) > 1 || samples != "All") & all(samples %in% unique(metadata$enumerate))) {
    samples <- samples
  } else {stop("samples supplied do not match the samples present in the metadata!")}
  
  for(samp in samples) {
    message(paste0("Processing sample: ", samp))
    
    ### ============ Actual state pairs connectivity values ============
    # Construct neighboring table for the sample
    samp_meta <- metadata[metadata$Sample == samp, ]
    neighbors_table <- .neighbors_table_func(samp_meta, spot_class = spot_class, spot_name = spot_name)
    
    # Calculate connectivity scores (spatial coherence)
    programs_connectivity_score <- sapply(sort(all_states), function(state) {
      if (!(state %in% samp_meta[[spot_class]])) {
        prog_score <- NaN
      } else {
        program_neighbors_table = neighbors_table[rownames(neighbors_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == state & !is.na(samp_meta[[spot_class]])], ]
        prog_score <- .prog_connectivity_score(program_neighbors_table, state)
      }
      return(prog_score)
    })
    
    # Count neighboring states for each spot (number of free (non-coherent) X state classified spots that neighbor free Y reference-state)
    obs_adj_mat <- sapply(sort(all_states), function(state) {
      if (!(state %in% samp_meta[[spot_class]])) {
        zero_neigh <- rep(0, length(all_states)) 
        names(zero_neigh) <- all_states
        return(zero_neigh)
      } else {
        state_neighbors_table = neighbors_table[rownames(neighbors_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == state & !is.na(samp_meta[[spot_class]])], ]
        num_of_neighbors = sapply(sort(as.character(all_states)), function(neighbored_state) {
          num <- .calc_adj_mat(neighbored_state, samp_meta, state_neighbors_table)
          return(num)
        })
        return(num_of_neighbors)
      }})
    diag(obs_adj_mat) <- 0
    weighted_adj_mat <- apply(obs_adj_mat, 2, function(x) {x / sum(x)})
    
    # Calculate corrected proportion of neighboring spots
    comp4mat <- programs_connectivity_score[colnames(weighted_adj_mat)]
    comp4mat[is.na(comp4mat)] <- 0
    weighted_denominator_v2 <- sapply(c(names(comp4mat)), function(prog){
      new_comp <- comp4mat
      new_comp[prog] <- 0
      new_comp <- new_comp / sum(new_comp)
      return(new_comp)
    })
    
    norm_adj_mat <- weighted_adj_mat / weighted_denominator_v2
    baseline_stat <- Melt(norm_adj_mat)
    
    ### ============ Permuted state pairs connectivity values ============
    # Permute neighbor table `n_perm` times, to create a permuted sampling distribution which will serve as the null distribution
    permute_neighbs <- parallel::mclapply(1:n_perm, function(x) {
      shuffled_spots <- sample(samp_meta[[spot_name]], length(samp_meta[[spot_name]]), replace = FALSE)
      shuffled_meta <- samp_meta %>% dplyr::mutate({{spot_name}} := shuffled_spots)
      neighbors_table <- .neighbors_table_func(shuffled_meta, spot_class = spot_class, spot_name = spot_name)
      neighbors_table <- neighbors_table[match(samp_meta[[spot_name]], rownames(neighbors_table)), ]
      
      obs_adj_mat <- sapply(sort(all_states), function(state) {
        if (!(state %in% samp_meta[[spot_class]])) {
          zero_neigh <- rep(0, length(all_states)) 
          names(zero_neigh) <- all_states
          return(zero_neigh)
        } else {
          state_neighbors_table = neighbors_table[rownames(neighbors_table) %in% samp_meta[[spot_name]][samp_meta[[spot_class]] == state & !is.na(samp_meta[[spot_class]])], ]
          num_of_neighbors = sapply(sort(as.character(all_states)), function(neighbored_state) {
            num <- .calc_adj_mat(neighbored_state, samp_meta, state_neighbors_table)
            return(num)
          })
          return(num_of_neighbors)
        }})
      diag(obs_adj_mat) <- 0
      weighted_adj_mat <- apply(obs_adj_mat, 2, function(x) {x / sum(x)})
      
      comp4mat <- programs_connectivity_score[colnames(weighted_adj_mat)]
      comp4mat[is.na(comp4mat)] <- 0
      weighted_denominator_v2 <- sapply(c(names(comp4mat)), function(prog){
        new_comp <- comp4mat
        new_comp[prog] <- 0
        new_comp <- new_comp / sum(new_comp)
        return(new_comp)
      })
      
      norm_adj_mat <- weighted_adj_mat / weighted_denominator_v2
      perm_stat <- Melt(norm_adj_mat)
    }, mc.cores = n_cores)
    
    perm_stats <- cbind.data.frame(permute_neighbs[[1]]$rows, permute_neighbs[[1]]$cols, do.call(cbind.data.frame, lapply(permute_neighbs, function(y) y$vals))) %>% 
      magrittr::set_colnames(c("rows", "cols", paste0("perm_", seq(n_perm))))
    
    
    
    # Compare observed neighboring state proportion to the permuted null distribution - extract Neighbor-proportion, P-value & Permuted distribution statistics
    # merged_df <- merge(baseline_stat, perm_stats, by = c("rows", "cols"))
    merged_df <- na.omit(merge(baseline_stat, perm_stats, by = c("rows", "cols")))
    scaled_vals <- merged_df %>% dplyr::select(-c("rows", "cols")) %>% t() %>% scale() %>% t() %>% 
      as.data.frame() %>% dplyr::mutate(rows = merged_df$rows, cols = merged_df$cols, .before = 1) %>% dplyr::pull(vals)
    neighbs_stats <- lapply(seq_len(nrow(merged_df)), function(i) {
      summary_stats <- summary(as.numeric(merged_df[i, grep("perm_", colnames(merged_df))]))
      if(merged_df[i, "vals"] == 0) {
        pvals <- 1
        tail_direction <- NA
      } else {
        Rtail_pvals <- (sum(as.numeric(merged_df[i, grep("perm_", colnames(merged_df))]) >= merged_df[i, "vals"]) + 1) / (n_perm + 1)
        Ltail_pvals <- (n_perm - sum(as.numeric(merged_df[i, grep("perm_", colnames(merged_df))]) > merged_df[i, "vals"]) + 1) / (n_perm + 1)
        tail_direction <- Rtail_pvals < Ltail_pvals
        pvals <- Rtail_pvals * tail_direction + Ltail_pvals * !(tail_direction)
      }
      sds <- sd(as.numeric(merged_df[i, grep("perm_", colnames(merged_df))]))
      out <- data.frame(Neighb_State = merged_df$rows[i], Ref_State = merged_df$cols[i], Prop_Neighb = merged_df$vals[i], Z_Score = scaled_vals[i],
                        Perm_Min = summary_stats[["Min."]], Perm_Max = summary_stats[["Max."]], Perm_Mean = summary_stats[["Mean"]], Perm_Median = summary_stats[["Median"]],
                        P_val = pvals, Interaction_Type = ifelse(is.na(tail_direction), NA, 
                                                                 ifelse(tail_direction,"Drawn","Repelled")), SD = sds)
    })
    neighbs_stats <- do.call(rbind.data.frame, neighbs_stats)
    neighbs_stats$Significant <- neighbs_stats$P_val < signif_val
    neighbs_stats_ls[[samp]] <- neighbs_stats
    
    if(filter_signif) {
      top_neighbs_df <- neighbs_stats %>% 
        dplyr::filter(Neighb_State != Ref_State) %>%
        dplyr::mutate(State_Pair = paste0(Ref_State, "_", Neighb_State)) %>%
        dplyr::filter(Significant == TRUE) %>% 
        mutate(Signif_Pair = unname(sapply(.$State_Pair, function(x) {
          ifelse(paste0(scalop::substri(x, pos = 2, sep = "_"), "_", scalop::substri(x, pos = 1, sep = "_")) %in% .$State_Pair,
                 yes = "Yes", no = "No")
        }))) %>% dplyr::filter(abs(Z_Score) >= zscore_thresh, Signif_Pair == "Yes") %>% dplyr::arrange(desc(Z_Score))
      signif_neighbs_ls[[samp]] <- top_neighbs_df
    }
    
    # Generate plots
    if(plot_perm_distr) {
      plot_df <- reshape2::melt(perm_stats)
      distr_plot <- ggplot(plot_df, aes(x = value)) +
        facet_grid(rows ~ cols) +
        geom_histogram() +
        geom_vline(data = baseline_stat, aes(xintercept = vals), color = "red")
      distr_plots_ls[[samp]] <- distr_plot
    }
  }
  
  if(isTRUE(plot_perm_distr) & isFALSE(filter_signif)) {return(list(Neighbor_Stats = neighbs_stats_ls, Distr_plots = distr_plots_ls))}
  else if(isTRUE(filter_signif) & isFALSE(plot_perm_distr)) {return(list(Neighbs_Stats = neighbs_stats_ls, Top_Neighbs = signif_neighbs_ls))}
  else if(isTRUE(plot_perm_distr) & isTRUE(filter_signif)) {return(list(Neighbs_Stats = neighbs_stats_ls, Top_Neighbs = signif_neighbs_ls, Distr_plots = distr_plots_ls))}
  else {return(neighbs_stats_ls)}
}




Melt <- function(x) {
  if (!is.data.frame(x = x)) {
    x <- as.data.frame(x = x)
  }
  return(data.frame(
    rows = rep.int(x = rownames(x = x), times = ncol(x = x)),
    cols = unlist(x = lapply(X = colnames(x = x), FUN = rep.int, times = nrow(x = x))),
    vals = unlist(x = x, use.names = FALSE)
  ))
}



# regional comp functions  ------------------------------------------------
sample_programs_composition <- function(spots_clusters, gen_clusters){
  composition <- table(spots_clusters$spot_type)
  old_clusters <- names(composition)
  add_clusters <- gen_clusters[!gen_clusters %in% old_clusters]
  sapply(add_clusters,function(clust){
    composition <<- c(composition, clust = 0)
  })
  
  names(composition) <- c(old_clusters, add_clusters)
  final_composition <- composition[sort(names(composition))]/sum(composition)
  return(final_composition)
}

obs_program_spatial_score <- function(program_neighbors, cluster){
  cluster_neighbors_bin <- ifelse(program_neighbors == cluster, 1, 0)
  if(is.null(dim(program_neighbors))){
    cluster_neighbors_sum <- sum(cluster_neighbors_bin)
  } else {
    cluster_neighbors_sum <- apply(cluster_neighbors_bin,1,function(rx){sum(na.omit(rx))})
  }
  obs <- mean(cluster_neighbors_sum)
  return(obs)
}

one_val <- function(spots_num){
  a <- sqrt((4*spots_num)/(6*sqrt(3)))
  oneval <- (6*spots_num-12*a-6)/spots_num
  return(oneval)
}

zero_val <- function(rand_table, spots_clusters, cluster){
  all_zeroval <- sapply(rand_table, function(neighbors_rand_table){
    program_rand_neighbors_table = neighbors_rand_table[row.names(neighbors_rand_table) %in% spots_clusters$barcodes[spots_clusters$spot_type == cluster],]
    rand_obs <- obs_program_spatial_score(program_rand_neighbors_table, cluster)
    return(rand_obs)
  })
  zeroval <- mean(all_zeroval)
  return(zeroval)
}


calc_adj_mat <- function(neighbored_cluster, spots_clusters, cluster_neighbors_table){
  if (!(neighbored_cluster %in% spots_clusters$spot_type)) {
    return(0)
  } else {
    cluster_neighbors_bin <- ifelse(cluster_neighbors_table == neighbored_cluster, 1, 0)
    
    if (is.null(dim(cluster_neighbors_bin))){
      cluster_neighbors_sum <- sum(na.omit(cluster_neighbors_bin))
    } else {
      cluster_neighbors_sum <- sum(apply(cluster_neighbors_bin,1,function(x){sum(na.omit(x))}))
    }
    return(cluster_neighbors_sum)
  }
}


prog_connectivity_score <- function(program_neighbors, cluster){
  cluster_neighbors_bin <- ifelse(program_neighbors == cluster, 0, 1)
  if(is.null(dim(program_neighbors))){
    prog_connect <- 1
  } else {
    prog_connect <- length(which(apply(cluster_neighbors_bin,1,function(x){sum(na.omit(x))}) >0))
  }
  return(prog_connect)
}

neighbors_table_func <- function(spots_positions,spots_clusters){
  neighbors_table <- sapply(spots_clusters$barcodes, function(spot){
    spots_row = spots_positions[spots_positions$V1 == spot, 3]
    spots_col = spots_positions[spots_positions$V1 == spot, 4]
    
    if (spots_col == 0 | spots_row == 0) {
      c1 = NaN
    } else {
      n1 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1]
      if (spots_positions$V2[spots_positions$V1 == n1] == 0 | !(n1 %in% spots_clusters$barcodes)){
        c1 = NaN
      } else {
        c1 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n1])
      }
    }
    
    if (spots_col == 127 | spots_row == 0) {
      c2 = NaN
    } else {
      n2 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1]
      if (spots_positions$V2[spots_positions$V1 == n2] == 0 | !(n2 %in% spots_clusters$barcodes)){
        c2 = NaN
      } else {
        c2 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n2])
      }
    }
    
    if (spots_col == 0 | spots_col == 1) {
      c3 = NaN
    } else {
      n3 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2]
      if (spots_positions$V2[spots_positions$V1 == n3] == 0 | !(n3 %in% spots_clusters$barcodes)){
        c3 = NaN
      } else {
        c3 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n3])
      }
    }
    
    if (spots_col == 126 | spots_col == 127) {
      c4 = NaN
    } else {
      n4 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2]
      if (spots_positions$V2[spots_positions$V1 == n4] == 0 | !(n4 %in% spots_clusters$barcodes)){
        c4 = NaN
      } else {
        c4 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n4])
      }
    }
    
    if (spots_col == 0 | spots_row == 77) {
      c5 = NaN
    } else {
      n5 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1]
      if (spots_positions$V2[spots_positions$V1 == n5] == 0 | !(n5 %in% spots_clusters$barcodes)){
        c5 = NaN
      } else {
        c5 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n5])
      }
    }
    
    if (spots_col == 127 | spots_row == 77) {
      c6 = NaN
    } else {
      n6 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1]
      if (spots_positions$V2[spots_positions$V1 == n6] == 0 | !(n6 %in% spots_clusters$barcodes)){
        c6 = NaN
      } else {
        c6 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n6])
      }
    }
    
    
    return(c(c1,c2,c3,c4,c5,c6))
    
  })
  
  neighbors_table = t(neighbors_table)
  row.names(neighbors_table) = spots_clusters$barcodes
  
  return(neighbors_table)
}

neighbors_table_funcV2 <- function(spots_positions,spots_clusters){
  neighbors_table <- sapply(spots_clusters$barcodes, function(spot){
    spots_row = spots_positions[spots_positions$V1 == spot, 3]
    spots_col = spots_positions[spots_positions$V1 == spot, 4]
    
    if (spots_col == 0 | spots_row == 0) {
      c1 = NaN
    } else {
      n1 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1]
      if (spots_positions$V2[spots_positions$V1 == n1] == 0 | !(n1 %in% spots_clusters$barcodes)){
        c1 = NaN
      } else {
        c1 = as.character(n1)
      }
    }
    
    if (spots_col == 127 | spots_row == 0) {
      c2 = NaN
    } else {
      n2 = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1]
      if (spots_positions$V2[spots_positions$V1 == n2] == 0 | !(n2 %in% spots_clusters$barcodes)){
        c2 = NaN
      } else {
        c2 = as.character(n2)
      }
    }
    
    if (spots_col == 0 | spots_col == 1) {
      c3 = NaN
    } else {
      n3 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2]
      if (spots_positions$V2[spots_positions$V1 == n3] == 0 | !(n3 %in% spots_clusters$barcodes)){
        c3 = NaN
      } else {
        c3 = as.character(n3)
      }
    }
    
    if (spots_col == 126 | spots_col == 127) {
      c4 = NaN
    } else {
      n4 = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2]
      if (spots_positions$V2[spots_positions$V1 == n4] == 0 | !(n4 %in% spots_clusters$barcodes)){
        c4 = NaN
      } else {
        c4 = as.character(n4)
      }
    }
    
    if (spots_col == 0 | spots_row == 77) {
      c5 = NaN
    } else {
      n5 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1]
      if (spots_positions$V2[spots_positions$V1 == n5] == 0 | !(n5 %in% spots_clusters$barcodes)){
        c5 = NaN
      } else {
        c5 = as.character(n5)
      }
    }
    
    if (spots_col == 127 | spots_row == 77) {
      c6 = NaN
    } else {
      n6 = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1]
      if (spots_positions$V2[spots_positions$V1 == n6] == 0 | !(n6 %in% spots_clusters$barcodes)){
        c6 = NaN
      } else {
        c6 = as.character(n6)
      }
    }
    
    
    return(c(c1,c2,c3,c4,c5,c6))
    
  })
  
  neighbors_table = t(neighbors_table)
  row.names(neighbors_table) = spots_clusters$barcodes
  
  return(neighbors_table)
}


win_prox_neighbors_table_func <- function(spots_positions,spots_clusters){
  neighbors_table <- sapply(spots_clusters$barcodes, function(spot){
    spots_row = spots_positions[spots_positions$V1 == spot, 3]
    spots_col = spots_positions[spots_positions$V1 == spot, 4]
    
    if (spots_col == 0 | spots_row == 0) {
      n1 = NA
    } else {
      n1_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1]
      if (length(n1_temp) == 0) {
        n1 = NA
      } else if (spots_positions$V2[spots_positions$V1 == n1_temp] == 0 | !(n1_temp %in% spots_clusters$barcodes)){
        n1 = NA
      } else {
        n1 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n1_temp])
      }
    }
    
    if (spots_col == 127 | spots_row == 0) {
      n2 = NA
    } else {
      n2_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1]
      if (length(n2_temp) == 0) {
        n2 = NA
      } else if (spots_positions$V2[spots_positions$V1 == n2_temp] == 0 | !(n2_temp %in% spots_clusters$barcodes)){
        n2 = NA
      } else {
        n2 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n2_temp])
      }
    }
    
    if (spots_col == 0 | spots_col == 1) {
      n3 = NA
    } else {
      n3_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2]
      if (length(n3_temp) == 0) {
        n3 = NA
      } else if (spots_positions$V2[spots_positions$V1 == n3_temp] == 0 | !(n3_temp %in% spots_clusters$barcodes)){
        n3 = NA
      } else {
        n3 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n3_temp])
      }
    }
    
    if (spots_col == 126 | spots_col == 127) {
      n4 = NA
    } else {
      n4_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2]
      if (length(n4_temp) == 0) {
        n4 = NA
      } else if (spots_positions$V2[spots_positions$V1 == n4_temp] == 0 | !(n4_temp %in% spots_clusters$barcodes)){
        n4 = NA
      } else {
        n4 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n4_temp])
      }
    }
    
    if (spots_col == 0 | spots_row == 77) {
      n5 = NA
    } else {
      n5_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1]
      if (length(n5_temp) == 0) {
        n5 = NA
      } else if (spots_positions$V2[spots_positions$V1 == n5_temp] == 0 | !(n5_temp %in% spots_clusters$barcodes)){
        n5 = NA
      } else {
        n5 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n5_temp])
      }
    }
    
    if (spots_col == 127 | spots_row == 77) {
      n6 = NA
    } else {
      n6_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1]
      if (length(n6_temp) == 0) {
        n6 = NA
      } else if (spots_positions$V2[spots_positions$V1 == n6_temp] == 0 | !(n6_temp %in% spots_clusters$barcodes)){
        n6 = NA
      } else {
        n6 = as.character(spots_clusters$spot_type[spots_clusters$barcodes == n6_temp])
      }
    }
    
    
    return(c(n1,n2,n3,n4,n5,n6))
    
  })
  
  neighbors_table = t(neighbors_table)
  row.names(neighbors_table) = spots_clusters$barcodes
  
  return(neighbors_table)
}


prox_neighbors_table_func <- function(spots_positions,spots_clusters){
  neighbors_table <- sapply(spots_clusters$barcodes, function(spot){
    spots_row = spots_positions[spots_positions$V1 == spot, 3]
    spots_col = spots_positions[spots_positions$V1 == spot, 4]
    
    if (spots_col == 0 | spots_row == 0) {
      n1 = NA
    } else {
      n1_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col - 1]
      if (spots_positions$V2[spots_positions$V1 == n1_temp] == 0 | !(n1_temp %in% spots_clusters$barcodes)){
        n1 = NA
      } else {
        n1 = n1_temp
      }
    }
    
    if (spots_col == 127 | spots_row == 0) {
      n2 = NA
    } else {
      n2_temp = spots_positions$V1[spots_positions$V3 == spots_row - 1 & spots_positions$V4 == spots_col + 1]
      if (spots_positions$V2[spots_positions$V1 == n2_temp] == 0 | !(n2_temp %in% spots_clusters$barcodes)){
        n2 = NA
      } else {
        n2 = n2_temp
      }
    }
    
    if (spots_col == 0 | spots_col == 1) {
      n3 = NA
    } else {
      n3_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col - 2]
      if (spots_positions$V2[spots_positions$V1 == n3_temp] == 0 | !(n3_temp %in% spots_clusters$barcodes)){
        n3 = NA
      } else {
        n3 = n3_temp
      }
    }
    
    if (spots_col == 126 | spots_col == 127) {
      n4 = NA
    } else {
      n4_temp = spots_positions$V1[spots_positions$V3 == spots_row & spots_positions$V4 == spots_col + 2]
      if (spots_positions$V2[spots_positions$V1 == n4_temp] == 0 | !(n4_temp %in% spots_clusters$barcodes)){
        n4 = NA
      } else {
        n4 = n4_temp
      }
    }
    
    if (spots_col == 0 | spots_row == 77) {
      n5 = NA
    } else {
      n5_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col - 1]
      if (spots_positions$V2[spots_positions$V1 == n5_temp] == 0 | !(n5_temp %in% spots_clusters$barcodes)){
        n5 = NA
      } else {
        n5 = n5_temp
      }
    }
    
    if (spots_col == 127 | spots_row == 77) {
      n6 = NA
    } else {
      n6_temp = spots_positions$V1[spots_positions$V3 == spots_row + 1 & spots_positions$V4 == spots_col + 1]
      if (spots_positions$V2[spots_positions$V1 == n6_temp] == 0 | !(n6_temp %in% spots_clusters$barcodes)){
        n6 = NA
      } else {
        n6 = n6_temp
      }
    }
    
    
    return(c(n1,n2,n3,n4,n5,n6))
    
  })
  
  neighbors_table = t(neighbors_table)
  row.names(neighbors_table) = spots_clusters$barcodes
  
  return(neighbors_table)
}

大家还知道不同平台共定位需要考虑的邻居数么？

主脚本

library(stringr)
library(cocor)
library(dplyr)
library(data.table)
library(stats)
library(scalop)
library(tibble)
library(ggrepel)
library(tidyr)
library(scales)
library(matrixStats)

# Run functions first

# co-localization  ---------------------------------------------------------
#### log-transform expression matrices 
m <- readRDS("test.rds")
m <- m[-grep("^MT-|^RPL|^RPS", rownames(m)), ]
if(min(colSums(m)) == 0){m <- m[, colSums(m) != 0]}
 scaling_factor <- 1000000/colSums(m)
 m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*")
 m_loged <- log2(1 + (m_CPM/10))
 # removing genes with zero variance across all cells
 var_filter <- apply(m_loged, 1, var)
 m_proc <- m_loged[var_filter != 0, ]
 # filtering out lowly expressed genes
 exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)]
 m_proc <- m_proc[exp_genes, ]
 # output to a list 
 per_sample_mat[[i]] <- m_proc
 names(per_sample_mat)[i] <- sample_ls[[i]]
 rm(path, files, m, m_CPM, m_loged, var_filter, exp_genes, m_proc)

#### co-localiztion
results<-list()
for(i in seq_along(names(all_decon))) {
  decon <- all_decon[[sample_ls[i]]]
  spot_list <- lapply(c(1:500),function(x){ #num. of shuffled matrices
    all_row <- sapply(c(1:dim(decon)[2]),function(c){
      new_row <- sample(c(1:length(row.names(decon))), length(row.names(decon)), replace = FALSE)
      return(new_row) 
    })
    new_mat <- sapply(c(1:dim(decon)[2]),function(c){
      sh_row <- decon[,c][all_row[,c]]#generate many deconv matrices by shuffling cell types
      return(sh_row)
    })
    colnames(new_mat) <- colnames(decon)
    return(new_mat)
  }) 
  
  #run colocal_mat_fun -- perform colocalization on the shuffled decon matrices
  mats_list<- lapply(spot_list, function(new_decon){
    colocal_mats<-colocal_mat(new_decon)
    return(colocal_mats)
  })
  mean_mat <- sapply(mats_list,function(x){ #expected colocalization calculated from shuffled decon matrices
    return(x[,"ab_comb"])
  })
  pairs_shuf_mean <- apply(mean_mat,1,mean)
  names(pairs_shuf_mean) <- mats_list[[1]][,"pairs"]
  rownames(mean_mat) <- mats_list[[1]][,"pairs"]
  
  obs_coloc <- colocal_mat(decon)
  
  r1 = obs_coloc$ab_comb #observed colocal
  r2 = pairs_shuf_mean #expected colocal
  
  n = dim(decon)[1] #num total spots
  
  fisher = ((0.5*log((1+r1)/(1-r1)))-(0.5*log((1+r2)/(1-r2))))/((1/(n-3))+(1/(n-3)))^0.5
  
  p.value = (2*(1-pnorm(abs(as.matrix(fisher)))))
  effect_size <- r1/r2 #observed/expected
  results[[i]]<-data.frame(pvalue = p.value,
                           effect_size = effect_size)
  
}
names(results) <- sample_ls

results2<-lapply(results, dplyr::add_rownames, 'pairs')
results2<- lapply(results2,function(x){
  as.matrix(x)
})
results2 <- lapply(results2, function(x){rownames(x) <- x[,1]; x})
df <- do.call("rbind", results2)

###volcano plot (all pairs, all samples)###
df->results_comb2
results_comb2<-as.data.frame(results_comb2)
results_comb2$pvalue<-as.numeric(results_comb2$pvalue)
results_comb2$effect_size<-as.numeric(results_comb2$effect_size)
results_comb2$sig <- "NO"
results_comb2$sig[results_comb2$effect_size >= 1.3 & results_comb2$pvalue <= 0.01] <- "enriched"
results_comb2$sig[results_comb2$effect_size <= 0.7 & results_comb2$pvalue <= 0.01] <- "depleted"
results_comb2$label <- NA
results_comb2$label[results_comb2$sig != "NO"] <- results_comb2$pairs[results_comb2$sig != "NO"]

ggplot(data=results_comb2, aes(x=effect_size, y=-log10(pvalue), col=sig, label=label)) + 
  geom_point() + 
  theme_minimal() +
  geom_text_repel()+
  theme(text = element_text(size = 20))

### Adjacency  --------------------------------------------------------------

mp_assign <- readRDS("test.rds", sep = "")

all_zones <- readRDS("final_zones.rds")

extend_metadata <- tibble()
generate_metadata <- sapply(c(1:length(sample_ls)), function(i){
  print(sample_ls[i])
  
  # load data
  spots_positions <- read.csv("/test/outs/spatial/tissue_positions_list.csv", header = FALSE, stringsAsFactors = FALSE)
 
row.names(spots_positions) <- spots_positions$V1
spots_clusters <- readRDS("test.rds", sep = "")
spots_clusters <- na.omit(spots_clusters)
colnames(spots_clusters) <- c("barcodes", "spot_type")
row.names(spots_clusters)<- spots_clusters$barcodes  
  
  
metadata <- tibble(Key = paste(sample_ls[i],spots_clusters$barcodes, sep="_"),
                     SpotID = spots_clusters$barcodes,
                     Sample = rep(sample_ls[i], nrow(spots_clusters)),
                     MPid = spots_clusters$spot_type,
                     array_row = spots_positions[spots_clusters$barcodes,"V3"],
                     array_col = spots_positions[spots_clusters$barcodes, "V4"],
                     pxl_in_rows = spots_positions[spots_clusters$barcodes, "V5"],
                     pxl_in_cols = spots_positions[spots_clusters$barcodes, "V6"],
                     Zone = as.character(all_zones[[i]][spots_clusters$barcodes]))
  
  extend_metadata <<- rbind.data.frame(extend_metadata, metadata)
})

set.seed(50)
neighbs_stats <- neighbor_spot_props(metadata = extend_metadata, 
                                     zone = "All", 
                                     #site = "All", 
                                     samples = "All",
                                     #zone_by = "EpiStroma", 
                                     n_cores = 30, 
                                     plot_perm_distr = TRUE, 
                                     n_perm = 10000, 
                                     filter_signif = TRUE, 
                                     zscore_thresh = 1)




# regional comp  (Previously run by server. Possible to run per sample below)----------------------------------------------------------

#!/usr/bin/env Rscript
#args = commandArgs(trailingOnly = TRUE)

#sname <- as.character(args[1])
#sname <- str_replace(sname, "\r", "")
#print(paste("I got the samp right", sname))

sname <- "" #insert sample name 
gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
mp_assign <- readRDS('test.rds')
return(unique(mp_assign$spot_type_meta_new))
}))))

max_win_size <- 15
pairs <- combn(sort(gen_clusters),2)
pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})

# load data
spots_positions <- read.csv("/test/outs/spatial/tissue_positions_list.csv", header = FALSE, stringsAsFactors = FALSE)
row.names(spots_positions) <- spots_positions$V1

spots_clusters <- readRDS('test.rds')
spots_clusters <- na.omit(spots_clusters)
colnames(spots_clusters) <- c("barcodes", "spot_type")
row.names(spots_clusters)<- spots_clusters$barcodes  

neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters)

all_spots <- spots_clusters$barcodes

all_pval_windows <- sapply(c(1:max_win_size), function(win_size){
  proximity <- t(sapply(all_spots, function(spot){
    win_spots <- c(spot)
    sapply(c(1:win_size), function(i){
      win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,])))))
    })
    win_abund <- table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots])/sum(table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots]))
    return(win_abund)
  }))
  
  old_clusters <- colnames(proximity)
  add_clusters <- gen_clusters[!gen_clusters %in% old_clusters]
  
  if (length(add_clusters) != 0){
    sapply(add_clusters,function(clust){
      proximity <<- cbind(proximity,rep(0,dim(proximity)[1]))
    })
    colnames(proximity)[c((length(old_clusters)+1):dim(proximity)[2])] <- add_clusters
  }
  
  proximity <- proximity[,sort(colnames(proximity))]
  
  is_one <- apply(proximity,1,function(ro){
    return(1 %in% ro )
  })
  
  proximity <- proximity[!is_one,]
  
  all_cor <- sapply(c(1:dim(pairs)[2]), function(j){
    pair_cor <- cor(proximity[,pairs[1,j]], proximity[,pairs[2,j]])
    return(pair_cor)
  })
  
  all_cor <- data.frame(pair_cors = all_cor)
  row.names(all_cor) <- pairs_names
  return(all_cor)
  
})
sample_proximity <- as.data.frame(all_pval_windows)
row.names(sample_proximity) <- pairs_names



# regional comp random (Previously run by server. Possible to run per sample below) ---------------------------------------------------

#!/usr/bin/env Rscript
#args = commandArgs(trailingOnly = TRUE)

#sname <- as.character(args[1])
#sname <- str_replace(sname, "\r", "")

sname <- "" #insert sample name 
gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
 mp_assign <- readRDS('test.rds')
 return(unique(mp_assign$spot_type_meta_new))
}))))

max_win_size <- 15
pairs <- combn(sort(gen_clusters),2)
pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})
rand_num <- 500

# load data
spots_positions_orign <- read.csv("/test/outs/spatial/tissue_positions_list.csv", header = FALSE, stringsAsFactors = FALSE)
row.names(spots_positions_orign) <- spots_positions_orign$V1

spots_clusters <- readRDS("test.rds", sep = "")
spots_clusters <- na.omit(spots_clusters)
colnames(spots_clusters) <- c("barcodes", "spot_type")
row.names(spots_clusters)<- spots_clusters$barcodes  

all_rand <- lapply(c(1:rand_num),function(j){
  new_pos_all <- sample(spots_positions_orign$V1[spots_positions_orign$V2 != 0], length(spots_positions_orign$V1[spots_positions_orign$V2 != 0]), replace = FALSE)
  spots_positions <- spots_positions_orign
  spots_positions$V1[spots_positions$V2 != 0] <- new_pos_all
  
  neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters)
  
  all_spots <- spots_clusters$barcodes
  
  
  all_pval_windows <- sapply(c(1:max_win_size), function(win_size){
    proximity <- t(sapply(all_spots, function(spot){
      win_spots <- c(spot)
      sapply(c(1:win_size), function(i){
        win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,])))))
      })
      win_abund <- table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots])/sum(table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots]))
      return(win_abund)
    }))
    
    old_clusters <- colnames(proximity)
    add_clusters <- gen_clusters[!gen_clusters %in% old_clusters]
    
    if (length(add_clusters) != 0){
      sapply(add_clusters,function(clust){
        proximity <<- cbind(proximity,rep(0,dim(proximity)[1]))
      })
      colnames(proximity)[c((length(old_clusters)+1):dim(proximity)[2])] <- add_clusters
    }
    
    proximity <- proximity[,sort(colnames(proximity))]
    
    is_one <- apply(proximity,1,function(ro){
      return(1 %in% ro )
    })
    
    proximity <- proximity[!is_one,]
    
    all_cor <- sapply(c(1:dim(pairs)[2]), function(j){
      pair_cor <- cor(proximity[,pairs[1,j]], proximity[,pairs[2,j]])
      return(pair_cor)
    })
    
    all_cor <- data.frame(pair_cors = all_cor)
    row.names(all_cor) <- pairs_names
    return(all_cor)
    
  })
  sample_proximity <- as.data.frame(all_pval_windows)
  row.names(sample_proximity) <- pairs_names
  return(sample_proximity)
})
sample_mean_rand_prox <- Reduce("+", all_rand) / length(all_rand)

sample_sd_rand_prox <- round(apply(array(unlist(all_rand), c(length(pairs_names), max_win_size, rand_num)), c(1,2), sd),4)



# regional comp downstream ------------------------------------------------

gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
  mp_assign <- readRDS('test.rds')
  return(unique(mp_assign$spot_type_meta_new))
}))))

pairs <- combn(sort(gen_clusters),2)
pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})
all_zones <- readRDS("Spatial_coh_zones/spatial_zonesv3.rds")

all_proximity_list <- list.files("Spatial_coh_zones/proximity_samples/")

all_proximity <- lapply(all_proximity_list, function(prox){
  samp_prox <- readRDS('test.rds')
  return(samp_prox)
})
names(all_proximity) <- sapply(str_split(all_proximity_list, "_"), function(x){return(x[1])})

all_proximity_rand_list <- list.files("Spatial_coh_zones/proximity_rand_samples/")

all_proximity_rand <- lapply(all_proximity_rand_list, function(prox){
  samp_prox <- readRDS('test.rds')
  return(samp_prox)
})
names(all_proximity_rand) <- sapply(str_split(all_proximity_rand_list, "_"), function(x){return(x[1])})


combined_proximity <- t(sapply(c(1:length(pairs_names)), function(i){
  pair_prox <- sapply(all_proximity, function(x){
    pair_sample_df <- as.data.frame(x[i,c(1:15)])
    colnames(pair_sample_df) <- c(as.character(c(1:15)))
    return(pair_sample_df)
  })
  return(apply(pair_prox,1,function(x){mean(na.omit(as.numeric(x)))}))
}))

row.names(combined_proximity) <- row.names(all_proximity[[1]])



Heatmap(na.omit(combined_proximity), cluster_columns = FALSE, column_title = "Metaprograms Proximity", 
        row_names_gp = grid::gpar(fontsize = 5), name = "proximity", show_row_names = T, show_row_dend = F)
# regional comp downstream significant ---------------------------------------------------

spots_numv1 <-sapply(samp_list, function(smp){
  samp_df <- readRDS('test.rds')
  return(nrow(samp_df))
})
names(spots_numv1) <- sapply(str_split(names(spots_numv1), "\\."), function(x){return(x[1])})

spots_num <-sapply(samp_list, function(smp){
samp_df <- readRDS('test.rds')
pairs_n <- sapply(c(1:ncol(pairs)),function(p){
p_table <- samp_df[samp_df$spot_type_meta_new %in% c(pairs[,p]),]
return(nrow(p_table))
  })
  return(pairs_n)
})
colnames(spots_num) <- sapply(str_split(colnames(spots_num), "\\."), function(x){return(x[1])})



proximity_bin <- lapply(c(1:26),function(i){
  r1 = all_proximity[[i]]
  r2 = all_proximity_rand[[i]]
  
  n_df = as.data.frame(spots_num[,names(all_proximity)[i]])
  n_df <- cbind(n_df, rep(n_df,14))
  
  fisher = ((0.5*log((1+r1)/(1-r1)))-(0.5*log((1+r2)/(1-r2))))/((1/(n_df-3))+(1/(n_df-3)))^0.5
  
  p.value = (2*(1-pnorm(abs(as.matrix(fisher)))))
  colnames(p.value) <- as.character(1:15)
  bin_pval <- ifelse(p.value < 0.0000000001,1,NA)
  #  bin_pval <- ifelse(p.value < 0.00001,1,0)
  rbin <- r1*bin_pval
  return(rbin)
})
names(proximity_bin) <- names(all_proximity)