slingshot 代码篇

Slingshot: Trajectory Inference for Single-Cell Data (bioconductor.org)

关注微信:生信小博士



#request 2
.libPaths(c( "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2","/home/data/t040413/R/yll/usr/local/lib/R/site-library",  "/home/data/refdir/Rlib/", "/usr/local/lib/R/library"))
library(slingshot)
#BiocManager::install("BUSpaRse")
library(BUSpaRse)
library(tidyverse)
library(tidymodels)
library(Seurat)
library(scales)
library(viridis)
#library(Matrix)
library(tradeSeq)

load("/home/data/t040413/ipf/gse135893_20_PF_10_control_scRNAseq/fibroblast_sct_all_theway.rds")
getwd()

dir.create("~/silicosis/fibroblast_myofibroblast/slingshot")
setwd("~/silicosis/fibroblast_myofibroblast/slingshot")
getwd()

dir.create("~/silicosis/fibroblast_myofibroblast/slingshot/rna",recursive = T)
setwd("~/silicosis/fibroblast_myofibroblast/slingshot/rna")
getwd()


Idents(fibroblast)=fibroblast$SCT_snn_res.0.8

fibroblast=RenameIdents(fibroblast,"3"="Universal fib",
             "6"="Universal fib")
fibroblast$cell.type=Idents(fibroblast)

subset_rnaslot=GetAssay(fibroblast,assay = "RNA")
subset_rnaslot=AddMetaData(fibroblast,metadata = fibroblast@meta.data)
head(subset_rnaslot@meta.data)
table(subset_rnaslot$SCT_snn_res.0.8)
DimPlot(subset_rnaslot,group.by ="SCT_snn_res.0.8",label = TRUE )

#https://bustools.github.io/BUS_notebooks_R/slingshot.html
library(slingshot)  #https://bioconductor.org/packages/release/bioc/vignettes/slingshot/inst/doc/vignette.html
library(SingleCellExperiment)
library(RColorBrewer)


subset_rnaslot.sce=as.SingleCellExperiment(subset_rnaslot)

DotPlot(subset_rnaslot,features = c("CD34","GPX3","INMT","PI16"))

sds=slingshot(subset_rnaslot.sce,clusterLabels ="cell.type",reducedDim = 'UMAP' ,
              start.clus ="Universal fib")
seu=subset_rnaslot
#sds <- slingshot(Embeddings(seu, "umap"), clusterLabels = seu$seurat_clusters, start.clus = 4, stretch = 0)

sds
summary(sds$slingPseudotime_1)


cell_pal <- function(cell_vars, pal_fun,...) {
  if (is.numeric(cell_vars)) {
    pal <- pal_fun(100, ...)
    return(pal[cut(cell_vars, breaks = 100)])
  } else {
    categories <- sort(unique(cell_vars))
    pal <- setNames(pal_fun(length(categories), ...), categories)
    return(pal[cell_vars])
  }
}
cell_colors <- cell_pal(subset_rnaslot$cell.type, hue_pal())#, brewer_pal("qual", "Set2")
cell_colors_clust <- cell_pal(subset_rnaslot$cell.type, hue_pal())

head(reducedDims(sds)$UMAP)
head(SlingshotDataSet(sds))

1
plot(reducedDims(sds)$UMAP,  pch = 16, cex = 0.5) #col =brewer.pal("qual", "Set2")[sds$cell_type] ,
lines(SlingshotDataSet(sds), lwd = 2, type = 'lineages', col = 'red')

plot(reducedDims(sds)$UMAP,  pch = 16, cex = 0.5, col =cell_colors_clust )
lines(SlingshotDataSet(sds), lwd = 2, type = 'lineages', col = 'red')


2
plot(reducedDims(sds)$UMAP,pch = 16, cex = 0.5, col =cell_colors_clust )
lines(SlingshotDataSet(sds), lwd = 2, col = 'red')


3
pt <- slingPseudotime(sds)
head(pt)
nc <- 3

nms <- colnames(pt)
nr <- ceiling(length(nms)/nc)
nr
pal <- viridis(100, end = 0.95)
c(nr, nc)

par(mfrow = c(nr, nc))

pdf("/home/data/t040413/silicosis/fibroblast_myofibroblast/slingshot/rna/lineages_2.pdf")
for (i in nms) {
  colors <- pal[cut(pt[,i], breaks = 100)]
  
  
  plot(reducedDims(sds)$UMAP, col = colors, pch = 16, cex = 0.5, main = i)
  lines(SlingshotDataSet(sds), lwd = 2, col = 'red', type = 'lineages')
  
  
}
dev.off()

##########=============或者画一张大图
pdf("/home/data/t040413/silicosis/fibroblast_myofibroblast/slingshot/rna/lineages_2.pdf",width = 20,height = 20)
par(mfrow = c(nr, nc))
for (i in nms) {
  colors <- pal[cut(pt[,i], breaks = 100)]
  
  
  plot(reducedDims(sds)$UMAP, col = colors, pch = 16, cex = 0.5, main = i)
  lines(SlingshotDataSet(sds), lwd = 2, col = 'red', type = 'lineages')
  
  
}
dev.off()



4
#差异分析
# Get top highly variable genes
4.1
top_hvg <- HVFInfo(seu) %>% 
  mutate(., bc = rownames(.)) %>% 
  arrange(desc(residual_variance)) %>% 
  top_n(300, residual_variance) %>% 
  pull(bc)
head(top_hvg)
# Prepare data for random forest
dat_use <- t(GetAssayData(seu, slot = "data")[top_hvg,])
head(dat_use)[,1:10]
head(slingPseudotime(sds)[,])
dat_use_df <- cbind(slingPseudotime(sds)[,2], dat_use) # Do curve 2, so 2nd columnn
head(dat_use_df)[,1:10]
colnames(dat_use_df)[1] <- "pseudotime"
dat_use_df <- as.data.frame(dat_use_df[!is.na(dat_use_df[,1]),])
head(dat_use_df)[,1:10]

4.2
#The subset of data is randomly split into training and validation; the model fitted on the training set will be evaluated on the validation set.

dat_split <- initial_split(dat_use_df)
dat_split
dat_train <- training(dat_split)
dat_val <- testing(dat_split)
head(dat_val)[,1:10]
dim(dat_train)
dim(dat_val)

4.3
#随机森林模型
#tidymodels is a unified interface to different machine learning models, a “tidier” version of caret. The code chunk below can easily be adjusted to use other random forest packages as the back end, so no need to learn new syntax for those packages.

model <- rand_forest(mtry = 200, trees = 1400, min_n = 15, mode = "regression") %>%
  set_engine("ranger", importance = "impurity", num.threads = 3) %>%
  fit(pseudotime ~ ., data = dat_train)

4.3-1
#模型评价
#The model is evaluated on the validation set with 3 metrics: room mean squared error (RMSE), coefficient of determination using correlation (rsq, between 0 and 1), and mean absolute error (MAE).

val_results <- dat_val %>% 
  mutate(estimate = predict(model, .[,-1]) %>% pull()) %>% 
  select(truth = pseudotime, estimate)
metrics(data = val_results, truth, estimate)


4.3-2
#RMSE and MAE should have the same unit as the data. As pseudotime values here usually have values much larger than 2, the error isn’t too bad. Correlation (rsq) between slingshot’s pseudotime and random forest’s prediction is very high, also showing good prediction from the top 300 highly variable genes.

summary(dat_use_df$pseudotime)


4.4
#画特定基因
#Now it’s time to plot some genes deemed the most important to predicting pseudotime:
plot(reducedDims(sds)$UMAP, col = colors, 
     pch = 16, cex = 0.5, main = top_gene_name[i])
lines(SlingshotDataSet(sds), lwd = 2, col = 'red', type = 'lineages')


var_imp <- sort(model$fit$variable.importance, decreasing = TRUE)
top_genes <- names(var_imp)[1:6]
top_genes
# Convert to gene symbol
top_gene_name <- top_genes#gns$gene_name[match(top_genes, gns$gene)]

par(mfrow = c(3, 2))
for (i in seq_along(top_genes)) {
  colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
  
  plot(reducedDim(sds), col = colors, 
       pch = 16, cex = 0.5, main = top_gene_name[i])
  lines(SlingshotDataSet(sds), lwd = 2, col = 'red', type = 'lineages')
}
dev.off()

pdf("./gene_pseudotime.pdf",width = 12,height = 15)
par(mfrow = c(3, 2))
for (i in seq_along(top_genes)) {
  colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
  
  plot(reducedDims(sds)$UMAP, col = colors, 
       pch = 16, cex = 0.5, main = top_gene_name[i])
  lines(SlingshotDataSet(sds), lwd = 2, col = 'red', type = 'lineages')
}
dev.off()

pdf("./selected_gene_pseudotime.pdf",width = 12,height = 15)
par(mfrow = c(3, 2))
top_gene_name=c("INMT","GPX3","CD34","PI16","SERBP1")
for (i in seq_along(top_genes)) {
  colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
  
  plot(reducedDims(sds)$UMAP, col = colors, 
       pch = 16, cex = 0.5, main = top_gene_name[i])
  lines(SlingshotDataSet(sds), lwd = 2, col = 'red', type = 'lineages')
}
dev.off()

librar

files_tosave= paste(ls(),sep = ",",collapse = ",")
files_tosave
save(files_tosave,file = "/home/data/t040413/silicosis/fibroblast_myofibroblast/slingshot/slingshot.Rdata")


5
#Identifying temporally dynamic genes
#After running slingshot, we are often interested in finding genes that change their expression over the course of development. We will demonstrate this type of analysis using the tradeSeq package (Van den Berge et al. 2020).
#For each gene, we will fit a general additive model (GAM) using a negative binomial noise distribution to model the (potentially nonlinear) relationshipships between gene expression and pseudotime. We will then test for significant associations between expression and pseudotime using the associationTest.
.libPaths()
BiocManager::install("tradeSeq")
library(tradeSeq)

# fit negative binomial GAM
sds <- fitGAM(sds)

# test for dynamic expression
ATres <- associationTest(sce)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

生信小博士

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值