scater分析单细胞转录组数据代码

1. 演示数据和包的加载

# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# 
# BiocManager::install("scater")
# BiocManager::install("scRNAseq")

library(scater)
library(scRNAseq)
# 创建SingleCellExperiment对象
example_sce <- ZeiselBrainData()

直接创建SingleCellExperiment对象
SingleCellExperiment(
  ...,
  reducedDims = list(),
  altExps = list(),
  rowPairs = list(),
  colPairs = list(),
  mainExpName = NULL
)

2. 质控作图,细胞与基因的筛选

## Diagnostic plots for quality control
example_sce <- addPerCellQC(example_sce, 
                            subsets=list(Mito=grep("mt-", rownames(example_sce))))

plotColData(example_sce, x = "sum", y="detected", colour_by="tissue")

plotColData(example_sce, x = "sum", y="subsets_Mito_percent", 
            other_fields="tissue") + facet_wrap(~tissue)

plotHighestExprs(example_sce, exprs_values = "counts")

## remove damaged cells and poorly sequenced libraries.自己制定筛选条件

# 查看是否有spike-ins
rownames(example_sce)[grep("^ERCC-", rownames(example_sce))]

## 选择总counts数大于5,000,表达的基因数大于500的细胞。
keep.total <- example_sce$sum > 5e3
#table(keep.total)

keep.n <- example_sce$`total mRNA mol` > 500
# table(keep.n)

# 根据设定的条件进行过滤
example_sce <- example_sce[,keep.total & keep.n]
#dim(example_sce)

## 选择至少在三个细胞中表达的基因
keep_feature <- nexprs(example_sce, byrow=TRUE) >= 3
# table(keep_feature)
example_sce <- example_sce[keep_feature,]

# log转化并归一化
example_sce <- logNormCounts(example_sce) 

# plotExplanatoryVariables(example_sce)

vars <- getVarianceExplained(example_sce, 
                             variables=c("tissue", "total mRNA mol", "sex", "age"))
head(vars)
class(vars)
plotExplanatoryVariables(vars)

3. 基因表达值可视化作图

## Visualizing expression values
colnames(example_sce@colData)

## 不同分组中一个或多个基因的表达量
plotExpression(example_sce, rownames(example_sce)[1:3])
# 分组统计
plotExpression(example_sce, rownames(example_sce)[1:3], 
               x = "level1class")
# 加颜色
plotExpression(example_sce, rownames(example_sce)[1:3],
               x = "level1class", colour_by="tissue")

## 基因表达量之间的相关性
plotExpression(example_sce, rownames(example_sce)[1:3],
               x = rownames(example_sce)[10])

4. 降维并作图

## Dimensionality reduction

# calculatePCA(
#   x,
#   ncomponents = 50,
#   ntop = 500,
#   subset_row = NULL,
#   scale = FALSE,
#   transposed = FALSE,
#   BSPARAM = bsparam(),
#   BPPARAM = SerialParam()
# )


example_sce <- runPCA(example_sce)
# uses the top 500 genes with the highest variances to compute the first PCs
str(reducedDim(example_sce, "PCA"))
plotReducedDim(example_sce, dimred = "PCA", 
               colour_by = "level1class")

plotPCA(example_sce,colour_by = "level1class")
plotPCA(example_sce, colour_by="Mog")
# PCA1-4 两两之间作图
plotPCA(example_sce, ncomponents = 4, 
        colour_by = "level1class")

# 指定基因,指定名称,指定纬度数据
example_sce <- runPCA(example_sce, name="PCA2",
                      subset_row=rownames(example_sce)[1:1000],
                      ncomponents=25)
str(reducedDim(example_sce, "PCA2"))

set.seed(1000)
example_sce <- runTSNE(example_sce, perplexity=10) # 默认ncomponents = 2
example_sce <- runTSNE(example_sce)
#numeric; Perplexity parameter (should not be bigger than 
# 3 * perplexity < nrow(X) - 1
head(reducedDim(example_sce, "TSNE"))

#dev.new()

# 颜色显示分组
# plotTSNE(example_sce, colour_by = "level1class")
plotReducedDim(example_sce, dimred = "TSNE", 
               colour_by = "level1class")
# 按基因表达量显示颜色
plotTSNE(example_sce, colour_by = "Snap25")
# "Snap25"  %in% rownames(example_sce)

5. ggcells,ggfeatures作图

ggcells,ggfeatures函数将智能地从colData()、assays()、altExps()或reducedDims()中检索字段,以创建单个data.frame供立即使用。

#  These functions generate a data.frame from the contents of 
# a SingleCellExperiment and pass it to ggplot.

ggcells(example_sce, mapping=aes(x=level1class, y=Snap25)) + 
  geom_boxplot() +
  facet_wrap(~tissue)

# 显示Snap25表达量并按组织来源分别显示的TSNE图。
ggcells(example_sce, mapping=aes(x=TSNE.1, y=TSNE.2, colour=Snap25)) +
  geom_point() +
  stat_density_2d() +
  facet_wrap(~tissue) +
  scale_colour_distiller(direction=1)

#examine the relationship between the size factors 
# on the normalized gene expression
ggcells(example_sce, mapping=aes(x=sizeFactor, y=Actb)) +
  geom_point() +
  geom_smooth()

# 使列名(cell barcodes)有效
colnames(example_sce) <- make.names(colnames(example_sce))
make.names(".2way") #  输出[1] "X.2way"
ggfeatures(example_sce, mapping=aes(x=featureType, y=X1772062111_E06)) + 
  geom_violin()

参考

http://www.bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/overview.html

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值