1. 演示数据和包的加载
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("scater")
# BiocManager::install("scRNAseq")
library(scater)
library(scRNAseq)
# 创建SingleCellExperiment对象
example_sce <- ZeiselBrainData()
直接创建SingleCellExperiment对象
SingleCellExperiment(
...,
reducedDims = list(),
altExps = list(),
rowPairs = list(),
colPairs = list(),
mainExpName = NULL
)
2. 质控作图,细胞与基因的筛选
## Diagnostic plots for quality control
example_sce <- addPerCellQC(example_sce,
subsets=list(Mito=grep("mt-", rownames(example_sce))))
plotColData(example_sce, x = "sum", y="detected", colour_by="tissue")
plotColData(example_sce, x = "sum", y="subsets_Mito_percent",
other_fields="tissue") + facet_wrap(~tissue)
plotHighestExprs(example_sce, exprs_values = "counts")
## remove damaged cells and poorly sequenced libraries.自己制定筛选条件
# 查看是否有spike-ins
rownames(example_sce)[grep("^ERCC-", rownames(example_sce))]
## 选择总counts数大于5,000,表达的基因数大于500的细胞。
keep.total <- example_sce$sum > 5e3
#table(keep.total)
keep.n <- example_sce$`total mRNA mol` > 500
# table(keep.n)
# 根据设定的条件进行过滤
example_sce <- example_sce[,keep.total & keep.n]
#dim(example_sce)
## 选择至少在三个细胞中表达的基因
keep_feature <- nexprs(example_sce, byrow=TRUE) >= 3
# table(keep_feature)
example_sce <- example_sce[keep_feature,]
# log转化并归一化
example_sce <- logNormCounts(example_sce)
# plotExplanatoryVariables(example_sce)
vars <- getVarianceExplained(example_sce,
variables=c("tissue", "total mRNA mol", "sex", "age"))
head(vars)
class(vars)
plotExplanatoryVariables(vars)
3. 基因表达值可视化作图
## Visualizing expression values
colnames(example_sce@colData)
## 不同分组中一个或多个基因的表达量
plotExpression(example_sce, rownames(example_sce)[1:3])
# 分组统计
plotExpression(example_sce, rownames(example_sce)[1:3],
x = "level1class")
# 加颜色
plotExpression(example_sce, rownames(example_sce)[1:3],
x = "level1class", colour_by="tissue")
## 基因表达量之间的相关性
plotExpression(example_sce, rownames(example_sce)[1:3],
x = rownames(example_sce)[10])
4. 降维并作图
## Dimensionality reduction
# calculatePCA(
# x,
# ncomponents = 50,
# ntop = 500,
# subset_row = NULL,
# scale = FALSE,
# transposed = FALSE,
# BSPARAM = bsparam(),
# BPPARAM = SerialParam()
# )
example_sce <- runPCA(example_sce)
# uses the top 500 genes with the highest variances to compute the first PCs
str(reducedDim(example_sce, "PCA"))
plotReducedDim(example_sce, dimred = "PCA",
colour_by = "level1class")
plotPCA(example_sce,colour_by = "level1class")
plotPCA(example_sce, colour_by="Mog")
# PCA1-4 两两之间作图
plotPCA(example_sce, ncomponents = 4,
colour_by = "level1class")
# 指定基因,指定名称,指定纬度数据
example_sce <- runPCA(example_sce, name="PCA2",
subset_row=rownames(example_sce)[1:1000],
ncomponents=25)
str(reducedDim(example_sce, "PCA2"))
set.seed(1000)
example_sce <- runTSNE(example_sce, perplexity=10) # 默认ncomponents = 2
example_sce <- runTSNE(example_sce)
#numeric; Perplexity parameter (should not be bigger than
# 3 * perplexity < nrow(X) - 1
head(reducedDim(example_sce, "TSNE"))
#dev.new()
# 颜色显示分组
# plotTSNE(example_sce, colour_by = "level1class")
plotReducedDim(example_sce, dimred = "TSNE",
colour_by = "level1class")
# 按基因表达量显示颜色
plotTSNE(example_sce, colour_by = "Snap25")
# "Snap25" %in% rownames(example_sce)
5. ggcells,ggfeatures作图
ggcells,ggfeatures函数将智能地从colData()、assays()、altExps()或reducedDims()中检索字段,以创建单个data.frame供立即使用。
# These functions generate a data.frame from the contents of
# a SingleCellExperiment and pass it to ggplot.
ggcells(example_sce, mapping=aes(x=level1class, y=Snap25)) +
geom_boxplot() +
facet_wrap(~tissue)
# 显示Snap25表达量并按组织来源分别显示的TSNE图。
ggcells(example_sce, mapping=aes(x=TSNE.1, y=TSNE.2, colour=Snap25)) +
geom_point() +
stat_density_2d() +
facet_wrap(~tissue) +
scale_colour_distiller(direction=1)
#examine the relationship between the size factors
# on the normalized gene expression
ggcells(example_sce, mapping=aes(x=sizeFactor, y=Actb)) +
geom_point() +
geom_smooth()
# 使列名(cell barcodes)有效
colnames(example_sce) <- make.names(colnames(example_sce))
make.names(".2way") # 输出[1] "X.2way"
ggfeatures(example_sce, mapping=aes(x=featureType, y=X1772062111_E06)) +
geom_violin()
参考
http://www.bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/overview.html