BatchQC优势:网页方式对数据操作并进行可视化展示,从而确定是否有批次效应以及去除后的数分布等。初步分析包括数据可视化、层次聚类、主成分分析(PCA)和显著性检验等。
1. 加载包
# 安装
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("BatchQC")
# 加载
require(MCMCpack)
require(sva)
require(BatchQC)
# 设定工作目录
setwd("/test")
2. 数据载入
### 生成模拟数据
nbatch <- 3
ncond <- 2
npercond <- 10
data.matrix <- rnaseq_sim(ngenes=50, nbatch=nbatch, ncond=ncond,
npercond=npercond, basemean=10000,
ggstep=50, bbstep=2000, ccstep=800,
basedisp=100, bdispstep=-10, swvar=1000, seed=1234)
batch <- rep(1:nbatch, each=ncond*npercond)
condition <- rep(rep(1:ncond, each=npercond), nbatch)
modmatrix = model.matrix(~as.factor(condition), data=pdata)
modmatrix.null = model.matrix(~1,data=pdata)
## 模拟数据只有批次效应,condition下没有差别
data.matrix <- rnaseq_sim(ngenes=50, nbatch=nbatch, ncond=ncond,
npercond=npercond, basemean=5000, ggstep=50,
#bbstep=0, ccstep=2000,
bbstep=2000, ccstep=0,
basedisp=10, bdispstep=-4, swvar=1000, seed=1234)
## 真实数据
data(example_batchqc_data)
batch <- batch_indicator$V1
condition <- batch_indicator$V2
batchQC(signature_data, batch=batch, condition=condition,
report_file="batchqc_signature_data_report.html", report_dir=".",
report_option_binary="111111111",
view_report=FALSE, interactive=TRUE)
3. 可视化分析批次效应
# 交互操作,可以直接得到去除批次效应后的差异表达结果
batchQC(data.matrix, batch=batch, condition=condition,
report_file="batchqc_report.html", report_dir=".",
report_option_binary="111111111",
view_report=FALSE, interactive=TRUE, batchqc_output=TRUE)
## 去除批次效应后的数据
data.matrix_cor <- batchQC_condition_adjusted(data.matrix, batch, condition)
# Produce correlation heatmap plot
batchqc_correlation(data.matrix, batch, mod = NULL)
# Produce Median Correlation plot
batchqc_corscatter(data.matrix, batch, mod = NULL)
# Returns a list of explained variation by batch and condition combinations
batchqc_explained_variation(data.matrix, condition, batch)
# PCA(principal component analysis)降维可视化
pca <- batchqc_pca(data.matrix, batch, mod=modmatrix)
# SVD(Singular value decomposition) 降维可视化
pca_svd <- batchqc_pca_svd(data.matrix, batch, mod=modmatrix)
batchQC_sva(data.matrix, modmatrix)
# Performs test to check whether batch needs to be adjusted
batchtest(pca, batch, mod = NULL)
combat_data.matrix <- combatPlot(data.matrix, batch, mod = NULL, par.prior = TRUE,
prior.plots = TRUE)
# 热图
heatmap=batchqc_heatmap(data.matrix, batch, mod=modmatrix)
# 估算变量数
n.sv=batchQC_num.sv(data.matrix,modmatrix)
# Adjust for batch effects
combatPlot(data.matrix, batch, mod = NULL, par.prior = TRUE,
prior.plots = TRUE)
combat_data.matrix = ComBat(dat=data.matrix, batch=batch, mod=modmatrix)
## (a) Raw data
pValues = f.pvalue(data.matrix,modmatrix,modmatrix.null)
qValues = p.adjust(pValues,method="BH")
### (b) Apply Combat
pValuesComBat=f.pvalue(combat_data.matrix,modmatrix,modmatrix.null)
qValuesComBat = p.adjust(pValuesComBat,method="BH")
### (c) Include Batch
modBatch = model.matrix(~as.factor(condition) + as.factor(batch),data=pdata)
mod0Batch = model.matrix(~as.factor(batch),data=pdata)
pValuesBatch = f.pvalue(data.matrix,modBatch,mod0Batch)
qValuesBatch = p.adjust(pValuesBatch,method="BH")