1. 导入数据
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("ChAMP")
library(ChAMP)
### 导入数据
# 利用minifi包的idat数据
baseDir <- system.file("extdata", package = "minfiData")
list.files(baseDir)
list.files(file.path(baseDir, "5723646052"))
data <- champ.load(directory = baseDir,arraytype="450K")
#data$beta <- data$beta+0.00001
class(data) #"list"
names(data) #"beta" "intensity" "pd"
dim(data$beta)
table(is.na(data$beta)) # 查看是否有缺失数据
data$beta[1:2,1:3]
dim(data$intensity)
dim(data$pd)
2.数据质控和标准化
### 数据质控和标准化
# 为CpG岛信息交互式绘图
CpG.GUI(CpG=rownames(data$beta),arraytype="450K")
setwd("/Users/zhengxueming/test")
champ.QC(beta = data$beta,
pheno=data$pd$Sample_Group,
mdsPlot=TRUE,
densityPlot=TRUE,
dendrogram=TRUE,
PDFplot=TRUE,
Rplot=TRUE,
Feature.sel="None",
resultsDir="./CHAMP_QCimages/")
QC.GUI(beta=data$beta,
pheno=data$pd$Sample_Group,
arraytype="450K")
data_norm <- champ.norm(beta=data$beta,
rgSet=myLoad$rgSet,
mset=myLoad$mset,
resultsDir="./CHAMP_Normalization/",
method="BMIQ",
plotBMIQ=FALSE,
arraytype="450K",
cores=3)
champ.SVD(beta = data_norm,
rgSet=NULL,
pd=data$pd,
RGEffect=FALSE,
PDFplot=TRUE,
Rplot=TRUE,
resultsDir="./CHAMP_SVDimages/")
#Combat校正批次效应
data_Combat <- champ.runCombat(beta=data_norm,pd=data$pd,batchname=c("Slide"))
# 查看数据处理结果
data_Combat[1:3,1:5]
data_norm[1:3,1:5]
data$beta[1:3,1:5]
3. 差异分析
### 差异分析
## 差异甲基化位点分析
dmp_list <- champ.DMP(beta = data_Combat,
pheno = data$pd$Sample_Group,
compare.group = NULL,
adjPVal = 0.01,
adjust.method = "BH",
arraytype = "450K")
class(dmp_list) #"list"
dmp_df <- dmp_list$GroupA_to_GroupB
head(dmp_df) # 含有丰富的位点注释信息
dim(dmp_df)
##差异甲基化区域
# 比较费时
myDMR <- champ.DMR(beta=data_Combat,
pheno=data$pd$Sample_Group,
compare.group=c("GroupA","GroupB"),# 指定谁和谁比较
arraytype="450K",
method = "Bumphunter",
minProbes=7,
adjPvalDmr=0.05,
cores=3)
class(myDMR) #"list"
head(myDMR$BumphunterDMR)
dim(myDMR$BumphunterDMR)
##识别甲基化差异block
myBlock<-champ.Block(beta=data_Combat,
pheno=data$pd$Sample_Group,
arraytype="450K",
maxClusterGap=250000,
B=500,
bpSpan=250000,
minNum=10,
cores=3)
Block.GUI(Block=myBlock,
beta=data_Combat,
pheno=data$pd$Sample_Group,
runDMP=TRUE,
compare.group=c("GroupA","GroupB"),
arraytype="450K")
class(myBlock) #"list"
names(myBlock) # "Block" "clusterInfo" "allCLID.v" "avbetaCL.m" "posCL.m"
head(myBlock$Block)
dim(myBlock$Block)
head(myBlock$clusterInfo)
dim(myBlock$clusterInfo)
head(myBlock$allCLID.v)
head(myBlock$avbetaCL.m)
head(myBlock$posCL.m)