拷贝数变异(CNVs)模拟数据生成及CNV图绘制
R包安装
# Install the required packages only when they are missing, so re-running the
# script does not contact the repositories (or prompt for updates) every time.
# requireNamespace() checks availability without attaching the package.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Bioconductor packages used by this script
bioc.pkgs <- c("karyoploteR", "BSgenome.Hsapiens.UCSC.hg19")
is.installed <- vapply(bioc.pkgs, requireNamespace, logical(1), quietly = TRUE)
if (!all(is.installed)) {
  BiocManager::install(bioc.pkgs[!is.installed])
}
生成模拟CNV数据
library(karyoploteR)

# Fix the RNG seed so the simulated regions are reproducible
set.seed(1234)

# Simulation settings
num.sim.samples <- 10  # number of simulated samples
num.sim.regions <- 10  # number of CNV regions drawn per sample

# Simulate CNV data: build one data frame per sample and bind them in a single
# step, instead of growing a data frame with rbind() inside a loop. Samples are
# generated in order 1..num.sim.samples, so the random stream is consumed in
# the same order as a plain for loop would consume it.
cnvs.list <- lapply(seq_len(num.sim.samples), function(nsample) {
  # Draw random, non-overlapping regions (mean length 20 Mb, sd 5 Mb)
  sim.regions <- toDataframe(createRandomRegions(nregions = num.sim.regions,
                                                 length.mean = 20e6,
                                                 length.sd = 5e6, mask = NA,
                                                 non.overlapping = TRUE))
  # Format each region as "chrom:start-end"
  intervals <- paste0(sim.regions[, 1], ":", sim.regions[, 2], "-",
                      sim.regions[, 3])
  # Alternate DEL/DUP labels; rep_len() follows the actual number of regions
  # rather than relying on a hard-coded repeat count.
  data.frame(Sample = paste0("Sample", nsample),
             Type = rep_len(c("DEL", "DUP"), length(intervals)),
             Region = intervals, stringsAsFactors = FALSE)
})
cnvs.data <- do.call(rbind, cnvs.list)

head(cnvs.data)
# Example output:
# Sample Type Region
# 1 Sample1 DEL chr5:138146110-152110780
# 2 Sample1 DUP chr5:91067585-112454730
# 3 Sample1 DEL chr3:17996869-43419073
# 4 Sample1 DUP chr1:129050019-137321529
# 5 Sample1 DEL chr4:12466095-34611717
# 6 Sample1 DUP chr18:22535664-45065942
绘制CNVs在各样本分布情况
基于karyoploteR包绘制CNVs分布图:红色表示DUP(重复)区域,绿色表示DEL(缺失)区域。
# Draw one horizontal track per sample on a single-line karyotype
# (plot.type = 4); each CNV is a thick segment on the track's mid-line.

# Sample names, in order of first appearance in the CNV table
samples <- as.character(unique(cnvs.data$Sample))
# Number of samples, i.e. number of tracks
num.samples <- length(samples)

# Vertical gap left between consecutive sample tracks. It is capped at half a
# track so that r1 always stays above r0; a fixed 0.05 gap would make
# r1 <= r0 as soon as there are 20 or more samples. For 10 samples or fewer
# the gap is still 0.05.
track.gap <- min(0.05, 0.5 / num.samples)

# Segment colour for each CNV type
cnv.colors <- c(DUP = "red", DEL = "green")

# Empty karyotype: the default ideogram and labels are suppressed, then
# chromosome names (rotated 45 degrees) and cytobands-as-line are added.
kp <- plotKaryotype(plot.type = 4, ideogram.plotter = NULL, labels.plotter = NULL)
kpAddChromosomeNames(kp, srt = 45)
kpAddCytobandsAsLine(kp)

for (nsample in seq_len(num.samples)) {
  s <- samples[nsample]

  # Extract this sample's rows and parse "chrom:start-end" back into columns.
  # as.character() guards against Region being stored as a factor, which
  # strsplit() rejects.
  sample.regions <- cnvs.data[cnvs.data$Sample == s, ]
  regs <- data.frame(do.call(rbind, strsplit(x = as.character(sample.regions$Region),
                                             split = ":|-")),
                     stringsAsFactors = FALSE)
  regs[, 2] <- as.numeric(regs[, 2])
  regs[, 3] <- as.numeric(regs[, 3])
  regs <- toGRanges(regs)

  # Vertical extent [r0, r1] of this sample's track
  r0 <- (nsample - 1) / num.samples
  r1 <- nsample / num.samples - track.gap

  # Track label and mid-line guide
  kpAddLabels(kp, r0 = r0, r1 = r1, labels = s)
  kpAbline(kp, h = 0.5, r0 = r0, r1 = r1, col = "#888888")

  # Draw DUP and DEL regions, one colour per type
  for (cnv.type in names(cnv.colors)) {
    is.type <- sample.regions$Type == cnv.type
    if (any(is.type)) {
      kpSegments(kp, data = regs[is.type], y0 = 0.5, y1 = 0.5, r0 = r0, r1 = r1,
                 col = cnv.colors[[cnv.type]], lwd = 5)
    }
  }
}