Topic 1.克隆进化之sciClone

最近研究了一下克隆进化的流行pipeline,主流的主要有两套如下:

  1. Pyclone+ citup+ Timespace/fishplot(倾向具有CNV数据);

  2. sciClone+ clonevol(只具有体细胞突变位点)

其中第一套流程主要是python计算,R进行可视化,第二套流程完全使用R即可实现可视化。

这里我将克隆进化的两套流程进行分批次讲解,主要是因为在安装以及调用过程中会出下很多问题,在这里我也会一一解答遇到的bug该怎样解决。

首先就讲讲sciClone这个大家比较熟悉的软件,它主要是推断克隆结构和跟踪肿瘤演化的时空模式。

关于安装:大多是版本问题,具体看您安装的R版本对应着的R包以及Rtools,也可参考GitHub - genome/sciclone: An R package for inferring the subclonal architecture of tumors 如下:

install.packages('devtools')
install.packages('gridBase')
install.packages('gridExtra')
install.packages('ggplot2')
install.packages('igraph')
install.packages('packcircles')
install_github('hdng/trees')
install.packages("installr")
install.packages("stringr")    
install_github("genome/bmm")
install_github("genome/sciClone")

关于数据读取问题:一是SNV读取;二是CNV读取;三是LOH读取。对于这三类数据模式,在WGS或WES的分析中都可通过软件计算出来。

一是SNV数据格式主要包括五列:

  1. 染色体;

2.绝对位置;

3.支持是ref的reads数;

4.支持是变异的reads数;

5.突变频率[0-100]。如下:

   chr       pos ref_reads var_reads   vaf
1 chr1 226564877        92       108 54.00
2 chr3  47164829       133        56 29.63
3 chr5  79974803       144        36 20.00
4 chr5  86642512        10         5 33.33
5 chr5 112173275        39        16 29.09
6 chr6  20402666       343       288 45.57

二是CNV数据格式主要包括四列:

1.染色体;

2.CNV起始位置;

3.CNV终止位置;

4.拷贝值(假设正常的拷贝数为2)。

三是LOH数据格式主要包括三列:

1.染色体;

2.窗口的起始位置;

3.窗口的终止位置。

关于文章数据重现:

数据下载链接:GitHub - genome/sciclone-meta: accessory scripts and documentation related to the sciclone R package at genome/sciclone

文章中Figure 1 如下图:

图片

########Figure 1
library(sciClone)
setwd("sciclone-meta-master/manuscript/data/suppFigureN.theta.mmy")
load("out.Rdata")
v0 = read.table("data/mmy.snv.vafs", sep="\t")
cn0 = read.table("data/cn.dat")
cn0 = cn0[,c(1,2,3,5)]
reg0 = read.table("data/exclude.loh")
reg0 = reg0[,c(1,2,3)]
clusterParams="empty"
#clusterParams="no.pvalue.outlier.detection"
sc = sciClone(vafs=list(v0), 
               sampleNames=c("MMY4"), 
               copyNumberCalls=list(cn0), 
               regionsToExclude=list(reg0),
               minimumDepth=100, 
               doClustering=TRUE, 
               clusterParams=clusterParams,
               maximumClusters=10, 
               copyNumberMargins=0.25, 
               useSexChrs=FALSE
              )

文章中Figure 2 如下图:

图片

图片

setwd("sciclone-meta-master/sciclone-meta-master/manuscript/data/figure2")
#BRC sample
rcnt = read.table(paste("data/TCGA-A2-A0YG/vafs",sep=""),sep="\t")
cn = read.table(paste("data/TCGA-A2-A0YG/cn",sep=""),sep="\t")

sc = sciClone(vafs=list(rcnt), sampleNames=c("TCGA-2-A0YG"), copyNumberCalls=list(cn),
  cnCallsAreLog2=TRUE, minimumDepth=50)
writeClusterTable(sc,"clusters.brc")
writeClusterSummaryTable(sc,"clusters.summary.brc")

#UCEC sample
rcnt = read.table("data/TCGA-D1-A17T/variants.rcnt")
cn0 = read.table("data/TCGA-D1-A17T/copy_number.cbs")
cn0 = cn0[,c(1,2,3,5)]

sc2 = sciClone(vafs=list(rcnt), sampleNames=c("TCGA-D1-A17T"), copyNumberCalls=list(cn0), minimumDepth=50, cnCallsAreLog2=TRUE)
writeClusterTable(sc2, "clusters.ucec")
writeClusterSummaryTable(sc2, "cluster.summary.ucec")



#output cluster probabilities for genes of interest
anno = read.table("data/TCGA-D1-A17T/genesToAnnotate")
b = read.table("clusters.ucec",header=T)

cat("gene\tcluster\tcluster.prob.1\tcluster.prob.2\n")
for(i in 1:length(anno[,1])){
  x=b[b$chr==anno[i,1] & b$st==anno[i,2],];
  cat(paste(anno[i,3],x$cluster,x$cluster.prob.1,x$cluster.prob.2,sep="\t"),"\n")
}

文章中Figure 3a 如下图:

图片

setwd("sciclone-meta-master/manuscript/data/figure3")
tum = read.table("data/tumor.vafs", sep="\t", header=TRUE)
rel = read.table("data/relapse.vafs", sep="\t", header=TRUE)

highlight.genes <- read.table("data/genesToAnnotate", header=FALSE, sep="\t", as.is=TRUE)
highlight.genes<-highlight.genes[,1:2]
samples = c("AML28 Tumor", "AML28 Relapse")

cn1 = read.table("data/tumor.cn", sep="\t")
cn1 = cn1[,c(1,2,3,5)]
cn2 = read.table("data/relapse.cn", sep="\t")
cn2 = cn2[,c(1,2,3,5)]

# Change this to 1 to output an intermediate plot at every iteration (as shown in supplement)
plotIntermediateResults <- 0

sc <- sciClone(vafs=list(tum,rel), 
              sampleNames=samples, 
              useSexChrs=FALSE, 
              copyNumberCalls=list(cn1,cn2), 
              copyNumberMargins=0.25, 
              minimumDepth=100, 
              verbose=1, 
              doClusteringAlongMargins=TRUE, 
              plotIntermediateResults = plotIntermediateResults)
writeClusterTable(sc, "clusters")
writeClusterSummaryTable(sc, "clusters.summary")
sc.plot2d(sc, highlightsHaveNames=TRUE, positionsToHighlight=highlight.genes, "figure3.pdf")
connectivity.matrix <- getConnectivityMatrix(sc)
write.table(file="connectivity.matrix.tsv", connectivity.matrix, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE)

文章中Figure 5abc 如下图:

图片

setwd("sciclone-meta-master/manuscript/data/figure5")
#read in data
regions_to_exclude = read.table(file='data/loh.regions',sep="\t",header=F);

samples = c("Pre-treatment Tumor 1","Pre-treatment Tumor 2","Post-treatment Tumor")

v1 = read.table("data/pre1.vafs",sep="\t")
v2 = read.table("data/pre2.vafs",sep="\t")
v3 = read.table("data/post1.vafs",sep="\t")

cn1 = read.table("data/pre1.cn",sep="\t")
cn1 = cn1[,c(1,2,3,5)]
cn2 = read.table("data/pre2.cn",sep="\t")
cn2 = cn2[,c(1,2,3,5)]
cn3 = read.table("data/post1.cn",sep="\t")
cn3 = cn3[,c(1,2,3,5)]

#cluster
sc = sciClone(vafs=list(v1,v2,v3), sampleNames=samples, copyNumberCalls=list(cn1,cn2,cn3), doClusteringAlongMargins=FALSE, maximumClusters=10, regionsToExclude=regions_to_exclude)
writeClusterTable(sc,"cluster")
writeClusterSummaryTable(sc,"cluster.summary")
sc.plot2d(sc,"figure5abc.pdf", singlePage=TRUE, scale=1.8)

文章中Figure 5d 如下图:

图片

##接上面的sc
sco = sc
 samplesToPlot=sc@sampleNames
 size=700
 open3d(windowRect = c(0,0, size, size) )

 a = sco@vafs.merged[,c(paste(samplesToPlot,".vaf",sep=""),"cluster")]
 a = a[!is.na(a$cluster),]
 a = a[!(a$cluster==0),]
 numClusters=max(a$cluster)
 cols=getClusterColors(numClusters)
 colvec = cols[a$cluster]
 samples = c("Pre-treatment Tumor 1","Pre-treatment Tumor 2","Post-treatment Tumor")
 
 plot3d(a[,1], a[,2], a[,3], xlim=c(0,100), ylim=c(0,100),zlim=c(0,100), axes=FALSE,
        xlab="Pre-treatment Tumor 1 VAF",ylab="Pre-treatment Tumor 2 VAF", zlab="Post-treatment Tumor VAF",
        type="s", col=colvec)
 axes3d( edges=c("x--", "y--", "z"),labels=FALSE)
 for(i in c("+","-")){
   for(j in c("+","-")){
     axes3d( edges=paste("x",i,j,sep=""), tick=FALSE, labels=FALSE)
     axes3d( edges=paste("y",i,j,sep=""), tick=FALSE, labels=FALSE)
     axes3d( edges=paste("z",i,j,sep=""), tick=FALSE, labels=FALSE)
   }
 }

到处为止,文章中出现使用sciClone包做出来的图表都已复现完成,希望对大家有一定的帮助!

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值