R语言bioconductor包—maftools的使用

maftools包分析和可视化大规模测序研究中的突变注释格式(MAF)文件。该软件包提供了各种功能来执行癌症基因组学中最常用的分析,并以最小的工作量创建功能丰富的可定制可视化。

1. 数据读入,生成MAF对象

# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# 
# BiocManager::install("maftools")

library(maftools)
# ls("package:maftools")
# 演示数据
laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools') 
laml.clin = system.file('extdata', 'tcga_laml_annot.tsv', package = 'maftools') 

laml = read.maf(maf = laml.maf, clinicalData = laml.clin) # MAF对象

# 查看MAF对象
getSampleSummary(laml)
getGeneSummary(laml)
getClinicalData(laml)
getFields(laml)
# write.mafSummary(maf = laml, basename = 'laml')

# 所有样本中突变统计图
plotmafSummary(maf = laml, rmOutlier = TRUE, addStat = 'median', 
               dashboard = TRUE, titvRaw = FALSE)

2. 取MAF子集

## 取MAF对象子集。tsb:subset by these samples (Tumor Sample Barcodes)
# mafObj = FALSE,返回data.frame
subsetMaf(maf = laml, tsb = c('TCGA-AB-3009', 'TCGA-AB-2933'), 
          mafObj = FALSE)[1:5]
# 默认mafObj = TRUE,返回MAF object
subsetMaf(maf = laml, tsb = c('TCGA-AB-3009', 'TCGA-AB-2933')) 

subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), mafObj = FALSE,
          query = "Variant_Classification == 'Splice_Site'")

subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), 
          mafObj = FALSE, 
          query = "Variant_Classification == 'Splice_Site'", 
          fields = 'i_transcript_name')

laml_m4 = subsetMaf(maf = laml, clinQuery = "FAB_classification %in% 'M4'")

# Filter MAF by genes or samples
filterMaf(maf = laml, genes =c("TTN", "AHNAK2"))

3. oncoplot 作图

Usage

oncoplot(
  maf,
  top = 20,
  minMut = NULL,
  genes = NULL,
  altered = FALSE,
  drawRowBar = TRUE,
  drawColBar = TRUE,
  leftBarData = NULL,
  leftBarLims = NULL,
  rightBarData = NULL,
  rightBarLims = NULL,
  topBarData = NULL,
  logColBar = FALSE,
  includeColBarCN = TRUE,
  clinicalFeatures = NULL,
  annotationColor = NULL,
  annotationDat = NULL,
  pathways = NULL,
  path_order = NULL,
  selectedPathways = NULL,
  pwLineCol = "#535c68",
  pwLineWd = 1,
  draw_titv = FALSE,
  titv_col = NULL,
  showTumorSampleBarcodes = FALSE,
  barcode_mar = 4,
  barcodeSrt = 90,
  gene_mar = 5,
  anno_height = 1,
  legend_height = 4,
  sortByAnnotation = FALSE,
  groupAnnotationBySize = TRUE,
  annotationOrder = NULL,
  sortByMutation = FALSE,
  keepGeneOrder = FALSE,
  GeneOrderSort = TRUE,
  sampleOrder = NULL,
  additionalFeature = NULL,
  additionalFeaturePch = 20,
  additionalFeatureCol = "gray70",
  additionalFeatureCex = 0.9,
  genesToIgnore = NULL,
  removeNonMutated = TRUE,
  fill = TRUE,
  cohortSize = NULL,
  colors = NULL,
  cBioPortal = FALSE,
  bgCol = "#CCCCCC",
  borderCol = "white",
  annoBorderCol = NA,
  numericAnnoCol = NULL,
  drawBox = FALSE,
  fontSize = 0.8,
  SampleNamefontSize = 1,
  titleFontSize = 1.5,
  legendFontSize = 1.2,
  annotationFontSize = 1.2,
  sepwd_genes = 0.5,
  sepwd_samples = 0.25,
  writeMatrix = FALSE,
  colbar_pathway = FALSE,
  showTitle = TRUE,
  titleText = NULL,
  showPct = TRUE
)
# 基因的突变图
oncoplot(maf = laml, top = 20)
oncoplot(maf = laml, draw_titv = TRUE)

# 自定义颜色
vc_cols = RColorBrewer::brewer.pal(n = 8, name = 'Paired')
names(vc_cols) = c(
  'Frame_Shift_Del',
  'Missense_Mutation',
  'Nonsense_Mutation',
  'Multi_Hit',
  'Frame_Shift_Ins',
  'In_Frame_Ins',
  'Splice_Site',
  'In_Frame_Del'
)

print(vc_cols)
oncoplot(maf = laml, colors = vc_cols, top = 10)

# 加上突变的信号通路
pathways = data.frame(
  Genes = c(
    "TP53",
    "WT1",
    "PHF6",
    "DNMT3A",
    "DNMT3B",
    "TET1",
    "TET2",
    "IDH1",
    "IDH2",
    "FLT3",
    "KIT",
    "KRAS",
    "NRAS",
    "RUNX1",
    "CEBPA",
    "ASXL1",
    "EZH2",
    "KDM6A"
  ),
  Pathway = rep(c(
    "TSG", "DNAm", "Signalling", "TFs", "ChromMod"
  ), c(3, 6, 4, 2, 3)),
  stringsAsFactors = FALSE
)

head(pathways)
oncoplot(maf = laml, pathways = pathways)

# 加上临床特征
oncoplot(maf = laml, top = 20,clinicalFeatures = "FAB_classification")

4. 其他突变统计分析图

### 1.plotTiTv
laml.titv = titv(maf = laml, plot = FALSE, useSyn = TRUE)

##plot titv summary: 
# 1.contributions of 6 mutational conversion 
# 2. Ti/Tv ratios of each sample
plotTiTv(res = laml.titv)

### 2. lollipopPlot
## 绘制棒棒糖的氨基酸变化图。蛋白质结构域来源于PFAM数据库。
lollipopPlot(
  maf = laml,
  gene = 'DNMT3A',
  AACol = 'Protein_Change',
  showMutationRate = TRUE,
  labelPos = 882, # 只表这个位置的突变:c(882,909),"all"
  repel = TRUE
)

### 3. plotProtein
# 画蛋白结构域图
plotProtein(gene = "TP53")
plotProtein(gene = "TP53",refSeqID = "NM_000546")
plotProtein(gene = "DNMT3A")
plotProtein(gene = "KRAS",showDomainLabel=FALSE)

# http://pfam.xfam.org

### 4. rainfallPlot
## 降雨图显示基因组突变热点区域。
brca <- system.file("extdata", "brca.maf.gz", package = "maftools")
brca = read.maf(maf = brca, verbose = FALSE)
rainfallPlot(maf = brca, detectChangePoints = TRUE, pointSize = 0.4)

### 5. tcgaCompare
## 将输入MAF中的突变负荷与来自MC3项目的所有33个TCGA队列进行比较。
par(fig=c(0,0.8,0,0.8))  #c(left, right,bottom,top)
laml.mutload = tcgaCompare(maf = laml, cohortName = 'Example-LAML',
                           logscale = TRUE, capture_size = 50)

### 6. plotVaf
# 统计基因在各自样本中突变频率的箱线图
plotVaf(maf = laml, vafCol = 'i_TumorVAF_WU')  # i_TumorVAF_WU: 样本名称
plotVaf(maf = laml, genes = "DNMT3A",vafCol = 'i_TumorVAF_WU')

### 7. plotCBSsegments
tcga.ab.009.seg <- system.file("extdata", "TCGA.AB.3009.hg19.seg.txt", 
                               package = "maftools")

# 画某一样本的拷贝数变异图
# tcga.ab.009.seg文件字段:Sample Chromosome Start End Num_Probes Segment_Mean
par(fig=c(0.01,1,0,1))  #c(left, right,bottom,top)
plotCBSsegments(cbsFile = tcga.ab.009.seg)


### 8. somaticInteractions

#前25个突变基因的排他/共出现事件统计分析作图。
dev.new()
somaticInteractions(maf = laml, top = 25, pvalue = c(0.05, 0.1))

### 9. plotOncodrive
# 基于变异的位置聚类检测癌症驱动基因并作图
laml.sig = oncodrive(maf = laml, AACol = 'Protein_Change', 
                     minMut = 5, pvalMethod = 'zscore')
head(laml.sig)
plotOncodrive(res = laml.sig, fdrCutOff = 0.01, 
              useFraction = TRUE, labelSize = 0.5)

### 10. pfamDomains
# 突变结构域统计图
laml.pfam = pfamDomains(maf = laml, AACol = 'Protein_Change', top = 10)
#Protein summary (Printing first 7 columns for display convenience)
laml.pfam$proteinSummary[,1:7, with = FALSE]
#Domain summary (Printing first 3 columns for display convenience)
laml.pfam$domainSummary[,1:3, with = FALSE]


5. 联合生存分析作图

# 根据基因突变分组并作生存分析图
mafSurvival(maf = laml, genes = 'DNMT3A', time = 'days_to_last_followup', 
            Status = 'Overall_Survival_Status', isTCGA = TRUE)

# 预测与生存相关的基因集
prog_geneset = survGroup(maf = laml, top = 20, geneSetSize = 2, 
                         time = "days_to_last_followup", 
                         Status = "Overall_Survival_Status", 
                         verbose = FALSE)
print(prog_geneset)
# 根据基因集(geneset)突变分组并作生存分析图
mafSurvGroup(maf = laml, geneSet = c("DNMT3A", "FLT3"), 
             time = "days_to_last_followup", 
             Status = "Overall_Survival_Status")

6.比较MAF并作图

###不同MAF对象比较##########
primary.apl = system.file("extdata", "APL_primary.maf.gz", package = "maftools")
primary.apl = read.maf(maf = primary.apl)
#Relapse APL MAF
relapse.apl = system.file("extdata", "APL_relapse.maf.gz", package = "maftools")
relapse.apl = read.maf(maf = relapse.apl)

# fisher检验两个队列(MAF),以发现差异突变基因。
pt.vs.rt <- mafCompare(m1 = primary.apl, m2 = relapse.apl, 
                       m1Name = 'Primary', m2Name = 'Relapse', 
                       minMut = 5)
print(pt.vs.rt)
# 不同队列中的基因突变差异森林图。x轴为log10转换优势比,y轴上的差异突变基因。
forestPlot(mafCompareRes = pt.vs.rt, pVal = 0.1)

# 不同队列中的基因突变差异图:显示具体突变类型
genes = c("PML", "RARA", "RUNX1", "ARID1B", "FLT3")
coOncoplot(m1 = primary.apl, m2 = relapse.apl, 
           m1Name = 'PrimaryAPL', m2Name = 'RelapseAPL', 
           genes = genes, removeNonMutated = TRUE)

coBarplot(m1 = primary.apl, m2 = relapse.apl, 
          m1Name = "Primary", m2Name = "Relapse")

lollipopPlot2(m1 = primary.apl, m2 = relapse.apl, 
              gene ="PML", 
              AACol1 = "amino_acid_change", 
              AACol2 = "amino_acid_change", 
              m1_name = "Primary", m2_name = "Relapse")
# 不同临床特征分组中基因突变差异比较并作富集图(每一组都和其余比)
fab.ce = clinicalEnrichment(maf = laml, clinicalFeature = 'FAB_classification')
fab.ce$groupwise_comparision[p_value < 0.05]
plotEnrichmentResults(enrich_res = fab.ce, pVal = 0.05)

7. 药物相互作用的突变基因分析

# 队列中和药物的相互作用的突变基因
dgi = drugInteractions(maf = laml, fontSize = 0.75)
length(table(dgi$Gene))
names(table(dgi$Gene))
#dgi[which(dgi$category=="LIPID KINASE"),]

dnmt3a.dgi = drugInteractions(genes = "DNMT3A", drugs = TRUE)
#Printing selected columns.
dnmt3a.dgi[,.(Gene, interaction_types, drug_name, drug_claim_name)]

8. 突变基因在致癌通路的富集分析

# 突变基因在已知致癌通路的富集及作图
OncogenicPathways(maf = laml)
PlotOncogenicPathways(maf = laml, pathways = "RTK-RAS")
PlotOncogenicPathways(maf = laml, pathways = "TP53")

9. VAF聚类

#基于变异等位基因频率(VAF)对某一样本的变异进行聚类
#install.packages("mclust")
library("mclust")
tcga.ab.2972.het = inferHeterogeneity(maf = laml, tsb = 'TCGA-AB-2972', 
                                      vafCol = 'i_TumorVAF_WU')
print(tcga.ab.2972.het$clusterMeans)
plotClusters(clusters = tcga.ab.2972.het)

seg = system.file('extdata', 'TCGA.AB.3009.hg19.seg.txt', package = 'maftools')
dev.new()
tcga.ab.3009.het = inferHeterogeneity(maf = laml, tsb = 'TCGA-AB-3009', 
                                      segFile = seg, vafCol = 'i_TumorVAF_WU')
# 加上拷贝数变异聚类信息
plotClusters(clusters = tcga.ab.3009.het, 
             genes = 'CN_altered', showCNvars = TRUE)

10. 突变特征(mutational signatures)分析

### Extract mutational signatures###########
# APOBEC富集样品和非APOBEC富集样品之间的差异并作图:
# 包括突变负荷、tCw基序分布和主要突变基因差异的子图。
library("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)
laml.tnm = trinucleotideMatrix(maf = laml, prefix = 'chr', 
                               add = TRUE, 
                               ref_genome = "BSgenome.Hsapiens.UCSC.hg19")
dev.new()
plotApobecDiff(tnm = laml.tnm, maf = laml, pVal = 0.2)

#install.packages("NMF")
library('NMF')
laml.sign = estimateSignatures(mat = laml.tnm, nTry = 6, 
                               pConstant = 0.1, 
                               plotBestFitRes = FALSE, 
                               parallel = 2)
plotCophenetic(res = laml.sign)
laml.sig = extractSignatures(mat = laml.tnm, n = 3, 
                             pConstant = 0.1, parallel = 2)

laml.og30.cosm = compareSignatures(nmfRes = laml.sig, sig_db = "legacy")
#Compate against updated version3 60 signatures 
laml.v3.cosm = compareSignatures(nmfRes = laml.sig, sig_db = "SBS")

library('pheatmap')
pheatmap::pheatmap(mat = laml.og30.cosm$cosine_similarities, 
                   cluster_rows = FALSE, 
                   main = "cosine similarity against validated signatures")

pheatmap::pheatmap(mat = laml.v3.cosm$cosine_similarities, 
                   cluster_rows = FALSE, 
                   main = "cosine similarity against validated signatures")
maftools::plotSignatures(nmfRes = laml.sig, title_size = 1.2, sig_db = "SBS")

11.其他格式数据转化为MAF对象

# Converts annovar annotations into MAF.
# 参数 MAFobj 默认为FALSE,返回data.frame格式数据
var.annovar = system.file("extdata", "variants.hg19_multianno.txt", 
                          package = "maftools")
var.annovar.maf = annovarToMaf(annovar = var.annovar, 
                               Center = 'CSI-NUS', 
                               refBuild = 'hg19', 
                               MAFobj = TRUE,
                               tsbCol = 'Tumor_Sample_Barcode', 
                               table = 'ensGene')

# Converts ICGC Simple Somatic Mutation format file to MAF
esca.icgc <- system.file("extdata", "simple_somatic_mutation.open.ESCA-CN.sample.tsv.gz", package = "maftools")
esca.maf <- icgcSimpleMutationToMAF(icgc = esca.icgc, 
                                    addHugoSymbol = TRUE)
#Printing first 16 columns for display convenience.
print(esca.maf[1:5,1:16, with = FALSE])

12. 读入并处理gistic输出数据

## Read and summarize gistic output
all.lesions <- system.file("extdata", "all_lesions.conf_99.txt", package = "maftools")
amp.genes <- system.file("extdata", "amp_genes.conf_99.txt", package = "maftools")
del.genes <- system.file("extdata", "del_genes.conf_99.txt", package = "maftools")
scores.gis <- system.file("extdata", "scores.gistic", package = "maftools")

laml.gistic = readGistic(gisticAllLesionsFile = all.lesions, gisticAmpGenesFile = amp.genes, gisticDelGenesFile = del.genes, gisticScoresFile = scores.gis, isTCGA = TRUE)
class(laml.gistic)   #An object of class  GISTIC 

# 突出显示扩增和缺失区域片段的基因组图。
gisticChromPlot(gistic = laml.gistic, markBands = "all")
# 样本数对cytobands拷贝数变异显著性(-log10q)的气泡图
gisticBubblePlot(gistic = laml.gistic)


# 不同样本拷贝数变异图,类似展示基因突变的oncoplot
# par(fig=c(0,0.2,0.2,0.8))  #c(left, right,bottom,top)
#par(fin=c(5,6))
# par(mai =c(0,0.2,0.2,2))  # 无法设定
gisticOncoPlot(gistic = laml.gistic, clinicalData = getClinicalData(x = laml), 
               clinicalFeatures = 'FAB_classification',
               gene_mar = 8,
               barcode_mar = 6,
               sepwd_genes = 0.5,
               sepwd_samples = 0.25,
               sortByAnnotation = TRUE, top = 10)

参考

https://www.bioconductor.org/packages/release/bioc/html/maftools.html

  • 5
    点赞
  • 33
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值