ChIPseeker包对ChIP-seq peaks可视化

最新推荐文章于 2024-12-16 13:57:32 发布

qq_27390023

最新推荐文章于 2024-12-16 13:57:32 发布

阅读量2k

点赞数 3

文章标签： r语言生物信息学

本文链接：https://blog.csdn.net/qq_27390023/article/details/125359197

版权

本文介绍了ChIPseeker软件包的功能，包括峰值定位、基因注释、富集分析和可视化，帮助用户理解转录因子结合位点并进行跨实验比较。通过实例演示了如何使用ChIPseeker进行数据处理和生物学推断。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

ChIPseeker软件包实现了以下功能：检索峰值附近最近的基因、注释峰值的基因组区域、估计芯片峰值数据集之间重叠重要性的统计方法，以及整合地理数据库，以便用户将自己的数据集与存储在数据库中的数据集进行比较。比较可以用来推断合作规则，因此可以用来生成假设。实现了几个可视化功能，以总结峰实验的覆盖范围、结合到TSS区域的峰的平均轮廓和热图、基因组注释、到TSS的距离以及峰或基因的重叠。

######ChIPseeker####

# if (!require("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# 
# BiocManager::install("ChIPseeker")

library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(GenomicFeatures)
library(GenomicRanges)
library(clusterProfiler)
library(ggplot2)

txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene

### 1. 读入chip-seq峰值的bed文件
bed_file <- "/Users/test_summits.bed"
## .bed或.bed.gz 文件，peak的位置，可以由macs2软件生成。
peak <- readPeakFile(bed_file)
class(peak)  # "GRanges"
peak

### 2.作图展示peak位置
# plot peak coverage
covplot(peak, weightCol="V5")
# 画指定区域
covplot(peak, weightCol="V5", chrs=c("chr17", "chr18"), xlim=c(4.5e7, 5e7))
covplot(peak, weightCol="V5", chrs=c("chr17", "chr18"))

# 得到启动子区域
promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
class(promoter) #"GRanges"
# 计算tag矩阵
tagMatrix <- getTagMatrix(peak, windows=promoter)

# plot the heatmap of tagMatrix
tagHeatmap(tagMatrix, xlim=c(-3000, 3000), color="red")
# # 以上两行代码等同于下面一行代码
# #plot the heatmap of peaks align to flank sequences of TSS
# peakHeatmap(bed_file, TxDb=txdb, upstream=3000, downstream=3000, color="red")

# plot the profile of peaks
plotAvgProf(tagMatrix, xlim=c(-3000, 3000),
            xlab="Genomic Region (5'->3')", 
            ylab = "Read Count Frequency")
# 加上95%置信区间
plotAvgProf(tagMatrix, xlim=c(-3000, 3000), 
            conf = 0.95, resample = 1000)

### 3.peaks注释
## 注释峰值Annotate peaks
peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000),
                         TxDb=txdb, annoDb="org.Hs.eg.db")
class(peakAnno) #"csAnno"

# 保存文件
write.table(peakAnno, file = 'peak1.txt',sep = '\t', 
            quote = FALSE, row.names = FALSE)

# 统计作图
plotAnnoPie(peakAnno)
plotAnnoBar(peakAnno)
#dev.new()
vennpie(peakAnno)
# upsetplot(peakAnno, vennpie=TRUE) #wrong
plotDistToTSS(peakAnno,
              title="Distribution of transcription factor-binding loci\nrelative to TSS")

### 4.富集分析
library(ReactomePA)
pathway1 <- enrichPathway(as.data.frame(peakAnno)$geneId)
head(pathway1, 2)

gene <- seq2gene(peak, tssRegion = c(-1000, 1000), flankDistance = 3000, TxDb=txdb)
pathway2 <- enrichPathway(gene)
head(pathway2, 2)
dotplot(pathway2)

### 5. 比较不同实验富集到的peaks（先注释好）
files <- getSampleFiles() #自带bed.gz文件
txdb_hg19 <- TxDb.Hsapiens.UCSC.hg19.knownGene
peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb_hg19,
                       tssRegion=c(-3000, 3000), verbose=FALSE)
plotAnnoBar(peakAnnoList)

genes = lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
#names(genes) = sub("_", "\n", names(genes))
compKEGG <- compareCluster(geneCluster   = genes,
                           fun           = "enrichKEGG",
                           pvalueCutoff  = 0.05,
                           pAdjustMethod = "BH")
# fun	:One of "groupGO", "enrichGO", "enrichKEGG", "enrichDO" or "enrichPathway" .
dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")

genes= lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
vennplot(genes)

p <- GRanges(seqnames=c("chr1", "chr3"),
             ranges=IRanges(start=c(1, 100), end=c(50, 130)))
shuffle(p, TxDb=txdb_hg19)

# peaks重叠的统计学意义 
# calculate overlap significant of ChIP experiments 
# based on the genome coordinations
enrichPeakOverlap(queryPeak     = files[[5]],
                  targetPeak    = unlist(files[1:4]),
                  TxDb          = txdb_hg19,
                  pAdjustMethod = "BH",
                  nShuffle      = 50,
                  chainFile     = NULL,
                  verbose       = FALSE)