if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("DESeq2")
install.packages("dplyr")
install.packages("tidyverse")
install.packages("gplots")
library(tidyverse)
library(dplyr)
library("DESeq2")
library(gplots)setwd("E:\\rna-seq\\down_gene\\htseq")
sample.names <- c("ZS135_1","ZS135_2","ZS135_3","ZS135_246_1","ZS135_246_2","ZS135_246_3")
file.names <- paste("./", sample.names,"_counts.txt",
sep="")
conditions <- factor(c(rep("control", 3), rep("treat", 3)))
sampleTable <- data.frame(sampleName=sample.names,
fileName=file.names,
condition=conditions)# read in the HTSeq count data
ddsHTSeq<-DESeqDataSetFromHTSeqCount(sampleTable=sampleTable,
directory=".",
design=~condition )
ddsHTSeq <- ddsHTSeq[rowSums(counts(ddsHTSeq)) > 10, ]
dds <-DESeq(ddsHTSeq)
###PCA图
rld <- rlogTransformation(dds, blind=FALSE)
plotPCA(rld, intgroup="condition", ntop=nrow(counts(ddsHTSeq)))###相关性热图
cU <-cor( as.matrix(assay(rld)))
cols <- c("dodgerblue3", "firebrick3")[conditions]
heatmap.2(cU, symm=TRUE, col= colorRampPalette(c("darkblue","white"))
(100),
labCol=colnames(cU), labRow=colnames(cU),
distfun=function(c) as.dist(1 - c), trace="none", Colv=TRUE,
cexRow=0.9, cexCol=0.9, key=F, font=2,
RowSideColors=cols, ColSideColors=cols)
###输出分析结果
res <- results(dds, contrast=c("condition", "treat", "control"))
grp.mean <- sapply(levels(dds$condition), function(lvl)
rowMeans(counts(dds,normalized=TRUE)[,dds$condition == lvl] ) )
norm.counts <- counts(dds, normalized=TRUE)
all <- data.frame(res, grp.mean, norm.counts)
write.table(all, file=“DESeq2_all_rm.txt”, sep="/t")# 加change列,标记上下调基因,可根据需求设定阈值
DEG <- as.data.frame(res)
DEG<- na.omit(DEG)
logFC = 1
P.adj = 0.05
k1 <- (DEG$padj< P.adj) & (DEG$log2FoldChange < -logFC)
k2 <- (DEG$padj < P.adj) & (DEG$log2FoldChange > logFC)
DEG <- mutate(DEG, change = ifelse(k1, "down", ifelse(k2, "up", "stable")))
table(DEG$change)
#筛选差异基因(基础表达量高、变化明显、p值显著)
select.log2FC <- abs(DEG$log2FoldChange)>1
select.Padj <- DEG$padj<0.05
select.vec <- select.log2FC & select.Padj
table(select.vec)degs.list <- as.character(rownames(DEG))[select.vec] #筛选出的基因名
write.table(degs.list,"DEG.txt",sep="\t",quote=F,row.names=F,col.names = F)
# 火山图
p <- ggplot(data = DEG,
aes(x = log2FoldChange,
y = -log10(padj))) +
geom_point(alpha = 0.4, size = 3.5,
aes(color = change)) +
ylab("-log10(Pvalue)")+
scale_color_manual(values = c("blue4", "grey", "red3"))+
geom_vline(xintercept = c(-logFC, logFC), lty = 4, col = "black", lwd = 0.8) +
geom_hline(yintercept = -log10(P.Value), lty = 4, col = "black", lwd = 0.8) +
theme_bw()
pplotMA(res, ylim=c(-5,5), alpha = 0.01)
topGene <- rownames(res)[res$padj <= sort(res$padj)[5] &!is.na(res$padj)]
with(res[topGene, ], {
points(baseMean, log2FoldChange, col="dodgerblue", cex=1.5, lwd=2)
text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")})
library(pheatmap)
sig.dat <- assay(rld)[res$padj < 0.01 & !is.na(res$padj), ]
annC <- data.frame(condition=conditions)
rownames(annC) <- colnames(sig.dat)
pheatmap(sig.dat, scale="row",fontsize_row=9,annotation_col = annC)
GO、KEGG作图
install.packages("ggrepel")
library(dplyr)
library(ggplot2)
library(ggrepel)
setwd("E:\\rna-seq\\down_gene")
#载入数据
KEGG_dataset <- read.table(file ="kegg.txt",header = TRUE, sep = "\t")#按照PValue从低到高排序[升序]
KEGG_dataset <- arrange(KEGG_dataset,KEGG_dataset[,5])
#Pathway列最好转化成因子型,否则作图时ggplot2会将所有Pathway按字母顺序重排序
#将Pathway列转化为因子型
KEGG_dataset$Term <- factor(KEGG_dataset$Term,levels = rev(KEGG_dataset$Term))#图片背景设定
mytheme <- theme(axis.title=element_text(face="bold", size=9,colour = 'black'), #坐标轴标题
axis.text=element_text(face="bold", size=8,colour = 'black'), #坐标轴标签
axis.line = element_line(size=0.5, colour = 'black'), #轴线
panel.background = element_rect(color='black'), #绘图区边框
legend.key = element_blank() #关闭图例边框
)#绘制KEGG气泡图
p <- ggplot(KEGG_dataset,aes(x=Fold.Enrichment,y=Term,colour=-1*log10(PValue),size=Count))+
geom_point()+
scale_size(range=c(2, 8))+
scale_colour_gradient(low = "blue",high = "red")+
theme_bw()+
ylab("KEGG Pathway Terms")+
xlab("Fold Enrichment")+
labs(color=expression(-log[10](PValue)))+
theme(legend.title=element_text(size=14), legend.text = element_text(size=14))+
theme(axis.title.y = element_text(margin = margin(r = 50)),axis.title.x = element_text(margin = margin(t = 20)))+
theme(axis.text.x = element_text(face ="bold",color="black",angle=0,vjust=1))
plot <- p+mytheme
plot
#保存图片
ggsave(plot,filename = "KEGG.pdf",width = 10,height = 6,dpi=300)
ggsave(plot,filename = "KEGG.png",width = 10,height = 6,dpi=300)#################GO#####################
# 安装并加载所需的包
install.packages("readxl")
install.packages("ggplot2")
library(ggplot2)
library(readxl)
library(ggplot2)#没有自己安装 install.package("ggplot2")
library(dplyr)go_enrich= read.table("gO.txt",header = T,sep = "\t")
# 将'term'列转换为有序因子,以便绘制纵向柱状图时按照指定的顺序显示
go_enrich$Term <- factor(go_enrich$Term, levels = go_enrich$Term, ordered = TRUE)
# 筛选p值从小到大的前十个条目
go_enrich_filtered <- go_enrich %>%
arrange(PValue) %>%
group_by(Category) %>%
slice_head(n = 10) %>%
ungroup()# 根据p值对term进行重新排序
go_enrich_filtered$Term <- factor(go_enrich_filtered$Term, levels = rev(go_enrich_filtered$Term))
# 绘制纵向柱状图并调整字体大小
ggplot(go_enrich_filtered, aes(x = Term, y = Count, fill = Category)) +
geom_bar(stat = "identity", width = 0.8) +
scale_fill_manual(values = c("#6666FF", "#33CC33", "#FF6666")) +
facet_wrap(~Category, ncol = 1, scales = "free_y") +
coord_flip() +
xlab("GO term") +
ylab("Gene_Number") +
labs(title = "GO Terms Enrich") +
theme_bw() +
theme(axis.text.x = element_text(size = 8, angle = 90, hjust = 1))# 气泡图
p2 <- ggplot(go_enrich_filtered,
aes(y = Term, x= Count)) +
geom_point(aes(size = Count, color = PValue)) +
facet_wrap(~Category, ncol = 1, scales = "free_y") +
scale_color_gradient(low = "red", high = "blue") +
labs(color = expression(PValue, size = "Count"),
x = "Gene Ratio", y = "GO term", title = "GO Enrichment") +
theme_bw()# 显示气泡图
print(p2)