rnaseq_Deseq2差异分析

hudi18

已于 2024-04-02 21:53:41 修改

阅读量425

点赞数 4

分类专栏： rnaseq 文章标签： r语言

于 2024-04-02 21:46:39 首次发布

本文链接：https://blog.csdn.net/qq_46454013/article/details/137292461

版权

rnaseq 专栏收录该内容

6 篇文章 0 订阅

订阅专栏

对标准化后的表达矩阵进行差异分析

########### Deseq2 ###############################
##Deseq2
library(DESeq2)
library(dplyr)
library(psych)
library(ggrepel)
library(pheatmap)
library(ggplot2)
library(dplyr)
library(gplots)
library(VennDiagram)
#导入counts数据矩阵，以行为基因，列为样本
count <- read.csv("C:\\Users\\DI\\Desktop\\PRJNA516151_lnc\\lncRNA_Gastrocnemius_transcript_matrix.csv",header=T,row.names=1)

## 过滤在所有重复样本中小于1的基因，表达量太低也没研究意义
count <- count[rowMeans(count)>1,]

##载入样本信息
data <- read.table("C:\\Users\\DI\\Desktop\\PRJNA516151_lnc\\sample_data.txt",header = T,row.names = 1)

#一定要变为因子数据，否者用DESeq2包分析时候会出错
data[,1] <- as.factor(data$Type)

all(rownames(data) %in% colnames(count))
all(rownames(data) == colnames(count))


dds <-  DESeqDataSetFromMatrix(countData = count,colData = data,design = ~ Type)
dim(dds)

#过滤
dds <- dds[rowSums(counts(dds)) > 1,]
nrow(dds) 

## 差异比较

dep <- DESeq(dds)

res <- results(dep,contrast = c('Type','12MO','27MO'))

diff = res
diff <- na.omit(diff)  ## 去除缺失值NA
dim(diff)

####################
##差异表达分析

#dds <-  DESeqDataSetFromMatrix(countData = count,colData = data,design = ~ Type)
#dds1 <- DESeq(dds, fitType = 'mean', minReplicatesForReplace = 7, parallel = FALSE) 
#res <- results(dds1)
#summary(res)
# res格式转化：用data.frame转化为表格形式
res1 <- data.frame(res, stringsAsFactors = FALSE, check.names = FALSE)
# 依次按照pvalue值log2FoldChange值进行排序
res1 <- res1[order(res1$pvalue, res1$log2FoldChange, decreasing = c(FALSE, TRUE)), ]
# 获取padj（p值经过多重校验校正后的值）小于0.05，表达倍数取以2为对数后大于1或者小于-1的差异表达基因。
res1_up<- res1[which(res1$log2FoldChange >= 1 & res1$pvalue < 0.05),]      # 表达量显著上升的基因
res1_down<- res1[which(res1$log2FoldChange <= -1 & res1$pvalue < 0.05),]    # 表达量显著下降的基因
res1_total <- rbind(res1_up,res1_down)

df <- count[intersect(rownames(count),rownames(res1_total)),] 

#write.csv(diff,"all_diff_14v16.csv")

####绘制热图
# 在原表达矩阵中找到差异表达基因
df2<- as.matrix(df)                                                 
pheatmap(df2,
         show_rownames = F,
         show_colnames = T,
         cluster_cols = F,
         cluster_rows=T,
         height=10,  
         scale = "row",
         frontsize = 10,
         angle_col=45, 
         color =colorRampPalette(c("#8854d0", "#ffffff","#fa8231"))(100),
         clustering_method = 'single',
) 

#########火山图
genes<- res1
# 根据上调、下调、不变为基因添加颜色信息
genes$color <- ifelse(genes$padj<0.05 & abs(genes$log2FoldChange)>= 1,ifelse(genes$log2FoldChange > 1,'red','blue'),'gray')
color <- c(red = "red",gray = "gray",blue = "blue")

p <- ggplot(
  # 指定数据、映射、颜色
  genes, aes(log2FoldChange, -log10(padj), col = color)) +  
  geom_point() +
  theme_bw() +
  scale_color_manual(values = color) +
  # 辅助线
  labs(x="log2 (fold change)",y="-log10 (q-value)") +
  geom_hline(yintercept = -log10(0.05), lty=4,col="grey",lwd=0.6) +
  geom_vline(xintercept = c(-1, 1), lty=4,col="grey",lwd=0.6) +
  # 图例
  theme(legend.position = "none",
        panel.grid=element_blank(),
        axis.title = element_text(size = 16),
        axis.text = element_text(size = 14)+
          # 注释
          geom_text_repel(
            data = subset(genes, padj < 1-100 & abs(genes$log2FoldChange) >= 10),
            aes(label = rownames(genes)),
            size = 5,
            box.padding = unit(0.35, "lines"),
            point.padding = unit(0.3, "lines"))
  )
p


#write.csv(diff,"C:\\Users\\DI\\Desktop\\PRJNA516151_lnc\\all_diff.csv")

获得差异分析热图以及火山图