rnaseq_Deseq2差异分析

 对标准化后的表达矩阵进行差异分析

########### Deseq2 ###############################
##Deseq2
library(DESeq2)
library(dplyr)
library(psych)
library(ggrepel)
library(pheatmap)
library(ggplot2)
library(dplyr)
library(gplots)
library(VennDiagram)
#导入counts数据矩阵,以行为基因,列为样本
count <- read.csv("C:\\Users\\DI\\Desktop\\PRJNA516151_lnc\\lncRNA_Gastrocnemius_transcript_matrix.csv",header=T,row.names=1)

## 过滤在所有重复样本中小于1的基因,表达量太低也没研究意义
count <- count[rowMeans(count)>1,]

##载入样本信息
data <- read.table("C:\\Users\\DI\\Desktop\\PRJNA516151_lnc\\sample_data.txt",header = T,row.names = 1)

#一定要变为因子数据,否者用DESeq2包分析时候会出错
data[,1] <- as.factor(data$Type)

all(rownames(data) %in% colnames(count))
all(rownames(data) == colnames(count))


dds <-  DESeqDataSetFromMatrix(countData = count,colData = data,design = ~ Type)
dim(dds)

#过滤
dds <- dds[rowSums(counts(dds)) > 1,]
nrow(dds) 

## 差异比较

dep <- DESeq(dds)

res <- results(dep,contrast = c('Type','12MO','27MO'))

diff = res
diff <- na.omit(diff)  ## 去除缺失值NA
dim(diff)

####################
##差异表达分析

#dds <-  DESeqDataSetFromMatrix(countData = count,colData = data,design = ~ Type)
#dds1 <- DESeq(dds, fitType = 'mean', minReplicatesForReplace = 7, parallel = FALSE) 
#res <- results(dds1)
#summary(res)
# res格式转化:用data.frame转化为表格形式
res1 <- data.frame(res, stringsAsFactors = FALSE, check.names = FALSE)
# 依次按照pvalue值log2FoldChange值进行排序
res1 <- res1[order(res1$pvalue, res1$log2FoldChange, decreasing = c(FALSE, TRUE)), ]
# 获取padj(p值经过多重校验校正后的值)小于0.05,表达倍数取以2为对数后大于1或者小于-1的差异表达基因。
res1_up<- res1[which(res1$log2FoldChange >= 1 & res1$pvalue < 0.05),]      # 表达量显著上升的基因
res1_down<- res1[which(res1$log2FoldChange <= -1 & res1$pvalue < 0.05),]    # 表达量显著下降的基因
res1_total <- rbind(res1_up,res1_down)

df <- count[intersect(rownames(count),rownames(res1_total)),] 

#write.csv(diff,"all_diff_14v16.csv")

####绘制热图
# 在原表达矩阵中找到差异表达基因
df2<- as.matrix(df)                                                 
pheatmap(df2,
         show_rownames = F,
         show_colnames = T,
         cluster_cols = F,
         cluster_rows=T,
         height=10,  
         scale = "row",
         frontsize = 10,
         angle_col=45, 
         color =colorRampPalette(c("#8854d0", "#ffffff","#fa8231"))(100),
         clustering_method = 'single',
) 

#########火山图
genes<- res1
# 根据上调、下调、不变为基因添加颜色信息
genes$color <- ifelse(genes$padj<0.05 & abs(genes$log2FoldChange)>= 1,ifelse(genes$log2FoldChange > 1,'red','blue'),'gray')
color <- c(red = "red",gray = "gray",blue = "blue")

p <- ggplot(
  # 指定数据、映射、颜色
  genes, aes(log2FoldChange, -log10(padj), col = color)) +  
  geom_point() +
  theme_bw() +
  scale_color_manual(values = color) +
  # 辅助线
  labs(x="log2 (fold change)",y="-log10 (q-value)") +
  geom_hline(yintercept = -log10(0.05), lty=4,col="grey",lwd=0.6) +
  geom_vline(xintercept = c(-1, 1), lty=4,col="grey",lwd=0.6) +
  # 图例
  theme(legend.position = "none",
        panel.grid=element_blank(),
        axis.title = element_text(size = 16),
        axis.text = element_text(size = 14)+
          # 注释
          geom_text_repel(
            data = subset(genes, padj < 1-100 & abs(genes$log2FoldChange) >= 10),
            aes(label = rownames(genes)),
            size = 5,
            box.padding = unit(0.35, "lines"),
            point.padding = unit(0.3, "lines"))
  )
p


#write.csv(diff,"C:\\Users\\DI\\Desktop\\PRJNA516151_lnc\\all_diff.csv")

获得差异分析热图以及火山图

  • 4
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值