# 1. 准备感兴趣基因集(genelist)并进行适当格式转换
# Load the curated list of genes of interest.
# Set the working directory so the relative CSV path below resolves.
setwd("E:/Rstudio/xieruyu/RNA-seq/2022-07-28/Rtreatment/heatmap")
# Read the gene table. It is stored as `gene_table` rather than `list` so the
# name of base::list() is not shadowed.
gene_table <- read.csv("zonghe.csv")
# Keep the first column as a plain vector; the next step feeds it to bitr()
# as gene symbols.
genes_V1 <- as.vector(gene_table[, 1])
# View() opens a GUI data viewer, so only call it in an interactive session;
# this keeps a non-interactive run from stopping here.
if (interactive()) {
  View(genes_V1)
}
# Convert the gene identifiers: mouse gene symbols -> mouse Ensembl gene IDs.
# clusterProfiler provides bitr(); install once if needed:
#   BiocManager::install("clusterProfiler")
library(clusterProfiler)
# Organism annotation package; install once if needed:
#   BiocManager::install("org.Mm.eg.db")
# Annotation packages for other organisms:
#   mouse org.Mm.eg.db, human org.Hs.eg.db, pig org.Ss.eg.db,
#   chicken org.Gg.eg.db, yeast org.Sc.sgd.db,
#   E. coli strain K12 org.EcK12.eg.db, E. coli strain Sakai org.EcSakai.eg.db,
#   dog org.Cf.eg.db, cattle org.Bt.eg.db
library(org.Mm.eg.db)
# keytypes(<OrgDb>) lists the ID types an annotation package can convert
# between. Example output recorded from keytypes(org.Hs.eg.db); pick the types
# that match the input IDs and the desired output:
#  [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS" "ENTREZID"
#  [7] "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME" "GO" "GOALL"
# [13] "IPI" "MAP" "OMIM" "ONTOLOGY" "ONTOLOGYALL" "PATH"
# [19] "PFAM" "PMID" "PROSITE" "REFSEQ" "SYMBOL" "UCSCKG"
# SYMBOL -> ENSEMBL
genes_V2 <- bitr(
  geneID = genes_V1,     # IDs to convert
  fromType = "SYMBOL",   # ID type of the input
  toType = "ENSEMBL",    # target ID type (one or several may be given)
  OrgDb = org.Mm.eg.db   # annotation package used for the lookup
)
# Map mouse Ensembl gene IDs to chicken Ensembl gene IDs with a BioMart
# linked-dataset query (getLDS).
# Background reading:
# https://cloud.tencent.com/developer/article/1708373
# https://www.jianshu.com/p/78a64d2d998a
# Install once if needed: BiocManager::install("biomaRt")
library(biomaRt)
# useMart() takes the BioMart database ("ensembl") and the species dataset.
# Both marts use the same archive host so the two datasets are queried from
# the same Ensembl archive site. Only the mouse and chicken marts are needed
# for this query.
chicken <- useMart(
  "ensembl",
  dataset = "ggallus_gene_ensembl",
  host = "https://dec2021.archive.ensembl.org/"
)
mouse <- useMart(
  "ensembl",
  dataset = "mmusculus_gene_ensembl",
  host = "https://dec2021.archive.ensembl.org/"
)
genes_V3 <- getLDS(
  attributes = "ensembl_gene_id",   # column returned from the input (mouse) dataset
  filters = "ensembl_gene_id",      # mouse field the input values are matched against
  values = genes_V2$ENSEMBL,        # mouse Ensembl gene IDs from the bitr() step
  mart = mouse,                     # mart of the input dataset
  attributesL = "ensembl_gene_id",  # column returned from the linked (chicken) dataset
  martL = chicken,                  # mart of the linked dataset
  uniqueRows = TRUE                 # spelled out: `T` is an ordinary, reassignable variable
)
# 通过listAttributes(mouse)查询mouse数据库中可使用的属性参数如下:
# 1 ensembl_gene_id
# 2 ensembl_gene_id_version
# 3 ensembl_transcript_id
# 4 ensembl_transcript_id_version
# 5 ensembl_peptide_id
# 6 ensembl_peptide_id_version
# 7 ensembl_exon_id
# 8 description
# 9 chromosome_name
# 10 start_position
# 11 end_position
# 12 strand
# 13 band
# 14 transcript_start
# 15 transcript_end
# 16 transcription_start_site
# 17 transcript_length
# 通过listFilters(mouse)查询mouse数据库中可使用的过滤器(filters)参数如下:
# 1 chromosome_name
# 2 start
# 3 end
# 4 strand
# 5 chromosomal_region
# 6 with_biogrid
# 7 with_ccds
# 8 with_chembl
# 9 with_entrezgene_trans_name
# 10 with_embl
# Convert the chicken Ensembl gene IDs to chicken gene symbols.
library(org.Gg.eg.db)
genes_V4 <- bitr(
  genes_V3$Gene.stable.ID.1,  # presumably the chicken ID column of the getLDS() result -- confirm
  fromType = "ENSEMBL",       # ID type of the input
  toType = "SYMBOL",          # target ID type (one or several may be given)
  OrgDb = org.Gg.eg.db        # chicken annotation package used for the lookup
)
# Inspect the intermediate and final tables. View() opens a GUI data viewer,
# so it is restricted to interactive sessions; this keeps a non-interactive
# run from stopping here before the result file is written.
if (interactive()) {
  View(genes_V2)
  View(genes_V3)
  View(genes_V4)
}
# Save the converted table into the heatmap folder for later duplicate checking.
write.table(
  genes_V4,
  file = "E:/Rstudio/xieruyu/RNA-seq/2022-07-28/Rtreatment/heatmap/T_cell_pathway_genes.txt",
  row.names = FALSE
)
# 2. 对各样本的FPKM值进行整理
# Collect the per-sample FPKM tables.
# Remove every object from the workspace before starting this section.
rm(list = ls())
# Report the current working directory.
getwd()
# Import the gene-level TAB files with FPKM values (StringTie output, per the
# original note) into the workspace.
# Switch to the folder that holds them.
setwd("E:/Rstudio/xieruyu/RNA-seq/2022-07-28/gene_tab/")
# Each file is tab-separated with a header row; only the double quote is
# treated as a quoting character.
A1.gene.tab <- read.table(
  "E:/Rstudio/xieruyu/RNA-seq/2022-07-28/gene_tab/A1_FRAS220122137.gene.tab",
  sep = "\t",
  quote = "\"",
  header = TRUE
)
A2.gene.tab <- read.table(
  "E:/Rstudio/xieruyu/RNA-seq/2022-07-28/gene_tab/A2_FRAS220122138.gene.tab",
  sep = "\t",
  quote = "\"",
  header = TRUE
)
A3.gene.tab <- read.table(
  "E:/Rstudio/xieruyu/RNA-seq/2022-07-28/gene_tab/A3_FRAS220122139.gene.tab",
  sep = "\t",
  quote = "\"",
  header = TRUE
)
B1.gene.tab <- read.table(
  "E:/Rstudio/xieruyu/RNA-seq/2022-07-28/gene_tab/B1_FRAS220122140.gene.tab",
  sep = "\t",
  quote = "\"",
  header = TRUE
)
B2.gene.tab <- read.table("E:/Rstudio/xieruyu/RNA-seq/2022-07-28/gene_tab/B2_FRAS220122141.gene.tab&#