R语言 高通量数据处理-3 Counts_To_Expression_Matrix

# Counts_To_Expression_Matrix.R

工程文件目录:

项目文件目录:

# Pull in the project's package-management helpers
# (presumably where pkgs_in() is defined -- confirm in R_Packages_Manage.R).
source("./R_Function/R_Packages_Manage.R", encoding = "utf-8")

# Packages this script relies on. The order is kept as-is because the
# attach order decides which package masks which function name.
rpackages <- c(
  "GEOquery", "tidyverse", "magrittr", "readxl", "limma", "stringr",
  "data.table", "easyConvert", "Rsubread", "impute", "org.Hs.eg.db",
  "dplyr"
)
pkgs_in(rpackages)

# The GEO series accession (GSE_NO1) is not assigned in this script; it has to
# exist before this point (e.g. set by a calling script). Example values:
# GSE_NO1 <- 'GSE235842'
# GPL_NO1 <- 'GPL24676'
if (!exists("GSE_NO1")) {
  # Fail early with an actionable message instead of the opaque
  # "object 'GSE_NO1' not found" raised from inside paste0().
  stop(
    "GSE_NO1 is not defined; assign the GEO series accession ",
    "(e.g. GSE_NO1 <- 'GSE235842') before running this script.",
    call. = FALSE
  )
}
stopifnot(is.character(GSE_NO1), length(GSE_NO1) == 1L)

# Input and output both point at the same per-series OutPut_Data directory.
# The trailing slash is kept so file names can be appended with paste0().
inputPath <- paste0('./Gene_Data/GDCdata_HTC_BioProject/', GSE_NO1, '/OutPut_Data/')
outputPath <- paste0('./Gene_Data/GDCdata_HTC_BioProject/', GSE_NO1, '/OutPut_Data/')
GeneInfoPath <- './Gene_Data/GDCdata_HTC_BioProject/Gencode_Annotation/gene_info'

# Restore the objects saved in the two .RData files into the current
# environment (presumably including the Counts1 / Counts2 objects used by the
# rest of the script -- verify against the script that wrote these files).
load(paste0(inputPath, "Counts.RData"))
load(paste0(inputPath, "gene.RData"))

# Coerce the loaded count objects to plain data frames.
Counts1 <- as.data.frame(Counts1)
Counts2 <- as.data.frame(Counts2)

# Counts3 is a working copy of Counts2 whose row names are turned into an
# explicit ID column.
Counts3 <- as.data.frame(Counts2)

# make.unique() appends ".1", ".2", ... to repeated names so every ID is
# distinct.
unique_row_names <- make.unique(row.names(Counts3))
row.names(Counts3) <- unique_row_names

# Put the IDs in the first column and reset the row names to 1..n.
Counts3 <- data.frame(
  ID = unique_row_names,
  Counts3,
  row.names = NULL
)

# One annotation approach (org.Hs.eg.db) ----
# NOTE(review): the original note is ambiguous about whether this approach
# annotates every ID -- confirm by checking for NA symbols in the result.
# Exploration snippets kept for reference:
# library('org.Hs.eg.db')
# columns(org.Hs.eg.db)
# length(keys(org.Hs.eg.db))
# length(keys(org.Hs.eg.db, keytype = 'ENTREZID'))
# length(keys(org.Hs.eg.db, keytype = 'REFSEQ'))
# length(keys(org.Hs.eg.db, keytype = 'UNIPROT'))
# gplt1 <- select(org.Hs.eg.db, keys = Counts3$ID, columns = 'SYMBOL', keytype = 'ENTREZID')

# Look up the SYMBOL for each ID, treating the IDs as ENTREZID keys.
# The call is namespace-qualified so it cannot be confused with dplyr::select.
GeneInfo <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = Counts3$ID,
  columns = "SYMBOL",
  keytype = "ENTREZID"
)

# Rename the key column to "ID" so it lines up with the ID column of Counts3.
names(GeneInfo)[names(GeneInfo) == "ENTREZID"] <- "ID"

# Attach the gene symbol to every count row by matching on the shared ID column.
# A left join keeps every row of Counts3; IDs without a match get SYMBOL = NA.
dat_result <- Counts3 %>%
  left_join(., GeneInfo, by = 'ID')

# Reorder the columns: identifier columns first (ID, SYMBOL), then the
# remaining columns in their existing order. The original statement here was
# a no-op self-assignment. dplyr::select is namespace-qualified because
# AnnotationDbi also exports a select().
dat_result <- dat_result %>%
  dplyr::select(ID, SYMBOL, dplyr::everything())

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值