# Counts_To_Expression_Matrix.R
# Engineering (working) file directory:
# Project file directory:
# Build an annotated count table: load the saved count matrices, expose the
# row names as an `ID` column, and attach gene symbols looked up by Entrez ID.

# Load the project's package helper script. pkgs_in() used below is not
# defined in this file; presumably it comes from this helper -- TODO confirm.
source("./R_Function/R_Packages_Manage.R", encoding = "utf-8")
rpackages <- c(
  "GEOquery", "tidyverse", "magrittr", "readxl", "limma", "stringr",
  "data.table", "easyConvert", "Rsubread", "impute", "org.Hs.eg.db", "dplyr"
)
pkgs_in(rpackages)

# GSE_NO1 is not assigned anywhere in this script (the example assignments
# below are commented out), so it has to be provided by the caller.
# GSE_NO1 <- 'GSE235842'
# GPL_NO1 <- 'GPL24676'
if (!exists("GSE_NO1")) {
  stop(
    "GSE_NO1 is not defined; set it (e.g. GSE_NO1 <- 'GSE235842') ",
    "before running this script.",
    call. = FALSE
  )
}

# Input and output currently point at the same per-series directory.
inputPath <- paste0("./Gene_Data/GDCdata_HTC_BioProject/", GSE_NO1, "/OutPut_Data/")
outputPath <- paste0("./Gene_Data/GDCdata_HTC_BioProject/", GSE_NO1, "/OutPut_Data/")
GeneInfoPath <- "./Gene_Data/GDCdata_HTC_BioProject/Gencode_Annotation/gene_info"

# Restore previously saved objects. Counts1 and Counts2 are expected to come
# from these files (they are not created anywhere else in this script).
load(paste0(inputPath, "Counts.RData"))
load(paste0(inputPath, "gene.RData"))

Counts1 <- as.data.frame(Counts1)
Counts2 <- as.data.frame(Counts2)
# NOTE(review): Counts3 is derived from Counts2, not from a loaded Counts3.
# Kept as in the original; confirm this is intentional and not a copy-paste slip.
Counts3 <- as.data.frame(Counts2)

# Move the row names into an explicit ID column (de-duplicated defensively)
# and reset the row names to the default integer index.
unique_row_names <- make.unique(rownames(Counts3))
rownames(Counts3) <- unique_row_names
Counts3 <- data.frame(ID = unique_row_names, Counts3, row.names = NULL)

# Annotation via org.Hs.eg.db: exploratory calls kept for reference ----
# library('org.Hs.eg.db')
# columns(org.Hs.eg.db)
# length(keys(org.Hs.eg.db))
# length(keys(org.Hs.eg.db, keytype = 'ENTREZID'))
# length(keys(org.Hs.eg.db, keytype = 'REFSEQ'))
# length(keys(org.Hs.eg.db, keytype = 'UNIPROT'))
# gplt1 <- select(org.Hs.eg.db, keys = Counts3$ID, columns = 'SYMBOL', keytype = 'ENTREZID')
#
# The IDs are passed as ENTREZID keys; the namespace-qualified call avoids
# picking up another package's select().
GeneInfo <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = Counts3$ID,
  columns = "SYMBOL",
  keytype = "ENTREZID"
)
# Rename the key column so it matches the ID column of the count table.
colnames(GeneInfo)[colnames(GeneInfo) == "ENTREZID"] <- "ID"

# Attach the SYMBOL column to the counts, matching rows on ID.
dat_result <- dplyr::left_join(Counts3, GeneInfo, by = "ID")

# NOTE(review): the original had a "reorder the columns" step here that only
# assigned dat_result to itself; no reordering is performed, so the no-op
# assignment was dropped.