library(GEOquery)
library(limma)
# Download the series matrices for GSE14520 into the working directory.
# The accession is GSE14520 (not GSE76427): the platform notes further down
# (GPL3921 / GPL571), the second list element read via gset[[2]], and the
# GSE14520-named output and clinical files all refer to that series.
gset <- getGEO(
  "GSE14520",
  destdir = ".",
  AnnotGPL = FALSE, # annotation file
  getGPL = FALSE
)
# First series matrix returned by getGEO().
# NOTE(review): presumably the GPL3921 samples, as the platform note on the
# annotation step below suggests -- confirm with names(gset).
a <- gset[[1]]
# Expression matrix of the first series matrix (rows are matched against
# probe IDs in the annotation step).
dat1 <- exprs(a)
dim(dat1)
# Sample-level phenotype table of the same series matrix.
metadata <- pData(a)
# Convert probe IDs to gene symbols (GPL3921)
library(hthgu133a.db)
ids <- toTable(hthgu133aSYMBOL)
names(ids) <- c("probe_id", "symbol")
# Keep only probes that carry a symbol and are present in the expression matrix.
ids <- ids[ids$symbol != "", ]
ids <- ids[ids$probe_id %in% rownames(dat1), ]
# Restrict the expression matrix to the annotated probes, in annotation order.
dat1 <- dat1[ids$probe_id, ]
# Median expression of each probe across samples; used to pick one probe
# per gene symbol.
ids$median <- apply(dat1, MARGIN = 1, FUN = median)
# Sort by symbol and median, both descending, so the first row of every
# symbol is its highest-median probe; then keep only that first row.
ids <- ids[with(ids, order(symbol, median, decreasing = TRUE)), ]
ids <- ids[!duplicated(ids$symbol), ]
# Re-index the matrix by the selected probes and label rows with gene symbols.
dat1 <- dat1[ids$probe_id, ]
rownames(dat1) <- ids$symbol
rt1 <- dat1
## GPL571: convert probe IDs to gene symbols for the second series matrix
library(hgu133a2.db)
b <- gset[[2]]
dat2 <- exprs(b)
id2 <- toTable(hgu133a2SYMBOL)
names(id2) <- c("probe_id", "symbol")
# Discard probes without a symbol, then probes missing from the matrix.
id2 <- id2[id2$symbol != "", ]
id2 <- id2[id2$probe_id %in% rownames(dat2), ]
# Align the expression matrix with the remaining annotation rows.
dat2 <- dat2[id2$probe_id, ]
# Per-probe median across samples, the criterion for choosing between
# several probes of the same gene.
id2$median <- apply(dat2, MARGIN = 1, FUN = median)
# Descending sort on (symbol, median) puts the highest-median probe first
# within each symbol; duplicated() then drops the rest.
id2 <- id2[with(id2, order(symbol, median, decreasing = TRUE)), ]
id2 <- id2[!duplicated(id2$symbol), ]
# Final matrix: one row per gene symbol.
dat2 <- dat2[id2$probe_id, ]
rownames(dat2) <- id2$symbol
rt2 <- dat2
# Gene symbols available in both per-platform matrices.
same <- intersect(rownames(rt2), rownames(rt1))
length(same)
# Combine the two matrices column-wise on the shared genes
# (columns of rt2 first, then columns of rt1).
rt <- cbind(rt2[same, ], rt1[same, ])
## Normalization
# Box plots before and after between-array normalization, for visual comparison.
boxplot(rt, las = 2)
rt <- normalizeBetweenArrays(rt)
boxplot(rt, las = 2)
# Tab-separated export of the normalized matrix, without quoting.
write.table(rt, file = "GSE14520.txt", sep = "\t", quote = FALSE)
### Clinical data preparation ###
# Read the clinical supplement; its first column becomes the row names, which
# are matched against the sample names of the expression matrix below.
cli <- read.table(
  "./00.data/03.GEO/GSE14520_Extra_Supplement.txt",
  sep = "\t", header = TRUE, row.names = 1, check.names = FALSE
)
# Select the required columns by name rather than by position (previously
# columns 1, 8 and 9), so a reordered file fails loudly with
# "undefined columns selected" instead of silently yielding wrong data.
clinical <- cli[, c("Tissue Type", "Survival months", "Survival status")]
table(clinical$`Tissue Type`)
# %in% never returns NA, so samples with a missing tissue type are dropped
# directly instead of becoming all-NA placeholder rows.
clinical <- clinical[clinical$`Tissue Type` %in% "Tumor", ]
# Remove samples with any missing value in the selected columns.
clinical <- na.omit(clinical)
# Follow-up time is divided by 12 (months -> years, going by the column name);
# the status column is carried over unchanged.
clinical$futime <- clinical[["Survival months"]] / 12
clinical$fustat <- clinical[["Survival status"]]
clinical <- clinical[, c("futime", "fustat")]
# Transpose the expression matrix so that samples are rows, like `clinical`.
rt1 <- t(rt)
same1 <- intersect(row.names(clinical), row.names(rt1))
if (length(same1) == 0) {
  stop(
    "No sample names are shared between the clinical table and the ",
    "expression matrix.",
    call. = FALSE
  )
}
# drop = FALSE keeps a matrix even when a single sample matches; otherwise the
# row would collapse to a vector and cbind() would recycle it incorrectly.
rt <- cbind(clinical[same1, ], rt1[same1, , drop = FALSE])
save(rt, file = "./00.data/03.GEO/GSE14520_input.RData")
# GEO处理总结
# 最新推荐文章于 2024-01-20 20:53:36 发布