目录
单细胞样本读取
每个样本文件夹下的文件都必须是 gz 压缩格式(.gz)
例如,文件夹中那些未压缩(非 gz 格式)的样本文件,是无法用 Read10X() 函数读取的
实际上,每个样本文件夹中只需要有以下三个文件即可(barcodes.tsv.gz、features.tsv.gz、matrix.mtx.gz)
读取代码
# Read every 10X sample found under `data_path`, build one Seurat object per
# sample, and merge them into a single object (`sce.all`).
# Data are stored locally under "E:/GEO/scRNAseq/HNSCC/GSE164690/data".
data_path <- "E:/GEO/scRNAseq/HNSCC/GSE164690/data"
files <- list.files(data_path, full.names = TRUE)
files
# Fail early with a readable message instead of a subscript error at merge().
if (length(files) == 0) {
  stop("No sample folders found in: ", data_path, call. = FALSE)
}
# Sample label = everything after the first "_" of the entry name.
# basename() strips the directory part first, so an underscore anywhere in the
# parent path cannot leak into the label (splitting the full path would).
# NOTE(review): assumes entries are named like "<id>_<patient>_<origin>" --
# confirm against the actual folder names.
samples <- str_split_fixed(basename(files), "_", n = 2)[, 2]
system.time({
  sceList <- lapply(seq_along(files), function(i) {
    # patient <- files[[1]]  # handy for interactive debugging
    patient <- files[[i]]
    print(patient)
    ct <- Read10X(patient)
    CreateSeuratObject(
      counts = ct,
      project = samples[[i]],
      min.cells = 3, # Include features detected in at least this many cells.
      min.features = 200 # Include cells where at least this many features are detected.
    )
  }) # returns a list with one Seurat object per sample
}) # record the elapsed run time
names(sceList)
names(sceList) <- samples
# Prefix every cell barcode with its sample label so names stay unique after
# merging.
sce.all <- merge(
  x = sceList[[1]],
  y = sceList[-1],
  add.cell.ids = samples
)
head(sce.all@meta.data, 10)
table(sce.all@meta.data$orig.ident)
将病人的meta信息导入
# Add per-cell grouping columns for the patient and the cell origin.
# Each cell name is split on "_": the first piece is stored as `patients`,
# the second piece as `cell.orig`.
phe <- str_split(rownames(sce.all@meta.data), pattern = "_", simplify = TRUE)
head(phe)
sce.all@meta.data[["patients"]] <- phe[, 1]
sce.all@meta.data[["cell.orig"]] <- phe[, 2]
# Quick sanity check: number of cells per patient and per origin.
table(sce.all@meta.data[["patients"]])
table(sce.all@meta.data[["cell.orig"]])
简单给病人分组后,meta信息如下
希望把临床信息导入meta信息中
其实按照病人的代号(meta 信息中的 patients 列)用 merge() 合并即可,不要想得太复杂!
# Attach patient-level clinical information to the per-cell metadata by
# joining on the patient ID.
patient_info <- read.csv("E:/GEO/scRNAseq/HNSCC/GSE164690/Patients_info_use.csv")
head(patient_info$Patients)
data <- sce.all@meta.data
colnames(patient_info)
# [1] "Patients" "Gender" "Age_group" "Smoking" "Alcohol"
# [6] "Disease_site" "T_Stage" "N_Stage" "M_Stage" "HPV"
# [11] "Inflam_status"
# A duplicated patient ID would duplicate every matching cell row in the join
# and make the barcodes non-unique, so refuse to continue in that case.
if (anyDuplicated(patient_info$Patients) > 0) {
  stop("Duplicate patient IDs in the clinical table", call. = FALSE)
}
# Cells whose patient has no clinical record get NA in the clinical columns;
# report them instead of failing silently.
missing_info <- setdiff(unique(data$patients), patient_info$Patients)
if (length(missing_info) > 0) {
  warning(
    "No clinical information for patient(s): ",
    paste(missing_info, collapse = ", "),
    call. = FALSE
  )
}
# merge() discards row names, so keep the cell barcodes in a regular column
# during the join and restore them afterwards.
data <- rownames_to_column(data, var = "barcodes")
# Left join (all.x = TRUE): keep every cell, but do NOT keep patients that
# appear only in the clinical table. A full outer join (all = TRUE) would add
# rows with an NA barcode for such patients, which cannot be turned back into
# row names.
metadata <- merge(
  data,
  patient_info,
  by.x = "patients",
  by.y = "Patients",
  all.x = TRUE
)
metadata <- column_to_rownames(metadata, var = "barcodes")
sce.all <- AddMetaData(sce.all, metadata)
colnames(sce.all@meta.data)
基础知识几天不练就会生疏,白白浪费大量时间!一开始我竟然是用循环去写的,实在离谱。