一、数据下载
示例数据是GSE231920的第2个样本
找到数据集,点击custom
勾选样本后点击下方的download
下载后好后是一个tar的压缩包,需要手动或者用代码解压后使用。
1.解压缩
在工作目录下解压GSE231920_RAW.tar
untar("GSE231920_RAW.tar",exdir = "input")
#修改文件名去掉GSM7306055_sample2_前缀
library(stringr)
nn = str_remove(dir("input/"),"GSM7306055_sample2_")
nn
#file.rename来给文件改名
file.rename(paste0("input/",dir("input/")),
paste0("input/",nn))
#检查是否成功
dir("input/")
2.读取并创建Seurat对象
创建Seurat对象
rm(list = ls()) #清空右上角的所有变量,方便反复调试代码
library(Seurat)
library(patchwork)
library(tidyverse)
ct = Read10X("input/")
dim(ct)
seu.obj <- CreateSeuratObject(counts = ct,
min.cells = 3, #一个基因至少要在3个细胞里有表达才被保留
min.features = 200) #一个细胞里至少要200个基因有表达才被保留
dim(seu.obj)
#设计随机种子(节省资源,跑需要的数据不用设置这个)
set.seed(1234)
seu.obj = subset(seu.obj,downsample = 3000)
3.质控
#质控开始
seu.obj[["percent.mt"]] <- PercentageFeatureSet(seu.obj, pattern = "^MT-")
head(seu.obj@meta.data)
VlnPlot(seu.obj,
features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt"),
ncol = 3,pt.size = 0.5)
#数据过滤
seu.obj = subset(seu.obj,
nFeature_RNA < 8500 &
nCount_RNA < 50000 &
percent.mt < 40)
dir()
4.降维聚类分析
#开始降维聚类分群
f = "obj.Rdata"
if(!file.exists(f)){
seu.obj = seu.obj %>%
NormalizeData() %>%
FindVariableFeatures() %>%
ScaleData(features = rownames(.)) %>%
RunPCA(features = VariableFeatures(.)) %>%
FindNeighbors(dims = 1:15) %>%
FindClusters(resolution = 0.5) %>%
RunUMAP(dims = 1:15) %>%
RunTSNE(dims = 1:15)
save(seu.obj,file = f)
}
load(f)
ElbowPlot(seu.obj)
p1 <- DimPlot(seu.obj, reduction = "umap",label = T)+NoLegend();p1