# （单细胞-SingleCell）单细胞标准流程（简化版）

## seurat流程

# 1. Build the object
# Thresholds with the same names as the CreateSeuratObject() arguments; the
# call that actually builds `sce` is not shown in this file.
# NOTE(review): 0 presumably means "no filtering at construction" - confirm.
min.cells <- 0 # min.cells: a gene must be expressed in at least this many cells
min.features <- 0 # min.features: a cell must express at least this many genes

# 2. Data cleaning
# A Seurat object can be subset by gene and by cell much like a data frame.
# Each pattern is defined once so the gene-name lookup and the percentage
# calculation below cannot drift apart.
ercc_pattern <- "^ERCC-" # exogenous spike-ins
rp_pattern <- "^RP[SL][[:digit:]]" # ribosomal genes
mt_pattern <- "^MT-" # mitochondrial genes

# value = TRUE returns the matching gene names instead of their positions.
erccs <- grep(ercc_pattern, x = rownames(sce), value = TRUE)
rp <- grep(rp_pattern, x = rownames(sce), value = TRUE)
mt <- grep(mt_pattern, x = rownames(sce), value = TRUE)

# Store the per-cell percentage of each gene set in the object metadata.
sce[["percent.ercc"]] <- PercentageFeatureSet(sce, pattern = ercc_pattern)
sce[["percent.rp"]] <- PercentageFeatureSet(sce, pattern = rp_pattern)
sce[["percent.mt"]] <- PercentageFeatureSet(sce, pattern = mt_pattern)

# 3. Filtering
# Keep cells with more than 50000 total counts and more than 500 detected genes.
sce <- subset(x = sce, subset = nCount_RNA > 50000 & nFeature_RNA > 500)

# Seurat pipeline
# 1. Log-normalization (scale.factor = 1e6 scales each cell to one million)
sce <- NormalizeData(
  object = sce,
  normalization.method = "LogNormalize",
  scale.factor = 1e6
)
# 2. Highly variable genes
sce <- FindVariableFeatures(
  object = sce,
  selection.method = "vst",
  nfeatures = 2000
)
# 3. Scaling
sce <- ScaleData(object = sce)
# 4. PCA
# `verbose` is the Seurat v3+ argument; the v2 name `do.print` is not
# recognised by this API and did not silence the output.
sce <- RunPCA(object = sce, verbose = FALSE)
# 5. Build the neighbor graph
sce <- FindNeighbors(sce, dims = 1:20)
# 6. Clustering
sce <- FindClusters(sce, resolution = 0.5)
# 7. t-SNE
# `dims` is the Seurat v3+ argument; with the v2 name `dims.use` the call fell
# back to the default dims = 1:5 instead of the 20 PCs used for the graph.
sce <- RunTSNE(sce, dims = 1:20)


## scanpy流程

# Create the scanpy (AnnData) object.
# `df` and `meta` are defined outside this section.
# NOTE(review): assumes `df` arrives as genes x cells with one column per row
# of `meta`, in the same order - confirm against the loader.
df.columns = meta.index
df = df.transpose()  # after this, rows are cells and columns are genes

# Per-gene annotation: a single column holding the gene labels.
geneinfo = pd.DataFrame(
    df.columns,
    index=df.columns,
    columns=['genes_index'],
)

# Per-cell annotation: the cell labels followed by every column of `meta`,
# joined column-wise on the shared index.
cellinfo = pd.concat(
    [
        pd.DataFrame(df.index, index=df.index, columns=['sample_index']),
        meta,
    ],
    axis=1,
)

sce = sc.AnnData(df, obs=cellinfo, var=geneinfo)
# 2. Data cleaning
sce.var_names_make_unique()
sce.obs_names_make_unique()
sc.pp.filter_cells(sce, min_genes=300)  # drop cells with fewer than 300 genes
sc.pp.filter_genes(sce, min_cells=5)    # drop genes seen in fewer than 5 cells

# Gene-name lookups (kept as variables for inspection).
mt = sce.var_names[sce.var_names.str.match(r'^MT-')]  # mitochondrial genes
rp = sce.var_names[sce.var_names.str.match(r'^RP[SL][0-9]')]  # ribosomal genes
ercc = sce.var_names[sce.var_names.str.match(r'^ERCC-')]  # exogenous spike-ins
# Gene names containing a dot followed by a digit. Previously a bare
# expression whose result was discarded; now kept for inspection.
dotted_genes = sce.var_names[sce.var_names.str.match(r'.*\.[0-9]')]

# Flag each gene set in `var` so calculate_qc_metrics reports a per-cell
# percentage for all three, matching the percent.mt / percent.rp /
# percent.ercc columns computed in the Seurat section.
sce.var['mt'] = sce.var_names.str.match(r'^MT-')
sce.var['rp'] = sce.var_names.str.match(r'^RP[SL][0-9]')
sce.var['ercc'] = sce.var_names.str.match(r'^ERCC-')
sc.pp.calculate_qc_metrics(
    sce,
    qc_vars=['mt', 'rp', 'ercc'],
    percent_top=None,
    log1p=False,
    inplace=True,
)

# 3. Filtering
# Keep cells with < 15% mitochondrial counts and < 25000 total counts.
# .copy() turns the subset into a real AnnData object instead of a view, so
# later in-place preprocessing does not rely on an implicit copy.
keep_cells = (sce.obs.pct_counts_mt < 15) & (sce.obs.total_counts < 25000)
sce = sce[keep_cells, :].copy()

# Standard pipeline
# =============================================================================
# 1. Library-size normalization + log
# 2. Highly variable genes
# 3. Scaling (after regressing out total counts and mitochondrial percentage)
# 4. PCA
# 5. Build the neighbor graph
# 6. Clustering
# 7. t-SNE
# =============================================================================
# 1. Normalize every cell to one million counts, then log1p-transform.
sc.pp.normalize_total(sce, target_sum=1e6)
sc.pp.log1p(sce)
# 2. Flag highly variable genes. This step is listed in the outline above but
#    was missing from the code; 2000 genes mirrors nfeatures = 2000 in the
#    Seurat section. Genes are only flagged, not removed.
#    NOTE(review): sc.tl.pca uses the `highly_variable` flag by default when
#    it is present - confirm for the installed scanpy version.
sc.pp.highly_variable_genes(sce, n_top_genes=2000)
# 3. Regress out the two QC covariates, then scale each gene.
sc.pp.regress_out(sce, ['total_counts', 'pct_counts_mt'])
sc.pp.scale(sce)
# 4. PCA
sc.tl.pca(sce, svd_solver='arpack', random_state=42)
# 5. Neighbor graph on the first 20 PCs
sc.pp.neighbors(sce, n_neighbors=50, n_pcs=20, random_state=42)
# 6. Leiden clustering
sc.tl.leiden(sce, resolution=0.3, random_state=42)
# 7. t-SNE on the first 20 PCs, then plot colored by cluster
sc.tl.tsne(sce, n_pcs=20, n_jobs=30)
sc.pl.tsne(sce, color='leiden')



# Best Regards,
# Yuan.SH
# ---------------------------------------
# School of Basic Medical Sciences,
# Fujian Medical University,
# Fuzhou, Fujian, China.