002_单细胞流程简化版

  1. log
  2. 高变基因
  3. 标准化
  4. pca
  5. 构建图
  6. 聚类
  7. tsne

seurat流程

# 1. Build the Seurat object
library(Seurat)

# min.cells: keep a gene only if it is detected in at least this many cells
min.cells <- 0
# min.features: keep a cell only if it expresses at least this many genes
min.features <- 0
# `raw.data` and `metadata` must already exist in the session.
# NOTE(review): presumably a genes x cells count matrix and a per-cell table
# whose row names are the cell names -- confirm against the loading code.
# The per-cell table is attached through `meta.data`, the argument
# CreateSeuratObject() actually defines for it.
sce <- CreateSeuratObject(
  counts = raw.data,
  meta.data = metadata,
  min.cells = min.cells,
  min.features = min.features
)

# 2. Data cleaning
# sce can be subset by genes and by cells with data-frame-style indexing.
# Feature names matching each QC pattern (value = TRUE returns the names
# rather than their positions).
erccs <- grep("^ERCC-", x = rownames(sce), value = TRUE)
rp <- grep("^RP[SL][[:digit:]]", x = rownames(sce), value = TRUE)
mt <- grep("^MT-", x = rownames(sce), value = TRUE)

# Per-cell percentage of counts coming from features matching each pattern.
# percent.ercc is computed here; nothing needs to supply it beforehand.
sce[["percent.ercc"]] <- PercentageFeatureSet(sce, pattern = "^ERCC-")
sce[["percent.rp"]] <- PercentageFeatureSet(sce, pattern = "^RP[SL][[:digit:]]")
sce[["percent.mt"]] <- PercentageFeatureSet(sce, pattern = "^MT-")

# 3. Filtering
# Keep cells with more than 50000 total counts and more than 500 detected genes.
sce <- subset(x = sce, subset = nCount_RNA > 50000 & nFeature_RNA > 500)

# Seurat pipeline
# 1. Log-normalisation (scale.factor = 1e6, i.e. counts per million)
sce <- NormalizeData(
  object = sce,
  normalization.method = "LogNormalize",
  scale.factor = 1e6
)
# 2. Highly variable genes
sce <- FindVariableFeatures(
  object = sce,
  selection.method = "vst",
  nfeatures = 2000
)
# 3. Scaling
sce <- ScaleData(object = sce)
# 4. PCA (verbose = FALSE suppresses the printed loadings and messages)
sce <- RunPCA(object = sce, verbose = FALSE)
# 5. Build the neighbour graph on the first 20 PCs
sce <- FindNeighbors(sce, dims = 1:20)
# 6. Clustering
sce <- FindClusters(sce, resolution = 0.5)
# 7. t-SNE on the same 20 PCs used for the neighbour graph.
# `dims` is the argument RunTSNE() reads; an unrecognised name would leave
# the embedding on the default 1:5.
sce <- RunTSNE(sce, dims = 1:20)

scanpy流程

# Create the scanpy (AnnData) object
import pandas as pd
import scanpy as sc

df = pd.read_csv('processfile/count.csv', index_col=0)
meta = pd.read_csv('processfile/metadata.csv', index_col=0)
# Relabel the count columns with the metadata index.
# NOTE(review): this assumes the columns of count.csv are in the same order
# as the rows of metadata.csv -- confirm, otherwise cells get mislabelled.
df.columns = meta.index
# Transpose so that rows become observations and columns become variables.
df = df.T
cellinfo = pd.DataFrame(df.index, index=df.index, columns=['sample_index'])
geneinfo = pd.DataFrame(df.columns, index=df.columns, columns=['genes_index'])
cellinfo = pd.concat([cellinfo, meta], axis=1)
sce = sc.AnnData(df, obs=cellinfo, var=geneinfo)

# 2. Data cleaning
sce.var_names_make_unique()
sce.obs_names_make_unique()
sc.pp.filter_cells(sce, min_genes=300)
sc.pp.filter_genes(sce, min_cells=5)
mt = sce.var_names[sce.var_names.str.match(r'^MT-')]  # mitochondrial genes
rp = sce.var_names[sce.var_names.str.match(r'^RP[SL][0-9]')]  # ribosomal genes
ercc = sce.var_names[sce.var_names.str.match(r'^ERCC-')]  # ERCC spike-ins
# Gene names containing a dot followed by a digit; kept for inspection only.
dotted_genes = sce.var_names[sce.var_names.str.match(r'.*\.[0-9]')]
sce.var['mt'] = sce.var_names.str.match(r'^MT-')
sc.pp.calculate_qc_metrics(sce, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

# 3. Filtering
# Apply both thresholds in one mask and materialise the result with .copy(),
# so later in-place steps operate on a real AnnData rather than on a view.
keep_cells = (sce.obs.pct_counts_mt < 15) & (sce.obs.total_counts < 25000)
sce = sce[keep_cells, :].copy()

# Standard pipeline
# =============================================================================
# 1. Library-size normalisation + log
# 2. Highly variable genes
# 3. Scaling
# 4. PCA
# 5. Build the neighbour graph
# 6. Clustering
# 7. t-SNE
# =============================================================================
# 1. Normalise every cell to 1e6 total counts, then log1p-transform.
sc.pp.normalize_total(sce, target_sum=1e6)
sc.pp.log1p(sce)
# 2. Flag the 2000 most variable genes (same number as the Seurat pipeline).
# This only adds a `highly_variable` column to sce.var; no gene is dropped.
sc.pp.highly_variable_genes(sce, n_top_genes=2000)
# 3. Regress out total counts and mitochondrial percentage, then scale.
sc.pp.regress_out(sce, ['total_counts', 'pct_counts_mt'])
sc.pp.scale(sce)
# 4. PCA; per the scanpy docs it uses the `highly_variable` flag when present.
sc.tl.pca(sce, svd_solver='arpack', random_state=42)
# 5. Neighbour graph on the first 20 PCs.
sc.pp.neighbors(sce, n_neighbors=50, n_pcs=20, random_state=42)
# 6. Leiden clustering.
sc.tl.leiden(sce, resolution=0.3, random_state=42)
# 7. t-SNE on the first 20 PCs, then plot coloured by cluster.
sc.tl.tsne(sce, n_pcs=20, n_jobs=30)
sc.pl.tsne(sce, color='leiden')

如果您觉得我的文章对您有帮助,请点赞+关注,可以的话打个赏奖励一杯星巴克(~ ̄(OO) ̄)ブ

Best Regards,  
Yuan.SH;
School of Basic Medical Sciences,  
Fujian Medical University,  
Fuzhou, Fujian, China.  
Please contact me via:  
(a) e-mail: yuansh3354@163.com  
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值