空间转录组数据分析之CNV burden和CNV聚类(python版本)

作者,Evil Genius
最近确实头疼,晚上睡不着,白天睡不醒。
还有一些之前的同事失业了,偶尔交流一下
日子反正一直在过,经历一下低谷未必是坏事。
这一篇我们来分享一下关于空间的一些分析。
我们来看看聚类

如果我们聚类的目的是划分区域,那么空间至少有6种聚类方式。
  • 形态学划分
  • 分子聚类
  • 细胞聚类
  • CNV聚类
  • QuPath划分
  • SME聚类,即考虑空间邻域的聚类方式。
我们这一篇就分享一下CNV聚类的相关内容,拿到如下的结果

废话就不多说了,直接来示例代码
大家直接先要分析inferCNV,这个分享了很多次了,不再重复了。



from IPython.display import display, HTML, Image
display(HTML("<style>.container { width:95% !important; }</style>"))

%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings("ignore")

import os
import copy

import numpy as np
import pandas as pd
import scanpy as sc
# import scFates as scf
import scanpy.external as sce
from anndata import AnnData
# import palantir

import matplotlib
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects

## fix palantir breaking down some plots
import seaborn
seaborn.reset_orig()
%matplotlib inline
sc.set_figure_params()
# scf.set_figure_pubready()

import sys
sys.path.append("../../lib")
from stpalette import palette1
from plots import plotSpatialAll
from utils import loadCNVfromInferCNV
from utils import loadAdImage

sc.settings.verbosity = 3
sc.settings.logfile = sys.stdout

model = 'WM4007'

dataPath = '../../data/'
preprocessedCasperCNVDataPath = 'c:/Projects/A_ST/output_casper_%s/' % model
preprocessedInferCNVDataPath = 'c:/Projects/A_ST/inferCNV_results_%s/' % model

ids = sorted(np.loadtxt(dataPath + 'ids_%s_ST.txt' % model, dtype=str))
sids = [id[7:12] for id in ids]

palette1.update({sid: cm.terrain(0.01 + i/len(sids)) for i, sid in enumerate(sids)})

df_infercnv_cnv, df_infercnv_meta = loadCNVfromInferCNV(dataPath + 'For_inferCNV_%s_meta.data.tsv.gz' % model, 
                                                        [preprocessedInferCNVDataPath + 'infercnv.references.txt',
                                                         preprocessedInferCNVDataPath + 'infercnv.observations.txt'])

print(model)
se = pd.Series(index=pd.MultiIndex.from_frame(df_infercnv_meta).droplevel(0), data=(df_infercnv_cnv!=0).mean(axis=0).values)
se.groupby(level=[0,1]).mean().unstack().fillna(0).style.background_gradient(axis=None).applymap(lambda x: 'background-color: transparent; color: transparent' if x==0 else '')

print(model)
se.groupby(level=[0]).mean().to_frame().T.style.background_gradient(axis=None)

se.groupby(level=[0]).mean().to_frame().style.background_gradient(axis=None)

ad_all = sc.read(dataPath + 'ad_all_human_clustered_st_%s.h5ad' % model)
print(ad_all.shape)

ad_all.obs['CNV burden per clone'] = se.groupby(level=[0]).mean().reindex(ad_all.obs['cluster']).values

ad_all.obs['CNV burden'] = (df_infercnv_cnv!=0).astype(int).mean(axis=0).reindex(ad_all.obs.index).values

preprocessedStDataPath = 'c:/Projects/A_ST/from HPCC 11 28 2022/results_NF1-nod-t2t-k35/%s/' % model
ads = {id: ad_all[ad_all.obs['sample']==id, :] for id in ids}
images = {id: sc.read(preprocessedStDataPath + '%s/st_adata_processed.h5ad' % id).uns['spatial'] for id in ids}
for sample in ids[:]:
    ads[sample] = ad_all[ad_all.obs['sample']==sample, :].copy()    
    ads[sample].uns['spatial'] = images[sample]

plotSpatialAll(ads, ids=[id for id in ids if '_S2_' in id], identity='CNV burden', palette=palette1, f=0.75, fy=1.025, nx=6, panelSize=4, wspace = 0.25, hspace = 0.2)
plotSpatialAll(ads, ids=[id for id in ids if '_S2_' in id], identity='CNV burden per clone', palette=palette1, f=0.75, fy=1.025, nx=6, panelSize=4, wspace = 0.25, hspace = 0.2)

ad_cnv = sc.read(dataPath + 'ad_all_human_clustered_cnv_%s.h5ad' % model)
ad_cnv.obs['CNV burden'] = (ad_cnv.to_df().T!=0).astype(int).mean(axis=0).values
ad_cnv.obs['CNV burden amplifications'] = (ad_cnv.to_df().T>0).astype(int).mean(axis=0).values
ad_cnv.obs['CNV burden deletions'] = (ad_cnv.to_df().T<0).astype(int).mean(axis=0).values
ad_cnv.obs[['original_barcode', 'id', 'CNV burden', 'CNV burden amplifications', 'CNV burden deletions']].to_csv(dataPath + 'CNV_burden_%s.csv' % model)
ad_cnv.obs['CNV burden'].hist(bins=100, alpha=0.5)
(ad_cnv.obs['CNV burden amplifications']*2).hist(bins=100, alpha=0.5)
(ad_cnv.obs['CNV burden deletions']*2).hist(bins=100, alpha=0.5)


palette1.update({id: cm.terrain(0.01 + i/len(ids)) for i, id in enumerate(ids)})

ad_all = sc.read(dataPath + 'ad_all_human_clustered_st_%s.h5ad' % model)


df_infercnv_cnv, df_infercnv_meta = loadCNVfromInferCNV(dataPath + 'For_inferCNV_%s_meta.data.tsv.gz' % model, 
                                                        [preprocessedInferCNVDataPath + 'infercnv.references.txt',
                                                         preprocessedInferCNVDataPath + 'infercnv.observations.txt'])


ad_cnv = sc.AnnData(df_infercnv_cnv.T)
ad_cnv.obs = df_infercnv_meta.loc[ad_cnv.obs.index]

ad_cnv.uns['cluster_colors'] = ad_all.uns['%s_colors' % rna_cluster_identity]
ad_cnv.uns['sample_colors'] = ad_all.uns['sample_colors']
ad_cnv.uns['time_colors'] = ad_all.uns['T_colors'] #[2:]
ad_cnv.obs['original_barcode'] = ad_all.obs['original_barcode'].loc[ad_cnv.obs.index]
ad_cnv.obs['id'] = ad_cnv.obs['sample'].replace(dict(zip(sids, ids)))

sc.pp.pca(ad_cnv, n_comps=100, zero_center=True, use_highly_variable=False)
pca_projections = pd.DataFrame(ad_cnv.obsm['X_pca'], index=ad_cnv.obs_names)

sc.pp.neighbors(ad_cnv, n_neighbors=100, use_rep='X_pca', metric='correlation')
sc.tl.umap(ad_cnv)
plt.rcParams["figure.figsize"] = (7, 7)
sc.pl.embedding(ad_cnv, color=['time', 'cluster'], basis='X_umap', title='time', cmap='nipy_spectral', size=50)

# generate neighbor draph in multiscale diffusion space
dm_res = palantir.utils.run_diffusion_maps(pca_projections)
ms_data = palantir.utils.determine_multiscale_space(dm_res, n_eigs=10)
ad_cnv.obsm['X_palantir']=ms_data.values
sc.pp.neighbors(ad_cnv, n_neighbors=100, use_rep='X_palantir', metric='correlation')

sc.tl.umap(ad_cnv)
plt.rcParams["figure.figsize"] = (4, 4)
sc.pl.embedding(ad_cnv, color=['time', 'cluster'], basis="X_umap", title='time', cmap='nipy_spectral', size=25)

res = 0.25
plt.rcParams["figure.figsize"] = (4, 4)
sc.tl.leiden(ad_cnv, key_added='cnv_clusters', resolution=res)
sc.pl.embedding(ad_cnv, color=['cnv_clusters'], basis="X_umap", title='time', palette=palette1)

Load images and create ADs

images = {id: [(ad_temp := sc.read(preprocessedStDataPath + '%s/st_adata_processed.h5ad' % id)).uns['spatial'], ad_temp.obs.index, ad_temp.obsm['spatial']] for id in ids}
ads = dict()
for sample in ids:
    ads[sample] = ad_cnv[ad_cnv.obs['id']==sample, :].copy()    
    ads[sample].uns['spatial'] = images[sample][0]
    ads[sample].obsm['spatial'] = pd.DataFrame(index=images[sample][1], data=images[sample][2]).reindex(ads[sample].obs['original_barcode']).values

plotSpatialAll(ads, identity='cnv_clusters', palette=palette1, f=0.75)

生活很好,有你更好
  • 6
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值