10X空间转录组数据分析之CNV轨迹层级

作者,Evil Genius
今天我们要完成一项工程,那就是CNV层级轨迹,如下图:

首先来看一下什么是CNV谱系
拷贝数变异(CNVs)在细胞增殖过程中在基因组中积累,提供了系统发育谱系的特征。通过转录组数据上使用intercnv生成CNV谱,能够以空间和时间分辨率跟踪克隆系统发育。
文章空间转录组数据分析之CNV burden和CNV聚类(python版本)已经做了基础的CNV分析和聚类,这个是前提。
CNV谱系如下:

我们来完成这项工程,示例数据大家自行下载,大家都是生信资深人员了,中间过程就不啰嗦了。
from IPython.display import display, HTML, Image
display(HTML("<style>.container { width:95% !important; }</style>"))

%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings("ignore")

import os
import copy

import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
from anndata import AnnData

import matplotlib
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects

import sys
sys.path.append("../../lib")
from stpalette import palette_WM4007_rna, palette_WM4237_rna
from plots import plotSpatialAll, plotSpatialEdge, euclidean_p1
from cheatmap import sMED, cdist, cplot
from utils import loadAdImage, filterCNV

model = 'WM4007'
palette_rna = palette_WM4007_rna if model=='WM4007' else palette_WM4237_rna

preprocessedStDataPath = 'd:/from HPCC 11 28 2022/results_NF1-nod-t2t-k35/%s/' % model
dataPath = '../../data/'
ids = sorted(np.loadtxt(dataPath + 'ids_%s_ST.txt' % model, dtype=str))
sids = [id[7:12] for id in ids]
from scipy.spatial.distance import cdist, pdist, squareform

ad_all = sc.read(dataPath + 'ad_all_human_clustered_st_%s.h5ad' % model)
ad_all = ad_all[ad_all.obs['human_ratio']>=0.25]
filter_spots_index = ad_all.obs.index.copy()
读取空间数据

以下内容为付费内容,定价 ¥200.00点击修改

ad_cnv = sc.read(dataPath + 'ad_all_human_clustered_cnv_%s.h5ad' % model)
ad_cnv = ad_cnv[ad_cnv.obs.index.intersection(filter_spots_index), :]

plt.rcParams["figure.figsize"] = (4, 3)
sc.pl.embedding(ad_cnv[ad_cnv.obs.index.intersection(filter_spots_index), :], color=['cluster', 'cnv_clusters'], basis="X_umap", title='time', palette=palette_rna, size=25)

dfc = ad_cnv.obs[['cnv_clusters', 'sample']].value_counts().unstack().fillna(0).astype(int)
dfc

cnv Burden
use_raw_distances = False
metric = 'cosine'

if use_raw_distances:
    df_cnv = ad_cnv.to_df()
    df_cnv.index = ad_cnv.obs['cnv_clusters']
else:
    df_cnv = pd.DataFrame(ad_cnv.obsm['X_pca'], index=ad_cnv.obs['cnv_clusters'])
    
uclusters = list(ad_cnv.obs['cnv_clusters'].unique())

res = dict()
for c1 in uclusters:
    for c2 in uclusters:
        if (c1!=c2) and (not (c1, c2) in res.keys()) and (not (c2, c1) in res.keys()):
            print(c1, '\t', c2, '\t', df_cnv.loc[c1].shape[0], df_cnv.loc[c2].shape[0])
            d = cdist(df_cnv.loc[c1], df_cnv.loc[c2], metric=metric).mean() # euclidean cosine
            res.update({(c1, c2): d})
df_pdist = pd.Series(res).unstack()
df_ppdist = df_pdist.reindex(uclusters).T.reindex(uclusters).fillna(0.) + df_pdist.reindex(uclusters).T.reindex(uclusters).T.fillna(0.)

df_ppdist = df_pdist.reindex(uclusters).T.reindex(uclusters).fillna(0.) + df_pdist.reindex(uclusters).T.reindex(uclusters).T.fillna(0.)
df_styled = df_ppdist.style.background_gradient(axis=None, vmin=df_ppdist[df_ppdist!=0].min().min(), cmap='bwr')
df_styled.to_excel(f'{model}_PCA_cosine_distance_average_over_cnv_clusters.xlsx')
df_styled

Load spatial
images = {id: [(ad_temp := sc.read(preprocessedStDataPath + '%s/st_adata_processed.h5ad' % id)).uns['spatial'], ad_temp.obs.index, ad_temp.obsm['spatial']] for id in ids}

ads = dict()
for sample in ids:
    print(sample, end='\t')
    ads[sample] = ad_cnv[ad_cnv.obs['id']==sample, :].copy()    
    ads[sample].uns['spatial'] = images[sample][0]
    ads[sample].obsm['spatial'] = pd.DataFrame(index=images[sample][1], data=images[sample][2]).reindex(ads[sample].obs['original_barcode']).values
    ads[sample][:, :] = filterCNV(ads[sample].to_df().T, n=10).T

plotSpatialAll(ads, identity='cnv_clusters', palette=palette_rna, f=0.75)

plotSpatialAll({id: ads[id] for id in ids if not '_TE_' in id and '_S2_' in id}, identity='cnv_cosine_distance', palette=palette_rna, nx=6, f=0.6, cmap='nipy_spectral', uniformColorScale=True, wspace=0.3)

params = dict(fontsize=12, markerscale=1.5, linewidth_factor=2.0, alpha=0.5, alpha_img=0.5, decay_power=4, absolute_cutoff=None, R=1, # 20
              alpha_quantile=0.99, method='cosine', cacheEdges=False, useSavedCache=False)

res=plotSpatialEdge({id: ads[id] for id in ids if not '_TE_' in id}, identity='cluster', palette=palette_rna, f=1.0, invert=False, use_raw_distances=False, noplot=False, **params)

params = dict(fontsize=12, markerscale=1.25, maintainScale=False, linewidth_factor=2.0, alpha=0.65, alpha_img=0.5, decay_power=4, absolute_cutoff=None, R=1, # 20
              alpha_quantile=0.99, method='cosine', cacheEdges=False, useSavedCache=False)

res=plotSpatialEdge({id: ads[id] for id in ids if (('_T0_' in id) and (not '_S2_' in id)) or  (('_T1_' in id) and (not '_S2_' in id)) or (('_TC_' in id) and (not '_S1_' in id))},
                    identity='cluster', palette=palette_rna, f=0.75, nx=3, invert=False, use_raw_distances=False, noplot=False, suptitle=False,  **params)

params = dict(fontsize=12, markerscale=1.25, maintainScale=False, linewidth_factor=2.0, alpha=0.65, alpha_img=0.5, decay_power=4, absolute_cutoff=0.8, R=1, # 20
              alpha_quantile=0.99, method='cosine', cacheEdges=False, useSavedCache=False)

res=plotSpatialEdge({id: ads[id] for id in ids if (('_T0_' in id) and (not '_S2_' in id)) or  (('_T1_' in id) and (not '_S2_' in id)) or (('_TC_' in id) and (not '_S1_' in id))},
                    identity='cluster', palette=palette_rna, f=0.75, nx=3, invert=False, use_raw_distances=False, noplot=False, suptitle=False,  **params)

params = dict(fontsize=12, markerscale=1.25, maintainScale=False, linewidth_factor=2.0, alpha=0.65, alpha_img=0.5, decay_power=4, absolute_cutoff=0.8, R=1, # 20
              alpha_quantile=0.99, method='cosine', cacheEdges=False, useSavedCache=False)

res=plotSpatialEdge({id: ads[id] for id in ids if (('_T0_' in id) and (not '_S2_' in id)) or  (('_T1_' in id) and (not '_S2_' in id)) or (('_TC_' in id) and (not '_S1_' in id))},
                    identity='cnv_clusters', palette=palette_rna, f=0.75, nx=3, invert=False, use_raw_distances=False, noplot=False, suptitle=False,  **params)

cnv_palette = {str(i): cm.Set3(c-1) for i, c in enumerate([2,1,7,9,5,11,6,10,8,4,3])}
cnv_palette.update({'T0': '#DE8C00', 'T1': '#7CAE00', 'T2': '#00C08B', 'T3': '#B79F00', 'T4': '#C77CFF', 'TC': '#FF64B0', '-1': 'white'})

from vplot import wrapVplotRatio, wrapVplotScore, wrapVplotGene, vplot

params = dict(fontsize=12, markerscale=1.25, maintainScale=False, linewidth_factor=2.0, alpha=0.95, alpha_img=0.5, decay_power=4, absolute_cutoff=2.0, R=1, # 20
              alpha_quantile=0.99, method='cosine', cacheEdges=False, useSavedCache=False)

res=plotSpatialEdge({id: ads[id] for id in ids if (('_T0_' in id) and (not '_S2_' in id)) or  (('_T1_' in id) and (not '_S2_' in id)) or (('_TC_' in id) and (not '_S1_' in id))},
                    identity='cnv_clusters', palette=cnv_palette, f=0.75, nx=3, invert=False, use_raw_distances=False, noplot=False, suptitle=False,  **params)

params = dict(fontsize=12, markerscale=1.25, maintainScale=False, linewidth_factor=2.0, alpha=0.95, alpha_img=0.5, decay_power=4, absolute_cutoff=0.8, R=1, # 20
              alpha_quantile=0.99, method='cosine', cacheEdges=False, useSavedCache=False)

res=plotSpatialEdge(ads, identity='cnv_clusters', palette=cnv_palette, f=0.75, nx=4, invert=False, use_raw_distances=False, noplot=False, suptitle=False,  **params)

def wrapVplotScoreLoc(score, genesDicts=None, identity=None, selected=[], palette=None, ads=None, ids=None, vprefix='score_', **kwargs):

    if len(selected)==0:
        selected = pd.Series(np.hstack([ads[id].obs[identity] for id in ids])).sort_values().unique().tolist()

    df = pd.concat([pd.Series(ads[id][ads[id].obs[identity].isin(selected), :].obs[vprefix + score].fillna(0).values) for id in ids], keys=ids, axis=1)
    dfc = pd.concat([pd.Series(ads[id][ads[id].obs[identity].isin(selected), :].obs[identity].values.astype(int)) for id in ids], keys=ids, axis=1)
    
    return vplot(df, dfc, name=vprefix + score, palette=palette, **kwargs)

for sample in ids:
    ads[sample].obs['score_avgdist'] = ads[sample].obs['avgdist']
wrapVplotScoreLoc('avgdist', identity='cnv_clusters', palette=cnv_palette, ads=ads, ids=ids, vmin=-0.1)
# wrapVplotScoreLoc('avgdist', identity='cluster', palette=palette_rna, ads=ads, ids=ids)

def sMED(v1, v2):

    """Simple Minimum Event Distance (MED), similar (but not equivalent!) to the one implemented in MEDALT.
    MED is the minimal number and the series of single-copy gains or losses that are required to evolve one genome to another.
    """
    a = v1 - v2
    a = a[np.roll(a, -1) != a]
    a = np.abs(a[a != 0]).sum()
    return int(a)

v1 = np.array([0,0,0,0,1,1,1,1,0,0,0,0,-1,-1,-1,0,0,0,1,1,0,0,0,1])
v2 = np.array([0,0,0,0,0,1,1,0,0,0,0,0,-1,-1,-1,0,0,0,-1,-1,0,0,0,1])

plt.rcParams["figure.figsize"] = (4, 4)
ad_cnv.obs['resistant'] = ((ad_cnv.obs['cluster'].isin(['8'])) & (ad_cnv.obs['time'].isin(['T4']))).astype(int).astype(str).astype('category')
ad_cnv.obs['resistantEarly'] = ((ad_cnv.obs['cluster'].isin(['8'])) & (ad_cnv.obs['time'].isin(['T1', 'T2', 'T3']))).astype(int).astype(str).astype('category')
sc.pl.embedding(ad_cnv, color=['time', 'cluster', 'resistant', 'resistantEarly'], basis="X_umap", palette=palette_rna, size=25)

identity = 'resistant'
sc.tl.embedding_density(ad_cnv, basis='umap', groupby=identity)
sc.pl.embedding_density(ad_cnv, groupby=identity, ncols=10, s=5)

ad_all = sc.read(dataPath + 'ad_all_human_clustered_st_%s.h5ad' % model)

ad_all[(ad_all.obs['cluster'].isin(['8'])) & (ad_cnv.obs['time'].isin(['T1', 'T2', 'T3', 'T4']))

identity = 'resistantEarly'
sc.tl.embedding_density(ad_cnv, basis='umap', groupby=identity)
sc.pl.embedding_density(ad_cnv, groupby=identity, ncols=10, s=50)

sc.pl.embedding(ad_cnv, color=['resistant', 'resistantEarly'], basis="X_umap", palette=palette_rna, size=5)

Load images and create ADs
images = {id: [(ad_temp := sc.read(preprocessedStDataPath + '%s/st_adata_processed.h5ad' % id)).uns['spatial'], ad_temp.obs.index, ad_temp.obsm['spatial']] for id in ids}

ads = dict()
for sample in ids:
    print(sample, end='\t')
    ads[sample] = ad_cnv[ad_cnv.obs['id']==sample, :].copy()    
    ads[sample].uns['spatial'] = images[sample][0]
    ads[sample].obsm['spatial'] = pd.DataFrame(index=images[sample][1], data=images[sample][2]).reindex(ads[sample].obs['original_barcode']).values
    ads[sample][:, :] = filterCNV(ads[sample].to_df().T, n=10).T


params2 = dict(fontsize=12, markerscale=1.5, linewidth_factor=1.0, alpha=1.0, alpha_img=1.0, decay_power=1,
               absolute_cutoff=0.4, alpha_quantile=0.75, method='pearson', cacheEdges=False) 

df_info = plotSpatialEdge({(id:='WM4007_T3_S1_ST'): ads[id]}, identity='cluster', palette=palette_rna, f=2, invert=False, **params2)

ad = ads['WM4007_T3_S1_ST']

df_temp, df_meta_temp = ad.to_df().T, ad.obs[['time', 'cluster']].sort_values(by=['time', 'cluster'], ascending=False)
df_temp = df_temp[df_meta_temp.index]
cplot(df_temp, df_meta_temp, sampleMED=1000, clusterObsByGroups=False, optimalOrderingForObs=False,
            palette=palette_rna, clusterVar=False, clusterObs=True, addLinesOnHeatmap=False, addLinesOnGroups=False, useMEDforObsGroups=True, useMEDforObs=True,
            colorbarLabel='InferCNV CNV', colorbarLabels=['Ampl.', 'Del.'], safetyLimit=2000, figsize=[14,8], dendrogramLineWidth=1.0);

dataPath = '../../data/'
dfc_WM4007 = sc.read(dataPath + 'ad_all_scaled_unfiltered_st_WM4007.h5ad').obs.set_index(['T', 'sample'])['in_tissue'].groupby(level=[0,1]).count().replace(0, np.nan).dropna()
dfc_WM4007_h = sc.read(dataPath + 'ad_all_human_clustered_st_WM4007.h5ad').obs.set_index(['T', 'sample'])['in_tissue'].groupby(level=[0,1]).count().replace(0, np.nan).dropna()

params2 = dict(fontsize=12, markerscale=1.5, linewidth_factor=1.0, alpha=1.0, alpha_img=1.0, decay_power=2, absolute_cutoff=70, alpha_quantile=0.75, method='MED', cacheEdges=False)

plotSpatialEdge(ads, identity='cluster', palette=palette_rna, f=1., invert=False, **params2)

params = dict(fontsize=12, markerscale=1.5, linewidth_factor=1.0, alpha=1.0, alpha_img=1.0, decay_power=2, absolute_cutoff=70, method='MED', alpha_quantile=0.75, cacheEdges=True)

plotSpatialEdge(ads, identity='cluster', palette=palette_rna, f=1., invert=False, **params)

示例数据上传到了https://images.jax.org/webclient/?show=project-1451,大家有意随意下载测试。
生活很好,有你更好
  • 14
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值