相关性、偏相关性和切线空间嵌入:其中切线空间嵌入优于标准相关性和偏相关(Dadi et al., 2019)。
# Load the development fMRI dataset and the MSDL probabilistic atlas.
from nilearn import datasets
development_dataset = datasets.fetch_development_fmri(n_subjects=30)
msdl_data = datasets.fetch_atlas_msdl()
msdl_coords = msdl_data.region_coords
n_regions = len(msdl_coords)
# ROI signal extraction.
# NOTE(review): detrending and band-pass filtering are applied here; if the
# preprocessing pipeline (e.g. DPARSF) already did this, these arguments can
# be dropped — keep them only when preprocessing did not.
from nilearn import input_data
masker = input_data.NiftiMapsMasker(
    msdl_data.maps, resampling_target="data", t_r=2, detrend=True,
    low_pass=.1, high_pass=.01, memory='nilearn_cache', memory_level=1).fit()
# Extract regional time series and the child/adult phenotype label.
children = []
pooled_subjects = []
groups = []  # 'child' or 'adult'
subject_info = zip(development_dataset.func,
                   development_dataset.confounds,
                   development_dataset.phenotypic)
for func_img, confounds, pheno in subject_info:
    signals = masker.transform(func_img, confounds=confounds)
    pooled_subjects.append(signals)
    label = pheno['Child_Adult']
    if label == 'child':
        children.append(signals)
    groups.append(label)
# 1. Full correlation between ROIs.
from nilearn.connectome import ConnectivityMeasure
correlation_measure = ConnectivityMeasure(kind='correlation')
correlation_matrices = correlation_measure.fit_transform(children)
mean_correlation_matrix = correlation_measure.mean_
# Show the connectome matrices of the first three children.
from nilearn import plotting
from matplotlib import pyplot as plt
_, axes = plt.subplots(1, 3, figsize=(15, 5))
for child_idx, (conn_matrix, ax) in enumerate(zip(correlation_matrices, axes)):
    plotting.plot_matrix(conn_matrix, tri='lower', colorbar=False, axes=ax,
                         title='correlation, child {}'.format(child_idx))
# Alternative visualization: glass-brain connectome of the group mean.
plotting.plot_connectome(mean_correlation_matrix, msdl_coords,
                         title='mean correlation over all children')
# 2. Partial correlation between ROIs.
partial_correlation_measure = ConnectivityMeasure(kind='partial correlation')
partial_correlation_matrices = partial_correlation_measure.fit_transform(
    children)
# Matrices of the first three children.
_, axes = plt.subplots(1, 3, figsize=(15, 5))
for child_idx, (conn_matrix, ax) in enumerate(
        zip(partial_correlation_matrices, axes)):
    plotting.plot_matrix(conn_matrix, tri='lower', colorbar=False, axes=ax,
                         title=f'partial correlation, child {child_idx}')
# Alternative visualization: glass-brain connectome of the group mean.
plotting.plot_connectome(
    partial_correlation_measure.mean_, msdl_coords,
    title='mean partial correlation over all children')
# 3. Tangent-space embedding. Group-level connectivity patterns captured in
# tangent space are more reproducible than either full or partial correlation.
tangent_measure = ConnectivityMeasure(kind='tangent')
tangent_matrices = tangent_measure.fit_transform(children)
# Per-child deviation ("offset") from the group mean, for the first three.
_, axes = plt.subplots(1, 3, figsize=(15, 5))
for child_idx, (offset_matrix, ax) in enumerate(zip(tangent_matrices, axes)):
    plotting.plot_matrix(offset_matrix, tri='lower', colorbar=False, axes=ax,
                         title='tangent offset, child {}'.format(child_idx))
下面是三种方法的结果图:
可以看到,直接的相关性分析连接强度普遍较高,但假阳性也高;偏相关分析得到的连接都很弱;而切线空间介于两者之间,连接强度不高也不低。
下面测试哪种连接度量对分类(儿童 vs 成人)最有效。
from sklearn.svm import LinearSVC
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score
import numpy as np

# Benchmark which connectivity measure best discriminates children from adults.
kinds = ['correlation', 'partial correlation', 'tangent']
_, classes = np.unique(groups, return_inverse=True)
cv = StratifiedShuffleSplit(n_splits=15, random_state=0, test_size=5)
pooled_subjects = np.asarray(pooled_subjects)
scores = {kind: [] for kind in kinds}
for kind in kinds:
    for train_idx, test_idx in cv.split(pooled_subjects, classes):
        # vectorize=True makes ConnectivityMeasure return each subject's
        # connectome as a 1D coefficient vector, ready for the classifier.
        connectivity = ConnectivityMeasure(kind=kind, vectorize=True)
        # Fit on the training subjects only, then transform both sets.
        train_vectors = connectivity.fit_transform(pooled_subjects[train_idx])
        classifier = LinearSVC().fit(train_vectors, classes[train_idx])
        test_vectors = connectivity.transform(pooled_subjects[test_idx])
        predictions = classifier.predict(test_vectors)
        # Store this fold's accuracy.
        scores[kind].append(accuracy_score(classes[test_idx], predictions))
显示结果
# Plot mean accuracy per connectivity kind, with std as error bars.
mean_scores = [np.mean(scores[kind]) for kind in kinds]
scores_std = [np.std(scores[kind]) for kind in kinds]
plt.figure(figsize=(6, 4))
positions = np.arange(len(kinds)) * .1 + .1
plt.barh(positions, mean_scores, align='center', height=.05, xerr=scores_std)
plt.yticks(positions, [k.replace(' ', '\n') for k in kinds])
current_axes = plt.gca()
current_axes.grid(True)
current_axes.set_axisbelow(True)
# Reference line at 0.8 (labeled as chance level in the x-axis label).
current_axes.axvline(.8, color='red', linestyle='--')
plt.xlabel('Classification accuracy\n(red line = chance level)')
plt.tight_layout()
Dadi et al. (2019) 表明,在许多队列和临床问题中,切线空间方法应是首选。
可以看到,切线空间的分类精度比其他两种方式要高。
因此,建议用 tangent 来计算 ROI 之间的功能连接。
上面学习了功能连接的相关性分析、偏相关分析、切线空间分析,更推荐使用切线空间。
总的来说,切线空间的代码如下,其实也很简单。
# Recommended recipe: tangent-space functional connectivity.
measure = ConnectivityMeasure(kind='tangent')
# BUG FIX: fit() returns the estimator itself, not the matrices.
# fit_transform() returns one connectivity matrix per subject
# (matching the usage of fit_transform throughout this document).
connectivities = measure.fit_transform([time_series_1, time_series_2, ...])
group_connectivity = measure.mean_  # group mean in tangent space