TIGRAMITE是一个python包,运用PCMCI,从时间序列中发现因果关系。它可以基于PCMCI框架从离散或连续时间序列中提取因果图和滞后图,并绘制出高质量的结果图。
本文以四个变量为例,介绍如何使用TIGRAMITE重构因果图。
基础准备:
- Python 3.8
- Numpy
- TIGERAMITE (Installation: git clone, cd clone dirname, python setup.py install)
数据准备:
import numpy as np
import matplotlib.pyplot as plt
import tigramite
from tigramite import data_processing as pp
from tigramite import plotting as tp
from tigramite.pcmci import PCMCI
from tigramite.independence_tests import parcorr, gpdc, cmiknn
def dyn(x1,x2,x3,x4):
x1 = -0.287*x2 + np.random.normal()
x2 = 0.4*x2 + 0.287*x1+ np.random.normal()
x3 = 0.9*x3 + np.random.normal()
x4 = 0.9 * x2 + np.random.normal()
return x1,x2,x3,x4
x1_ini,x2_ini,x3_ini,x4_ini = np.random.rand(4,1) #初态
timestep = 200 #时间步长
data = np.expand_dims(np.concatenate((x1_ini,x2_ini,x3_ini,x4_ini ), axis=0),axis=0) #shape = [1,4]
for step in range(timestep): # run dynamics
x1_ini,x2_ini,x3_ini,x4_ini = dyn(x1_ini,x2_ini,x3_ini,x4_ini)
temp = np.expand_dims(np.concatenate((x1_ini,x2_ini,x3_ini,x4_ini), axis=0),axis=0)
data = np.concatenate((data, temp), axis=0)
print(data.shape) #shape = [timestep+1,4]
var_names = ['x1', 'x2', 'x3', 'x4']
dataframe = pp.DataFrame(data, datatime=np.arange(len(data)), var_names=var_names)
绘画时间序列
tp.plot_timeseries(dataframe,label_fontsize=20,tick_label_size=10)
寻找lag time
correlations = pcmci.get_lagged_dependencies(tau_max=20, val_only=True)['val_matrix']
lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations, setup_args={'var_names':var_names,'x_base':5, 'y_base':.5})
绘画因果图
ParCorr = parcorr.ParCorr(significance='analytic')
pcmci = PCMCI(
dataframe=dataframe,
cond_ind_test=ParCorr,
verbosity=1)pcmci.verbosity = 1
tau_max=2
results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=None)
q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], tau_max=tau_max, fdr_method='fdr_bh')
tp.plot_graph(graph = results['graph'],
val_matrix=results['val_matrix'],
var_names=var_names,
link_colorbar_label='MCI',
node_colorbar_label='auto-MCI',
link_label_fontsize=20,
label_fontsize=20,
tick_label_size=14,
node_label_size=20,
edge_ticks=0.5,
node_ticks=0.5,
node_size=0.5
)
绘画因果时序图
tp.plot_time_series_graph(
figsize=(4, 4),
val_matrix=results['val_matrix'],
graph = results['graph'],
var_names=var_names,
link_colorbar_label='MCI',
label_fontsize=20,
tick_label_size=20
)
PCMCI is described here: J. Runge, P. Nowack, M. Kretschmer, S. Flaxman, D. Sejdinovic, Detecting and quantifying causal associations in large nonlinear time series datasets. Sci. Adv. 5, eaau4996 (2019)
ps: 如果你想用selected_links这个功能
selected_links={i:{} for i in range(self.node_num)}
for n1, n2 in links:
for lag in range(tau_min, tau_max + 1):
if n1 in selected_links:
selected_links[n1][(n2,-lag)] = '-->' # Dictionary of form {j:{(i, -tau): link_type, ...}, ...}
else:
selected_links[n1] = {(n2,-lag):'-->'}...
results = pcmci.run_pcmci( tau_min=tau_min, tau_max=tau_max, pc_alpha=None, link_assumptions=selected_links)