python 如何计算转移熵?

这个工具箱里有:

https://github.com/nmtimme/Neuroscience-Information-Theory-Toolbox

上述的工具箱是matlab撰写的, 里边很多函数,没有整理;

下边的这个工具箱,相对完整,

Time Series Measures — PyInform 0.2.0 documentation

这个工具箱考虑了:

source  X, target Y

以及背景噪声:background,W

import numpy as np
from pyinform.transferentropy import *

k = 2
#生成两个随机序列
source = [0,1,1,1,1,0,0,0,0]
target = [0,0,1,1,1,1,0,0,0]
print("the tranfer entropy of X -> Y is {:.2f}".format(transfer_entropy(source, target, k=k)))

the tranfer entropy of X -> Y is 0.68

计算转移熵的时候,只能在0 1 序列上进行嘛?如果在普通连续信号(比如神经元的钙信号)上计算呢?

----------------------

还有一个博客,介绍了如何计算传递熵,写的还算详细:

机器学习——建立因果连系(传递熵)_Alphoseven的博客-程序员宅基地 - 程序员宅基地

最核心的一块是,计算联合条件概率和边缘概率分布。以及条件概率分布:

-----------------------

还有一个计算转移熵的仓库:

https://github.com/majianthu/pycopent/blob/master/copent/copent.py

#---
from scipy.special import digamma
from scipy.stats import rankdata as rank
from math import gamma, log, pi
from numpy import array, ndarray, abs, max, sum, sqrt, square, vstack, zeros
from numpy.random import normal as rnorm

##### calculating distance matrix
def dist(x, dtype = 2):
    (N,d) = x.shape
    xd = ndarray(shape = (N,N), dtype = float)

    for i in range(0,N):
        for j in range(i,N):
            if dtype == 1:
                xd[i,j] = sqrt(sum(square(x[i,:]-x[j,:])))
                xd[j,i] = xd[i,j]
            else: ## dtype = 2
                xd[i,j] = max(abs(x[i,:]-x[j,:]))
                xd[j,i] = xd[i,j]

    return xd
##### calculating the distance between two samples
def dist_ij(xi,xj, dtype = 2):
    if dtype == 1:
        return sqrt(sum(square(xi-xj)))
    else: ## dtype = 2
        return max(abs(xi-xj))
def construct_empirical_copula(x):
    (N,d) = x.shape
    xc = np.zeros([N,d])
    for i in range(0,d):
        xc[:,i] = rank(x[:,i]) / N

    return xc
##### constructing empirical copula density [1]
def construct_empirical_copula(x):
    (N,d) = x.shape
    xc = zeros([N,d])
    for i in range(0,d):
        xc[:,i] = rank(x[:,i]) / N

    return xc
##### Estimating entropy with kNN method [2]
def entknn(x, k = 3, dtype = 2, mode = 1):
    (N,d) = x.shape

    g1 = digamma(N) - digamma(k)

    if dtype == 1: # euciledean distance
        cd = pi**(d/2) / 2**d / gamma(1+d/2)
    else:  # maximum distance
        cd = 1;

    logd = 0
    if mode == 1: # for speed / small data
        distx = dist(x, dtype)
        for i in range(0,N):
            distx[i,:].sort()
            logd = logd + log( 2 * distx[i,k] ) * d / N
    else: # 2, for space / large data
        for i in range(0,N):
            disti = []
            for j in range(0,N):
                disti.append( dist_ij(x[i,:],x[j,:],dtype) )
            disti.sort()
            logd = logd + log( 2 * disti[k] ) * d / N

    return (g1 + log(cd) + logd)
##### 2-step Nonparametric estimation of copula entropy [1]
def copent(x, k = 3, dtype = 2, mode = 1, log0 = False):
    xarray = array(x)

    if log0:
        (N,d) = xarray.shape
        max1 = max(abs(xarray), axis = 0)
        for i in range(0,d):
            if max1[i] == 0:
                xarray[:,i] = rnorm(0,1,N)
            else:
                xarray[:,i] = xarray[:,i] + rnorm(0,1,N) * max1[i] * 0.000005

    xc = construct_empirical_copula(xarray)

    try:
        return -entknn(xc, k, dtype, mode)
    except ValueError: # log0 error
        return copent(x, k, dtype, mode, log0 = True)
##### conditional independence test [3]
##### to test independence of (x,y) conditioned on z
def ci(x, y, z, k = 3, dtype = 2, mode = 1):
    xyz = vstack((x,y,z)).T
    yz = vstack((y,z)).T
    xz = vstack((x,z)).T
    return copent(xyz,k,dtype,mode) - copent(yz,k,dtype,mode) - copent(xz,k,dtype,mode)
##### estimating transfer entropy from y to x with lag [3]
def transent(x, y, lag = 1, k = 3, dtype = 2, mode = 1):
    xlen = len(x)
    ylen = len(y)
    if (xlen > ylen):
        l = ylen
    else:
        l = xlen
    if (l < (lag + k + 1)):
        return 0
    x1 = x[0:(l-lag)]
    x2 = x[lag:l]
    y = y[0:(l-lag)]
    return ci(x2,y,x1,k,dtype,mode)

# source = np.array([[0,1,1,1,1,0,0,0,0],[0,0,1,1,1,1,0,0,0]])
source = array([0,1,1,1,1,0,0,0,0])
target = array([0,0,1,1,1,1,0,0,0])

print("te is {:.2f}".format(transent(source, target)))

te is 1.09

----------------------

奇怪不同的方法, 算出来的结果差距这么大么?

-----------------

还有一个计算转移熵的代码:

https://github.com/nmtimme/Neuroscience-Information-Theory-Toolbox

% [te_result, ci_result, all_te] = ASDFTE(asdf, j_delay, i_order, j_order, windowsize)
% Parameters:
%   asdf        - Time series in Another Spike Data Format (ASDF)
%   j_delay     - Number of bins to lag sender (j series) or a vector [default 1]
%   i_order     - Order of receiver [default 1]
%   j_order     - Order of sender [default 1]
%   windowsize  - window size used for Coincidence Index calculation (odd number only)
%
% Returns:
%   te_result - (nNeu, nNeu) NxN matrix where N(i, j) is the transfer entropy from i->j
%               If multiple j_delay is given, this is a peak value of TE over delay.
%   ci_result - (nNeu, nNeu) NxN matrix where N(i, j) is the Coincidence Index from i->j
%               Multiple delays are necessary to calculate it.
%   all_te    - (nNeu, nNeu, delays) NxNxd matrix where N(i, j, k) is the transfer entropy
%               from i->j at delay of jdelay(k). For those who need all the delays.
%
% Examples:
% >> te = ASDFTE(asdf); % gives you delay 1 TE.
% >> [maxte, ci] = ASDFTE(asdf, 1:30); % gives you TEPk and TECI based on delay of 1 to 30.
% >> [maxte, ci] = ASDFTE(asdf, 1:30, 2, 3); % gives you HOTEPk and HOTECI (k=2, l=3) based on delay of 1 to 30.
% >> [maxte, ci, te] = ASDFTE(asdf, 1:30, 1, 1, 3); % gives you TEPk and TECI with CI window of 3, also gives you TE at all delays.

%==============================================================================
% Copyright (c) 2011, The Trustees of Indiana University
% All rights reserved.
%
% Authors: Michael Hansen (mihansen@indiana.edu), Shinya Ito (itos@indiana.edu)
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are met:
%
%   1. Redistributions of source code must retain the above copyright notice,
%      this list of conditions and the following disclaimer.
%
%   2. Redistributions in binary form must reproduce the above copyright notice,
%      this list of conditions and the following disclaimer in the documentation
%      and/or other materials provided with the distribution.
%
%   3. Neither the name of Indiana University nor the names of its contributors
%      may be used to endorse or promote products derived from this software
%      without specific prior written permission.
%
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
% IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
% ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
% LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
% POSSIBILITY OF SUCH DAMAGE.
%==============================================================================

function [te_result, ci_result, all_te] = ASDFTE(asdf, j_delay, i_order, j_order, windowsize)
% Set defaults
if nargin < 2
    j_delay = 1;
end

if nargin < 3
    i_order = 1;
end

if nargin < 4
    j_order = 1;
end

if nargin < 5
    windowsize = 5;
end

if length(j_delay) == 1
    te_result = transent(asdf, j_delay, i_order, j_order); % Single delay
    return;
else
    % Multiple delays
    num_delays = length(j_delay);
    info = asdf{end};
    num_neurons = info(1);

    % Allocate space for all matrices
    all_te = zeros(num_neurons, num_neurons, num_delays);

    % Compute TE for delay times
    for d = 1:num_delays % Change this for to parfor for parallelization.
        all_te(:, :, d) = transent(asdf, j_delay(d), i_order, j_order);
    end

    % Reduce to final matrix
    te_result = max(all_te, [], 3); % reduction in 3rd dimension

    ci_result = zeros(num_neurons);
    if nargout > 1
        for i = 1:num_neurons
            for j = 1:num_neurons
                ci_result(i, j) = CIReduce(all_te(i, j, :), windowsize);
            end
        end
    end
end % if multiple delays
 

-----------------

此外,还有个工具箱,能够计算转移熵;

JIDT

 输入如下:

 

输出如下:

所以,从结果来看,还是推荐使用pyinfor,或者JIDT


                
  • 2
    点赞
  • 22
    收藏
    觉得还不错? 一键收藏
  • 6
    评论
评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值