最大信息系数(MIC)的 Python 实现代码

完整代码

import numpy as np
import logging
import sys
import pandas as pd

class CyrusMIC(object):
    """Estimate a maximal-information-style coefficient between two variables.

    The estimate is a simplified one: for every grid size in the configured
    range the samples are counted on an equal-width grid, the mutual
    information of the resulting probability matrix is normalised by
    ``log2(min(rows, cols))``, and the largest normalised value is returned.

    :param x_num: ``[min_bins, max_bins]`` searched along x; ``None`` entries
        are filled in per call by :meth:`cal_MIC`.
    :param y_num: ``[min_bins, max_bins]`` searched along y; ``None`` entries
        are filled in per call by :meth:`cal_MIC`.
    """

    # Logging is configured when the class body executes: the root logger is
    # set to INFO and a stdout handler is attached to it.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    screen_handler = logging.StreamHandler(sys.stdout)
    screen_handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(module)s.%(funcName)s:%(lineno)d - %(levelname)s - %(message)s')
    screen_handler.setFormatter(formatter)
    logger.addHandler(screen_handler)

    def __init__(self, x_num=(None, None), y_num=(None, None)):
        # Immutable tuple defaults: a list default would be one object shared
        # by every instance created without arguments.
        self.x_max_num = x_num[1]
        self.x_min_num = x_num[0]
        self.y_min_num = y_num[0]
        self.y_max_num = y_num[1]
        self.x = None
        self.y = None

    def cal_mut_info(self, p_matrix):
        """
        Compute the normalised mutual information of a joint probability matrix.

        :param p_matrix: 2-D joint probability matrix of the binned X and Y
        :return: mutual information (in bits) divided by
            ``log2(min(rows, cols))``; ``0.0`` when either axis has one bin
        """
        p_matrix = np.array(p_matrix, dtype=float)
        row_sums = p_matrix.sum(axis=1, keepdims=True)
        col_sums = p_matrix.sum(axis=0, keepdims=True)
        independent = row_sums * col_sums
        # Empty cells contribute nothing and must be skipped to avoid log2(0).
        filled = p_matrix != 0
        mut_info = float(
            np.sum(p_matrix[filled] * np.log2(p_matrix[filled] / independent[filled]))
        )
        normaliser = np.log2(min(p_matrix.shape[0], p_matrix.shape[1]))
        # With a single bin on one axis the normaliser is log2(1) == 0; the
        # mutual information is zero there as well, so report 0 instead of 0/0.
        score = mut_info / normaliser if normaliser > 0 else 0.0
        self.logger.info("信息系数为:{}".format(score))
        return score

    def divide_bin(self, x_num, y_num):
        """
        Split both variables into equal-width bins and return the probability matrix.

        The bins span exactly ``[min, max]`` of each variable and the last bin
        includes its right edge, so the result does not depend on the scale
        of the data.

        :param x_num: number of bins along x
        :param y_num: number of bins along y
        :return: p_matrix of shape ``(x_num, y_num)`` whose entries sum to 1
        """
        counts, _, _ = np.histogram2d(self.x, self.y, bins=[x_num, y_num])
        return counts / self.x.shape[0]

    def cal_MIC(self, x, y):
        """
        Search the configured grid sizes and return the largest normalised
        mutual information.

        Bounds left unset in the constructor default to a minimum of 2 bins
        and a maximum of ``round(n ** 0.3)`` bins, where ``n`` is the sample
        count. The defaults are recomputed on every call and are not written
        back to the instance, so one object can be reused on samples of
        different sizes.

        :param x: samples of the first variable (flattened to 1-D)
        :param y: samples of the second variable (flattened to 1-D)
        :return: the maximum normalised mutual information over all grids
        :raises ValueError: if the inputs are empty, differ in length, or the
            bin ranges contain no grid to evaluate
        """
        self.x = np.array(x).reshape((-1,))
        self.y = np.array(y).reshape((-1,))
        sample_count = self.x.shape[0]
        if sample_count == 0:
            raise ValueError("x and y must not be empty")
        if self.y.shape[0] != sample_count:
            raise ValueError("x and y must contain the same number of samples")

        default_max = int(round(sample_count ** 0.3, 0))
        x_min = self.x_min_num or 2
        x_max = self.x_max_num or default_max
        y_min = self.y_min_num or 2
        y_max = self.y_max_num or default_max

        mics = []
        for i in range(x_min, x_max + 1):
            # The y axis is bounded by its own maximum, not the x maximum.
            for j in range(y_min, y_max + 1):
                self.logger.info("划分区间数量为:[{},{}]".format(i, j))
                mics.append(self.cal_mut_info(self.divide_bin(i, j)))
        if not mics:
            raise ValueError(
                "no grid to evaluate: x bins {}..{}, y bins {}..{}".format(
                    x_min, x_max, y_min, y_max
                )
            )
        best = max(mics)
        self.logger.info("最大信息系数为:{}".format(best))
        return best

if __name__ == '__main__':
    # Demo: scatter-plot two spreadsheet columns and log their MIC estimate.
    # The plotting libraries are imported here because only the demo uses them.
    from matplotlib.font_manager import FontProperties
    import matplotlib.pyplot as plt

    # SimSun font file used for the title and axis labels.
    font_set = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=15)

    # Columns 'a' and 'b' of the workbook are the two variables compared.
    table = pd.read_excel("xxx.xls")
    x, y = table['a'], table['b']

    # One square figure holding a single axes.
    fig = plt.figure(figsize=(5, 5))
    ax = fig.add_subplot(111)
    ax.set_title('xxx', fontsize=11, fontstyle='oblique', fontproperties=font_set)
    ax.set_xlabel('xxx', fontsize=11, fontfamily='sans-serif', fontstyle='italic', fontproperties=font_set)
    ax.set_ylabel('xxx', fontsize=11, fontstyle='oblique', fontproperties=font_set)
    ax.scatter(x, y, c='g')

    # The coefficient is reported through the class logger before the window opens.
    CyrusMIC().cal_MIC(x, y)
    plt.show()

效果展示
在这里插入图片描述

  • 1
    点赞
  • 19
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

慕斯-ing

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值