The hyperg package in THU-HyperG

utils.py        

import time
from itertools import product
import numpy as np
from scipy.optimize import linear_sum_assignment
# Print a timestamped message to the console
def print_log(message):
    """
    :param message: str, the message to print
    :return:
    """
    print("[{}] {}".format(time.strftime("%Y-%m-%d %X", time.localtime()), message))
# Initialize the label matrix Y from a label vector y
def init_label_matrix(y):
    """
    :param y: numpy array of shape (n_nodes,); -1 marks unlabeled nodes, any other integer is a class label
    :return:
    """
    y = y.reshape(-1)
    labels = list(np.unique(y))

    if -1 in labels:
        labels.remove(-1)

    n_nodes = y.shape[0]
    Y = np.ones((n_nodes, len(labels))) * (1/len(labels))
    for idx, label in enumerate(labels):
        Y[np.where(y == label), :] = 0
        Y[np.where(y == label), idx] = 1

    return Y
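A quick usage sketch (not part of utils.py; the labels below are made up):

y = np.array([0, 1, -1, 1])   # node 2 is unlabeled
Y = init_label_matrix(y)
# Y == [[1. , 0. ],
#       [0. , 1. ],
#       [0.5, 0.5],   # unlabeled row: uniform over the two classes
#       [0. , 1. ]]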

 

# Compute classification accuracy from the soft label matrix F
def calculate_accuracy(F, y_test):
    predict_y = np.argmax(F, axis=1).reshape(-1)
    return sum(predict_y == y_test) / len(predict_y)
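For instance, with a hypothetical soft prediction matrix F:

F = np.array([[0.9, 0.1],
              [0.2, 0.8],
              [0.6, 0.4]])
y_test = np.array([0, 1, 1])
calculate_accuracy(F, y_test)   # argmax per row is [0, 1, 0] -> accuracy 2/3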
# Compute the per-class Intersection over Union (IoU)
def iou_socre(pred, target):
    """
    :param pred: predicted label map, integer numpy array
    :param target: ground-truth label map, integer numpy array of the same shape
    :return: list of per-class IoU scores (the background class 0 is excluded)
    """
    ious = []
    n_class = target.max() + 1
    
    # Ignore the background class ("0"); compute IoU for classes 1 .. n_class-1
    for c in range(1, n_class):
        pred_idx = pred == c
        target_idx = target == c
        intersection = (pred_idx & target_idx).sum()
        union = (pred_idx | target_idx).sum()
        ious.append((intersection + 1e-6)/(union + 1e-6))

    return ious

Intersection over Union (IoU) measures how much two sets overlap and is a standard metric for object detection and image segmentation. For two sets A and B it is defined as:

  IoU(A, B) = |A ∩ B| / |A ∪ B|

  • IoU = 1 means the two sets overlap completely, i.e. the prediction matches the ground truth exactly.
  • 0 < IoU < 1 means partial overlap; the closer to 1, the larger the overlap.
  • IoU = 0 means the two sets do not overlap at all.
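A small example with made-up label maps, using the iou_socre function above:

pred   = np.array([0, 1, 1, 2, 2, 0])
target = np.array([0, 1, 2, 2, 2, 0])
iou_socre(pred, target)
# class 1: intersection 1, union 2 -> ~0.5
# class 2: intersection 2, union 3 -> ~0.667
# the background class 0 is skipped, so the list has n_class - 1 entries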
# Min-max normalization
def minmax_scale(array, ranges=(0., 1.)):
    """
    normalize to [min, max], default is [0., 1.]
    :param array: ndarray
    :param ranges: tuple, (min, max)
    :return:
    """
    _min = ranges[0]
    _max = ranges[1]
    return (_max - _min) * (array - array.min()) / (array.max() - array.min()) + _min
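For example:

a = np.array([2., 4., 6.])
minmax_scale(a)              # -> [0. , 0.5, 1. ]
minmax_scale(a, (-1., 1.))   # -> [-1.,  0. , 1. ]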
# Gather, for every position of a 3-D array x, the features of its patch_size neighborhood
def gather_patch_ft(x, patch_size):
    """
    :param x: numpy array of shape M x N x C
    :param patch_size: tuple (rows, columns) of the patch
    :return: numpy array of shape M x N x (rows * columns * C)
    """
    # x must be a 3-D array and patch_size must have exactly two entries
    assert len(x.shape) == 3
    assert len(patch_size) == 2
    # Reshape x to (M*N) x C and prepend a row of zeros; index 0 then acts as zero padding
    x_row_num, x_col_num = x.shape[:2]
    x = x.reshape(-1, x.shape[2])
    x = np.concatenate([np.zeros(x.shape[1])[np.newaxis, :], x])

    # Build the gather indices for every position's patch neighborhood
    out_idx = []
    center_row, center_col = (patch_size[0] + 1) // 2 - 1, (patch_size[1] + 1) // 2 - 1
    x_idx = np.arange(x_row_num * x_col_num).reshape(x_row_num, x_col_num)
    x_idx_pad = np.zeros((x_row_num + patch_size[0] - 1, x_col_num + patch_size[1] - 1))
    x_idx_pad[center_row:center_row + x_row_num, center_col:center_col + x_col_num] = x_idx + 1

    for _row, _col in product(range(patch_size[0]), range(patch_size[1])):
        out_idx.append(x_idx_pad[_row:_row + x_row_num, _col:_col + x_col_num].reshape(-1, 1))
    out_idx = np.concatenate(out_idx, axis=1).astype(np.int64)   # MN x kk

    # The returned tensor out has shape M x N x (kk * C)
    out = x[out_idx.reshape(-1)]    # MNkk x C
    out = out.reshape(x_row_num, x_col_num, -1) # M x N x kkC
    return out
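A shape-level sketch (the array contents are random; only the shapes matter):

x = np.random.rand(32, 32, 3)      # M x N x C
ft = gather_patch_ft(x, (5, 5))    # concatenate each position's 5 x 5 neighborhood
print(ft.shape)                    # (32, 32, 75), i.e. M x N x (5 * 5 * C)
# neighbors that fall outside the image map to the prepended zero row (zero padding)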
# Compute clustering accuracy via the Hungarian algorithm
def calculate_clustering_accuracy(y_gnd, y_pred):
    """
    y_gnd and y_pred are the ground-truth and predicted cluster labels. The Hungarian algorithm (scipy's linear_sum_assignment) finds the best one-to-one matching between clusters and classes, and the accuracy is computed under that matching.
    """
    y_pred = y_pred.reshape(-1)
    y_gnd = y_gnd.reshape(-1)
    
    n_samples = y_gnd.shape[0]
    n_class = np.unique(y_gnd).shape[0]
    
    M = np.zeros((n_class, n_class))

    for i in range(n_samples):
        r = y_gnd[i]
        c = y_pred[i]
        M[r, c] += 1

    row_idx, col_idx = linear_sum_assignment(-M)

    map = np.zeros((n_class, n_class))
    map[row_idx, col_idx] = 1.

    acc = np.sum(M * map) / n_samples

    return acc
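A small example (made-up labels) where the predicted cluster ids are a permutation of the true classes; the Hungarian matching recovers the correspondence:

y_gnd  = np.array([0, 0, 1, 1, 2, 2])
y_pred = np.array([2, 2, 0, 0, 1, 1])          # same partition, different cluster ids
calculate_clustering_accuracy(y_gnd, y_pred)   # -> 1.0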

__init__.py

from . import generation
from . import learning
from . import hyperg
from . import utils
from .version import __version__

__all__ = [
    'generation',
    'learning',
    'hyperg',
    'utils'
]

If you are not sure what __init__.py does, this post explains it well:

【python】__init__.py文件到底是什么? - 知乎 (zhihu.com)

version.py:

__version__ = '0.0.3'

hyperg.py (the key module):

import time
import scipy.sparse as sparse
import numpy as np
class HyperG:
    def __init__(self, H, X=None, w=None):
        """ 
        Initialize the hypergraph with its incidence matrix, node feature matrix, and hyperedge weight vector.
        :param H: scipy coo_matrix of shape (n_nodes, n_edges)
        :param X: numpy array of shape (n_nodes, n_features)
        :param w: numpy array of shape (n_edges,)
        """
        # H must be a 2-D sparse matrix
        assert sparse.issparse(H)
        assert H.ndim == 2
        
        self._H = H
        self._n_nodes = self._H.shape[0]
        self._n_edges = self._H.shape[1]
        # If X is given, it must be a 2-D numpy array (the node feature matrix)
        if X is not None:
            assert isinstance(X, np.ndarray) and X.ndim == 2
            self._X = X
        else:
            self._X = None
        # Hyperedge weights: flatten w if given, otherwise use all-ones
        if w is not None:
            self.w = w.reshape(-1)
            assert self.w.shape[0] == self._n_edges
        else:
            self.w = np.ones(self._n_edges)

        self._DE = None     # cached diagonal matrix of hyperedge degrees
        self._DV = None     # cached diagonal matrix of node (vertex) degrees
        self._INVDE = None  # cached inverse of the edge-degree matrix
        self._DV2 = None    # cached inverse square root of the node-degree matrix
        self._THETA = None  # cached Theta matrix of the hypergraph
        self._L = None      # cached hypergraph Laplacian

    def num_edges(self):
        return self._n_edges

    def num_nodes(self):
        return self._n_nodes

    def incident_matrix(self):
        return self._H

    def hyperedge_weights(self):
        return self.w

    def node_features(self):
        return self._X

    # Node degree matrix D_v (diagonal; d(v) = sum of weights of the edges incident to v)
    def node_degrees(self):
        if self._DV is None:
            H = self._H.tocsr()
            dv = H.dot(self.w.reshape(-1, 1)).reshape(-1)
            self._DV = sparse.diags(dv, shape=(self._n_nodes, self._n_nodes))
        return self._DV

    # Hyperedge degree matrix D_e (diagonal; d(e) = number of nodes in e)
    def edge_degrees(self):
        if self._DE is None:
            H = self._H.tocsr()
            de = H.sum(axis=0).A.reshape(-1)
            self._DE = sparse.diags(de, shape=(self._n_edges, self._n_edges))
        return self._DE
    
    # Inverse of the hyperedge degree matrix, D_e^{-1}
    def inv_edge_degrees(self):
        if self._INVDE is None:
            self.edge_degrees()
            inv_de = np.power(self._DE.data.reshape(-1), -1.)
            self._INVDE = sparse.diags(inv_de, shape=(self._n_edges, self._n_edges))
        return self._INVDE
    
    # Inverse square root of the node degree matrix, D_v^{-1/2}
    def inv_square_node_degrees(self):
        if self._DV2 is None:
            self.node_degrees()
            dv2 = np.power(self._DV.data.reshape(-1), -0.5)
            self._DV2 = sparse.diags(dv2, shape=(self._n_nodes, self._n_nodes))
        return self._DV2

    # Theta matrix: D_v^{-1/2} H W D_e^{-1} H^T D_v^{-1/2}
    def theta_matrix(self):
        if self._THETA is None:
            self.inv_square_node_degrees()
            self.inv_edge_degrees()

            W = sparse.diags(self.w)
            self._THETA = self._DV2.dot(self._H).dot(W).dot(self._INVDE).dot(self._H.T).dot(self._DV2)
        return self._THETA

    # Hypergraph Laplacian: L = I - Theta
    def laplacian(self):
        if self._L is None:
            self.theta_matrix()
            self._L = sparse.eye(self._n_nodes) - self._THETA
        return self._L

    # Update the hyperedge weights and invalidate the weight-dependent cached matrices
    def update_hyedge_weights(self, w):
        assert isinstance(w, (np.ndarray, list)), "w should be a numpy array or a list"

        self.w = np.array(w).reshape(-1)
        assert self.w.shape[0] == self._n_edges

        self._DV = None
        self._DV2 = None
        self._THETA = None
        self._L = None

    # Update the incidence matrix H and invalidate all cached matrices
    def update_incident_matrix(self, H):
        assert sparse.issparse(H)
        assert H.ndim == 2
        assert H.shape[0] == self._n_nodes
        assert H.shape[1] == self._n_edges

        # Clear every cached matrix derived from H
        self._H = H
        self._DE = None
        self._DV = None
        self._INVDE = None
        self._DV2 = None
        self._THETA = None
        self._L = None


At first it was not obvious to me why all of these cached matrices are cleared here; the reason is that every one of them (D_e, D_v, Theta, L) is derived from H, so replacing H makes them stale and they are recomputed lazily on the next access.
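A minimal usage sketch (the incidence pattern below is made up; it assumes the HyperG class defined above is in scope):

import numpy as np
import scipy.sparse as sparse

# 4 nodes, 2 hyperedges: e0 = {0, 1, 2}, e1 = {2, 3}
rows = [0, 1, 2, 2, 3]
cols = [0, 0, 0, 1, 1]
vals = [1., 1., 1., 1., 1.]
H = sparse.coo_matrix((vals, (rows, cols)), shape=(4, 2))

hg = HyperG(H, w=np.array([1., 1.]))
print(hg.num_nodes(), hg.num_edges())   # 4 2
L = hg.laplacian()                      # I - D_v^{-1/2} H W D_e^{-1} H^T D_v^{-1/2}
print(L.toarray().round(3))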

# Simple container pairing a list of model matrices M with a weight vector omega (one weight per matrix)
class IMHL:
    def __init__(self, M, omega):
        assert isinstance(M, list)
        assert isinstance(omega, np.ndarray)
        assert len(M) == omega.shape[0]

        self.M = M
        self.omega = omega

 

 
