流形正则化极限学习机代码

流形正则化极限学习机是在正则化极限学习机的基础上引入流形正则化,将流形正则化项加入到正则化极限学习机的目标函数中。目标函数如下:

$$\min_{\beta}\ \|H\beta - Y\|^{2} + \frac{1}{C_1}\|\beta\|^{2} + \frac{1}{C_2}\,\mathrm{tr}\!\left(\beta^{T}H^{T}LH\beta\right)$$

使用公式 $\beta = \left(\frac{I}{C_1} + H^{T}H + \frac{H^{T}LH}{C_2}\right)^{-1}H^{T}Y$ 得到隐藏层和输出层之间的权值矩阵 $\beta$。

代码如下

#!usr/bin/python3
# coding:utf-8

from numpy import *
import pandas as pd
import datetime
from sklearn.model_selection import train_test_split


# Find the k nearest neighbours of a sample inside a data set.
# inX: query sample (row vector)  dataSet: (m, n) sample matrix  k: neighbour count
def knn(inX, dataSet, k):
    """Return the indices of the k samples in dataSet closest to inX.

    Distance is Euclidean; indices are ordered nearest-first.
    """
    n_samples = dataSet.shape[0]
    # Squared per-feature differences between inX and every row of dataSet.
    deltas = array(tile(inX, (n_samples, 1)) - dataSet) ** 2
    # Euclidean distance from inX to each sample.
    dist = deltas.sum(axis=1) ** 0.5
    # argsort gives nearest-first ordering; keep the first k indices.
    return dist.argsort()[:k]


# Build the (unnormalised) graph Laplacian of the k-NN graph of the data.
# dataMat: (m, n) sample matrix  k: neighbours per node  t: heat-kernel bandwidth
def laplaEigen(dataMat, k, t):
    """Return L = D - W for the k-nearest-neighbour graph of dataMat.

    Edge weights use the heat kernel W[i, j] = exp(-||xi - xj||^2 / t);
    D is the diagonal degree matrix accumulated from W's rows.
    NOTE(review): W is built row-by-row from each node's own neighbour
    list, so it is not symmetrised — kept as in the original.
    """
    m, n = shape(dataMat)
    W = mat(zeros([m, m]))
    D = mat(zeros([m, m]))
    for i in range(m):
        k_index = knn(dataMat[i, :], dataMat, k)
        for j in range(k):
            sqDiffVector = dataMat[i, :] - dataMat[k_index[j], :]
            sqDiffVector = array(sqDiffVector) ** 2
            sqDistances = sqDiffVector.sum()
            # BUG FIX: the original called math.exp, but `math` is never
            # imported (`from numpy import *` does not export it), raising
            # NameError at runtime. numpy's exp is already in scope.
            W[i, k_index[j]] = exp(-sqDistances / t)
            # Degree of node i grows with each of its outgoing edge weights.
            D[i, i] += W[i, k_index[j]]
    L = D - W
    return L


# L: graph Laplacian  M: hidden-node count  C1: ridge factor  C2: manifold factor
def MRELM(x_train, y_train, x_test, y_test, L, M, C1, C2):
    """Train and evaluate a Manifold-Regularized Extreme Learning Machine.

    x_train, x_test: sample matrices (rows are samples).
    y_train, y_test: one-hot encoded label matrices.
    L:  graph Laplacian built from the training samples (N_train x N_train).
    M:  number of hidden-layer nodes.
    C1: ridge (L2) regularization factor.
    C2: manifold regularization factor.

    Prints accuracies/timings and returns (training_accuracy,
    testing_accuracy) — returning values is backward compatible with the
    original (whose caller ignored the implicit None) and makes the
    function testable.
    """
    # ############ training phase ############
    time_train_start = datetime.datetime.now()
    # Input dimensionality.
    n = int(shape(x_train)[1])
    # Random input->hidden weights, uniform in [-1, 1).
    w = random.rand(n, M) * 2 - 1
    # Hidden-layer bias row, uniform in [-1, 1).
    # BUG FIX: the original added ones((N_train, 1)), shifting every bias
    # into [0, 2), and later drew a *different* random bias (with yet
    # another range) for the test phase — so test samples were pushed
    # through a network that was never trained. The bias is drawn once
    # here and broadcast over both the training and the test data.
    b = random.rand(1, M) * 2 - 1
    # Hidden-layer output for the training data (sigmoid activation).
    H_train = 1 / (1 + exp(-(dot(x_train, w) + b)))
    # beta = inv(I/C1 + H'H + (H'LH)/C2) * H' * Y
    # BUG FIX: the original applied /C2 to H' *outside* the inverse; per
    # its own formula the 1/C2 factor belongs on the manifold term H'LH
    # inside the inverse.
    A = (eye(M) / float(C1)
         + dot(H_train.T, H_train)
         + dot(dot(H_train.T, L), H_train) / float(C2))
    # linalg.solve is numerically preferable to forming an explicit inverse.
    beta = linalg.solve(A, dot(H_train.T, y_train))
    print("beta size:\n", shape(beta))
    # Predictions on the training data.
    y_train_predict = dot(H_train, beta)
    train_time = datetime.datetime.now() - time_train_start
    # ############ training phase done ############

    # ############ test phase ############
    time_test_start = datetime.datetime.now()
    # Hidden-layer output for the test data: SAME w and b as training.
    H_test = 1 / (1 + exp(-(dot(x_test, w) + b)))
    y_test_predict = dot(H_test, beta)
    test_time = datetime.datetime.now() - time_test_start
    # ############ test phase done ############

    # ############ accuracy ############
    # A sample is misclassified when the argmax of its prediction differs
    # from the argmax of its one-hot label.
    MissClassify_train = 0
    MissClassify_test = 0

    for i in range(len(y_train)):
        if argmax(y_train[i, :]) != argmax(y_train_predict[i, :]):
            MissClassify_train += 1
    # Training accuracy.
    Training_accuracy = 1 - float(MissClassify_train) / len(y_train)

    for i in range(len(y_test)):
        if argmax(y_test[i, :]) != argmax(y_test_predict[i, :]):
            MissClassify_test += 1
    # Testing accuracy.
    Testing_accuracy = 1 - float(MissClassify_test) / len(y_test)

    print("训练正确率:", Training_accuracy)
    print("测试正确率:", Testing_accuracy)
    print("训练时间:", train_time)
    print("测试时间:", test_time)
    return Training_accuracy, Testing_accuracy


def main():
    """Load feature/label CSVs, split 80/20, and run MR-ELM on them."""
    # Feature data: expected 1308 rows x 16 attributes (pandas.DataFrame).
    data = pd.read_csv("C:/Users/54376/Desktop/data/data.csv")
    # Keep only the 16 feature columns (drops the 1..1308 row-index column).
    data = data[
        ['CON_STAT', 'CPU_USED', 'MEM_TOTAL', 'MEM_USED', 'ETH0_RECV_BYTES', 'ETH0_RECV_PKTS', 'ETH2_SEND_BYTES',
         'ETH2_SEND_PKTS', 'FD_PROCESS_CPU', 'FD_PROCESS_MEM', 'OTHER_PRO_CPU', 'SYS_UPTIME', 'DF_RESTART', 'SN',
         'AVG_LEN',
         'FDPKTS']]

    # Labels: select the 'YL' column, yielding a pandas.Series.
    label = pd.read_csv("C:/Users/54376/Desktop/data/label.csv")
    label = label['YL']

    # Expected shapes: (1308, 16) and (1308,).
    print(data.shape)
    print(label.shape)

    # 80/20 train/test split with a fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(data, label, train_size=0.8, random_state=1)

    # Training data as a plain ndarray.
    X_train = array(X_train)
    print("训练数据大小:\n", shape(X_train))
    print(type(X_train))
    # One-hot encode the training labels (N x num_classes matrix).
    y_train = array(pd.get_dummies(y_train))
    print("训练标签大小:\n", shape(y_train))

    # Test data as a plain ndarray.
    X_test = array(X_test)
    # BUG FIX: the original labelled the *test* data size "训练数据大小"
    # (training data size); corrected to 测试数据大小.
    print("测试数据大小:\n", shape(X_test))
    print(type(X_test))
    # One-hot encode the test labels.
    y_test = array(pd.get_dummies(y_test))
    print("测试标签大小:\n", shape(y_test))

    # Graph Laplacian of the training data: k=15 neighbours, bandwidth t=5.0.
    L = laplaEigen(X_train, 15, 5.0)

    print("拉普拉斯矩阵大小:\n", shape(L))
    # 20 hidden nodes; C1=0.1 (ridge), C2=0.2 (manifold).
    MRELM(X_train, y_train, X_test, y_test, L, 20, 0.1, 0.2)
    return


if __name__ == '__main__':
    main()

  • 5
    点赞
  • 40
    收藏
    觉得还不错? 一键收藏
  • 14
    评论
评论 14
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值