流形正则化极限学习机(MR-ELM)在正则化极限学习机的基础上,将流形正则化项引入其目标函数。目标函数如下。
由下述公式求解隐藏层与输出层之间的权值矩阵 β,代码实现如下。
#!usr/bin/python3
# coding:utf-8
from numpy import *
import pandas as pd
import datetime
from sklearn.model_selection import train_test_split
# Return the indices of the k nearest neighbours of inX within dataSet,
# measured by Euclidean distance (the query point itself is included
# when it is present in dataSet, since its distance is zero).
# inX: query sample  dataSet: data set  k: number of neighbours
def knn(inX, dataSet, k):
    n_samples = dataSet.shape[0]
    # Squared component-wise differences between inX and every sample.
    sq_deltas = array(tile(inX, (n_samples, 1)) - dataSet) ** 2
    # Euclidean distance from inX to each sample (row-wise sum, then root).
    dists = sq_deltas.sum(axis=1) ** 0.5
    # Indices of the k smallest distances, nearest first.
    order = dists.argsort()
    return order[:k]
# Build the (unnormalised) graph Laplacian L = D - W of a k-NN similarity
# graph with heat-kernel weights w_ij = exp(-||x_i - x_j||^2 / t).
# dataMat: data set (m samples)  k: number of neighbours  t: kernel bandwidth
# Returns an m x m matrix L = D - W, where D is the diagonal degree matrix.
def laplaEigen(dataMat, k, t):
    m, n = shape(dataMat)
    W = mat(zeros([m, m]))  # similarity (weight) matrix
    D = mat(zeros([m, m]))  # diagonal degree matrix: D[i,i] = sum_j W[i,j]
    for i in range(m):
        k_index = knn(dataMat[i, :], dataMat, k)
        for j in range(k):
            sqDiffVector = dataMat[i, :] - dataMat[k_index[j], :]
            sqDiffVector = array(sqDiffVector) ** 2
            sqDistances = sqDiffVector.sum()
            # BUG FIX: the original called math.exp, but `math` is never
            # imported in this file (NameError at runtime). numpy's exp is
            # already in scope via `from numpy import *` and is equivalent
            # on scalars.
            W[i, k_index[j]] = exp(-sqDistances / t)
            D[i, i] += W[i, k_index[j]]
    # NOTE(review): W is filled row-by-row from each point's own neighbour
    # list, so it is not necessarily symmetric — confirm this is intended.
    L = D - W
    return L
# Manifold-Regularised Extreme Learning Machine (MR-ELM): train on
# (x_train, y_train), then evaluate on (x_test, y_test).
# L: graph Laplacian of the training data (N_train x N_train)
# M: number of hidden nodes  C1: regularisation factor  C2: manifold factor
# y_train / y_test are one-hot label matrices; accuracy is computed by argmax.
# Returns (train_accuracy, test_accuracy, train_time, test_time).
# (The original returned None, so existing callers that ignore the return
# value are unaffected.)
def MRELM(x_train, y_train, x_test, y_test, L, M, C1, C2):
    ############# training phase ############
    time_train_start = datetime.datetime.now()
    N_train = int(shape(x_train)[0])  # number of training samples
    n = int(shape(x_train)[1])        # input dimensionality
    N_test = int(shape(x_test)[0])    # number of test samples
    # Random input->hidden weights in [-1, 1].
    w = random.rand(n, M) * 2 - 1
    # Random hidden-layer bias in [-1, 1], generated ONCE.
    # BUG FIX: the original drew a fresh (and differently distributed) bias
    # for the test phase; in ELM the test phase must reuse the exact bias
    # used during training, otherwise the learned output weights are invalid.
    # The (1, M) row broadcasts across all samples, so the explicit
    # `+ ones((N, 1))` tiling trick (which also shifted every bias by +1)
    # is unnecessary.
    b = random.rand(1, M) * 2 - 1
    # Hidden-layer output for the training data, sigmoid activation.
    H_train = 1 / (1 + exp(-(dot(x_train, w) + b)))
    # Output weights per the MR-ELM objective:
    #   beta = inv(I/C1 + H'H + (H'LH)/C2) * H' * Y
    # BUG FIX: the original divided H' (outside the inverse) by C2 instead
    # of dividing the manifold term H'LH inside the inverse, contradicting
    # the formula above.
    A = (eye(shape(H_train)[1]) / float(C1)
         + dot(H_train.T, H_train)
         + dot(dot(H_train.T, L), H_train) / float(C2))
    beta = dot(dot(linalg.inv(A), H_train.T), y_train)
    print("beta size:\n", shape(beta))
    # Predictions on the training data.
    y_train_predict = dot(H_train, beta)
    time_train_end = datetime.datetime.now()
    train_time = time_train_end - time_train_start
    ############# test phase ############
    time_test_start = datetime.datetime.now()
    # Reuse the SAME weights w and bias b from training.
    H_test = 1 / (1 + exp(-(dot(x_test, w) + b)))
    y_test_predict = dot(H_test, beta)
    time_test_end = datetime.datetime.now()
    test_time = time_test_end - time_test_start
    ############# accuracy ############
    # A sample is misclassified when the argmax of the prediction differs
    # from the argmax of the one-hot label.
    MissClassify_train = 0
    for i in range(len(y_train)):
        if argmax(y_train[i, :]) != argmax(y_train_predict[i, :]):
            MissClassify_train += 1
    Training_accuracy = 1 - float(MissClassify_train) / len(y_train)
    MissClassify_test = 0
    for i in range(len(y_test)):
        if argmax(y_test[i, :]) != argmax(y_test_predict[i, :]):
            MissClassify_test += 1
    Testing_accuracy = 1 - float(MissClassify_test) / len(y_test)
    print("训练正确率:", Training_accuracy)
    print("测试正确率:", Testing_accuracy)
    print("训练时间:", train_time)
    print("测试时间:", test_time)
    return Training_accuracy, Testing_accuracy, train_time, test_time
# Load data and labels from CSV, split 8:2 into train/test, build the graph
# Laplacian of the training set, and train/evaluate the MR-ELM classifier.
def main():
    # Data: 16 attributes per sample (pandas.DataFrame).
    data = pd.read_csv("C:/Users/54376/Desktop/data/data.csv")
    # Keep only the 16 feature columns (drops the 1..1308 index column).
    data = data[
        ['CON_STAT', 'CPU_USED', 'MEM_TOTAL', 'MEM_USED', 'ETH0_RECV_BYTES', 'ETH0_RECV_PKTS', 'ETH2_SEND_BYTES',
         'ETH2_SEND_PKTS', 'FD_PROCESS_CPU', 'FD_PROCESS_MEM', 'OTHER_PRO_CPU', 'SYS_UPTIME', 'DF_RESTART', 'SN',
         'AVG_LEN',
         'FDPKTS']]
    # Labels (pandas.Series after column selection).
    label = pd.read_csv("C:/Users/54376/Desktop/data/label.csv")
    label = label['YL']
    print(data.shape)   # expected (1308, 16)
    print(label.shape)  # expected (1308,)
    # 80/20 train/test split with a fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(data, label, train_size=0.8, random_state=1)
    X_train = array(X_train)
    print("训练数据大小:\n", shape(X_train))
    print(type(X_train))
    # One-hot encode the training labels (1 x 6 per sample).
    y_train = array(pd.get_dummies(y_train))
    print("训练标签大小:\n", shape(y_train))
    X_test = array(X_test)
    # BUG FIX: this print mislabelled the TEST data size as "训练数据大小"
    # (training data size).
    print("测试数据大小:\n", shape(X_test))
    print(type(X_test))
    # One-hot encode the test labels.
    y_test = array(pd.get_dummies(y_test))
    print("测试标签大小:\n", shape(y_test))
    # Graph Laplacian of the training data: 15 neighbours, bandwidth t = 5.
    L = laplaEigen(X_train, 15, 5.0)
    print("拉普拉斯矩阵大小:\n", shape(L))
    MRELM(X_train, y_train, X_test, y_test, L, 20, 0.1, 0.2)
    return
# Script entry point (indentation restored — the call must be inside the guard).
if __name__ == '__main__':
    main()