import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris, load_digits
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import seaborn as sns
from sklearn.model_selection import train_test_split
def sigmoid(x):
    """Numerically stable logistic function 1 / (1 + exp(-x)).

    Evaluated through exp(-|x|), which is always <= 1, so the
    exponential can never overflow — the naive form raises a
    RuntimeWarning for large negative inputs.

    x -- scalar or ndarray; returns an array of the same shape.
    """
    z = np.exp(-np.abs(x))
    # For x >= 0: 1/(1+exp(-x)); for x < 0 the algebraically
    # equivalent exp(x)/(1+exp(x)) — both expressed via z = exp(-|x|).
    return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
class MLP:
    """Fully-connected feed-forward network with sigmoid activations,
    trained by full-batch gradient descent on the mean-squared error.

    size -- list of layer widths, e.g. [64, 40, 20, 10]: input
            dimension, hidden layer sizes, number of output classes.
    """

    def __init__(self, size):
        self.size = size
        # One weight matrix per layer, shape (n_out, n_in).
        # Scale the init std by 1/sqrt(fan_in) so pre-activations stay
        # O(1) regardless of layer width (LeCun-style init); the
        # original scaled by fan_out (i ** -0.5), which is non-standard.
        self.w = [np.random.normal(0.0, j ** -0.5, (i, j))
                  for i, j in zip(size[1:], size[:-1])]
        # One bias column vector per layer, shape (n_out, 1).
        self.b = [np.random.normal(0.0, i ** -0.5, (i, 1)) for i in size[1:]]

    @staticmethod
    def _sigmoid(z):
        """Logistic activation 1 / (1 + exp(-z))."""
        return 1.0 / (1.0 + np.exp(-z))

    def predict_fun(self, X):
        """Return the predicted class index for every row of X.

        X -- array of shape (n_samples, n_features).
        Returns a float array of shape (n_samples,) of class indices
        (float dtype kept for backward compatibility).
        """
        out = X.T  # the forward pass works on column vectors
        for w, b in zip(self.w, self.b):
            out = self._sigmoid(np.matmul(w, out) + b)
        # Class label = index of the largest output unit per sample.
        return np.argmax(out.T, axis=1).astype(float)

    def bp(self, x, y, lr):
        """One full-batch gradient-descent step (forward + backprop).

        x  -- samples, shape (n_samples, n_features)
        y  -- one-hot targets, shape (n_classes, n_samples)
        lr -- learning rate
        Returns the network output for x, shape (n_classes, n_samples).
        """
        grad_w = [np.zeros(w.shape) for w in self.w]
        grad_b = [np.zeros(b.shape) for b in self.b]
        # Forward pass, remembering every layer's activation.
        out = x.T
        out_list = [out]
        for w, b in zip(self.w, self.b):
            out = self._sigmoid(np.dot(w, out) + b)
            out_list.append(out)
        # Training-progress trace; identical in value to sklearn's
        # mean_squared_error(out, y) but without the extra dependency.
        print(np.mean((out - y) ** 2))
        # Backward pass.  delta = dE/dnet of the current layer.
        # Output layer: MSE derivative times sigmoid derivative.
        delta = out_list[-1] * (1 - out_list[-1]) * (out_list[-1] - y)
        grad_b[-1] = np.sum(delta, axis=1).reshape(-1, 1)
        grad_w[-1] = np.dot(delta, out_list[-2].T)
        # Hidden layers, walking backwards from the output.
        for back in range(2, len(self.size)):
            i = -back
            out = out_list[i]
            delta = np.dot(self.w[i + 1].T, delta) * out * (1 - out)
            grad_b[i] = np.sum(delta, axis=1).reshape(-1, 1)
            grad_w[i] = np.dot(delta, out_list[i - 1].T)
        # Gradient-descent update (gradients summed over the batch).
        for i in range(len(grad_w)):
            self.w[i] -= lr * grad_w[i]
            self.b[i] -= lr * grad_b[i]
        return out_list[-1]

    def main(self, dataset, target, epoch, lr):
        """Train for `epoch` full-batch steps.

        dataset -- samples, shape (n_samples, n_features)
        target  -- integer class labels; converted to one-hot here
        epoch   -- number of gradient steps
        lr      -- learning rate
        """
        # Multi-class labels -> one-hot columns, shape (n_classes, n_samples).
        one_hot = np.identity(self.size[-1])
        target = one_hot[target].T
        for _ in range(epoch):
            self.bp(dataset, target, lr)
if __name__ == '__main__':
    # Load the 8x8 handwritten-digit dataset.
    X, y = load_digits(return_X_y=True)
    n_features = X.shape[1]
    n_classes = np.unique(y).size
    # Build a 4-layer network: n_features -> 40 -> 20 -> n_classes.
    model = MLP([n_features, 40, 20, n_classes])
    # Hold out 20% of the data for evaluation, train on the rest.
    X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2)
    model.main(X_train, Y_train, 3000, 0.001)
    # Predict the held-out samples.
    predict = model.predict_fun(X_test)
    # Report accuracy and show a confusion-matrix heatmap.
    acc = accuracy_score(predict, Y_test)
    print('最终准确率为:', acc)
    c_m = confusion_matrix(predict, Y_test)
    sns.heatmap(c_m, annot=True, vmax=1, vmin=0)
    plt.show()
# 算法本身不难,我发现和初始化的w和b有关,用normal正态分布的话,效果真的比随机的randn好很多,这里我用的4层神经网络,效果还是很好的
# 最终准确率也有个96往上