Red wine quality prediction: https://www.kaggle.com/uciml/red-wine-quality-cortez-et-al-2009/tasks?taskId=4684
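For reference, the CSV has 11 physico-chemical feature columns plus a quality label, 1599 rows in total. A quick way to inspect it (illustrative, not part of the original files; adjust the path to your copy):

import pandas as pd
df = pd.read_csv("winequality-red.csv")
print(df.shape)             # (1599, 12)
print(df.columns.tolist())  # [..., 'alcohol', 'quality']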
ActivateFuncs.py
import numpy as np

def ReLu(x):
    # Element-wise ReLU: max(0, x)
    return np.maximum(0, x)

def deriv_ReLu(x):
    # Derivative of ReLU: 1 where x > 0, else 0.
    # Work on a copy so the caller's array (e.g. the cached Z) is not mutated.
    ret = x.copy()
    ret[ret <= 0] = 0
    ret[ret > 0] = 1
    return ret
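A quick sanity check of the two functions (illustrative, not part of the original files):

import numpy as np
import ActivateFuncs

x = np.array([[-2.0, 0.0, 3.0]])
print(ActivateFuncs.ReLu(x))        # [[0. 0. 3.]]
print(ActivateFuncs.deriv_ReLu(x))  # [[0. 0. 1.]]
print(x)                            # unchanged, thanks to the copy in deriv_ReLu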
main.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ActivateFuncs
from sklearn.preprocessing import StandardScaler
# Hyperparameters
hidden_layer_number = 5    # number of hidden layers
hidden_neuron_number = 11  # neurons per hidden layer
feature_number = 11        # number of input features
learn_rate = 0.01          # learning rate
max_epochs = 100           # maximum number of training iterations
sample_number = 1600       # number of training samples (the CSV actually has 1599 rows)
mse_final = 6.5e-4         # MSE threshold; stop iterating once the error falls below it
# Read the data
trainFile = r"C:\Users\Mr.K\Desktop\机器学习:红酒质量预测\winequality-red.csv"
data = pd.read_csv(trainFile)  # read_csv returns a DataFrame
data = np.array(data)
traindata = data[:sample_number]
sample_number = traindata.shape[0]  # use the true row count (1599) in the gradient averages below
# Build the matrices. StandardScaler standardizes each COLUMN of its input,
# so it must run on the (samples, features) matrix BEFORE the transpose.
ss = StandardScaler()
inputs = ss.fit_transform(traindata[:, 0:11]).T  # input features, shape (11, m)
y_hat = traindata[:, 11:12].T                    # true quality labels, shape (1, m)
# Weights and biases, indexed from 0
W = []
B = []
# Initialize W, B for the hidden layers (feature_number == hidden_neuron_number,
# so all hidden weight matrices share the same 11x11 shape)
for i in range(hidden_layer_number):
    W.append(np.random.randn(hidden_neuron_number, hidden_neuron_number) * 0.05)
    B.append(np.ones((hidden_neuron_number, 1)))
# Output-layer W, B
W.append(np.random.randn(1, hidden_neuron_number) * 0.05)
B.append(np.ones((1, 1)))
# Indexed from 1: Z[1]/A[1] are the first hidden layer; slot 0 holds the input
Z = [np.zeros((hidden_neuron_number, 1))]  # placeholder, never used
A = [inputs]
for layer in range(hidden_layer_number):
    Z.append(np.zeros((hidden_neuron_number, 1)))
    A.append(np.zeros((hidden_neuron_number, 1)))
# Output layer
Z.append(np.array([[0]]))
A.append(np.array([[0]]))
mse_history = []  # MSE of each iteration
err0 = 0          # MSE of the first epoch, for reporting the total drop
for i in range(max_epochs):
    # Forward pass through the hidden layers
    for layer in range(1, hidden_layer_number + 1):
        Z[layer] = np.dot(W[layer-1], A[layer-1]) + B[layer-1]
        A[layer] = ActivateFuncs.ReLu(Z[layer])
    # Output-layer forward pass
    Z[hidden_layer_number+1] = np.dot(W[hidden_layer_number], A[hidden_layer_number]) + B[hidden_layer_number]
    A[hidden_layer_number+1] = ActivateFuncs.ReLu(Z[hidden_layer_number+1])
    err = np.average(np.square(y_hat - A[hidden_layer_number+1]))
    mse_history.append(err)
    # Backward pass; the output layer is also ReLU, so its derivative belongs in dz
    dz = (A[hidden_layer_number+1] - y_hat) * ActivateFuncs.deriv_ReLu(Z[hidden_layer_number+1])
    for layer in range(hidden_layer_number + 1, 0, -1):
        dw = np.dot(dz, A[layer-1].T) / sample_number
        db = np.sum(dz, axis=1, keepdims=True) / sample_number
        if layer > 1:  # dz for the input slot is never needed
            dz = np.dot(W[layer-1].T, dz) * ActivateFuncs.deriv_ReLu(Z[layer-1])
        W[layer-1] -= learn_rate * dw
        B[layer-1] -= learn_rate * db
    if i == 0:
        err0 = err
    print('Epoch %d done! MSE: %s' % (i, err))
    print('MSE drop so far: ' + str(err0 - err))
    if err < mse_final:  # stop early once the MSE threshold is reached
        break
# Plot the loss curve
print(mse_history)
loss = np.log10(mse_history)
min_mse = min(loss)
plt.plot(loss, label='loss')
plt.plot([0, len(loss)], [min_mse, min_mse], label='min_mse')
plt.xlabel('iteration')
plt.ylabel('log10 MSE loss')
plt.title('Log10 MSE History', fontdict={'fontsize': 18, 'color': 'red'})
plt.legend()
plt.show()
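Once training finishes, a minimal evaluation sketch (not in the original; it assumes W, B, inputs, and y_hat from main.py are still in scope, and it measures fit on the training data, since all 1599 rows were used for training):

def predict(x):
    # Forward pass with the trained parameters; every layer uses ReLU, as above
    a = x
    for layer in range(hidden_layer_number + 1):
        a = ActivateFuncs.ReLu(np.dot(W[layer], a) + B[layer])
    return a

pred = predict(inputs)                       # shape (1, m)
accuracy = np.mean(np.round(pred) == y_hat)  # fraction of exactly matched quality scores
print('exact-match accuracy on the training data: %.3f' % accuracy)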