Code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
def predict(model, X):
    """Return the predicted class index for every row of X.

    Runs the forward pass of the one-hidden-layer network stored in
    ``model``: a sigmoid hidden layer followed by a linear output layer.

    Args:
        model: Mapping with the keys 'W' and 'b' (hidden-layer weights and
            bias) and 'U' and 'z' (output-layer weights and bias).
        X: Array of samples, one per row, compatible with ``X.dot(W)``.

    Returns:
        1-D integer array holding, for each row, the index of the largest
        output score.
    """
    W, b, U, z = model['W'], model['b'], model['U'], model['z']
    hidden = 1 / (1 + np.exp(-(X.dot(W) + b)))
    logits = hidden.dot(U) + z
    # Softmax is strictly increasing in each logit, so the most probable
    # class is simply the one with the largest logit. Skipping the explicit
    # exp/normalise step avoids the overflow (inf / inf = NaN) that made
    # argmax return index 0 for rows with very large logits.
    return np.argmax(logits, axis=1)
def plot_decision_boundary(model):
    """Draw the model's decision regions with the data points on top.

    Reads the module-level arrays ``X`` (columns 0 and 1 are used as the
    plot axes) and ``y`` (used to colour the points). The picture is drawn
    into the first cell of a 1x2 subplot grid; nothing is returned.

    Args:
        model: Parameter mapping accepted by ``predict``.
    """
    pad, step = 0.5, 0.01
    first, second = X[:, 0], X[:, 1]

    # Dense grid covering the data range plus a margin on every side.
    xs = np.arange(first.min() - pad, first.max() + pad, step)
    ys = np.arange(second.min() - pad, second.max() + pad, step)
    xx, yy = np.meshgrid(xs, ys)

    # Classify every grid point, then fold the labels back into grid shape.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    regions = predict(model, grid_points).reshape(xx.shape)

    left = plt.subplot(1, 2, 1)
    left.contourf(xx, yy, regions, cmap='rainbow', alpha=0.4)
    left.scatter(first, second, c=y, s=5, cmap='rainbow')
def loss_cal(X, y, model):
    """Return the mean cross-entropy loss of the network on (X, y).

    Args:
        X: Array of samples, one per row, compatible with ``X.dot(W)``.
        y: Integer class index for each row of X (used to index the
            per-class log-probabilities).
        model: Mapping with the keys 'W', 'b', 'U' and 'z'.

    Returns:
        The average negative log-probability assigned to the true classes.

    Raises:
        ZeroDivisionError: If X is empty.
    """
    n = len(X)
    W, b, U, z = model['W'], model['b'], model['U'], model['z']
    hidden = 1 / (1 + np.exp(-(X.dot(W) + b)))
    logits = hidden.dot(U) + z
    # Compute log-softmax with the log-sum-exp trick: subtracting the row
    # maximum keeps exp() from overflowing, and working in log space avoids
    # log(0) when a probability underflows. The plain exp/normalise/log
    # sequence returned NaN or inf for large logits.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    return 1 / n * np.sum(-log_probs[range(n), y])
def train(X, y, num_hide_node, num_kind, it, lr):
    """Train a one-hidden-layer softmax classifier by batch gradient descent.

    The network is ``softmax(sigmoid(X.W + b).U + z)`` and every iteration
    uses the whole of X to compute the cross-entropy gradient.

    Args:
        X: 2-D array of samples, one per row.
        y: Integer class index for each row of X (used for array indexing).
        num_hide_node: Number of hidden units.
        num_kind: Number of output classes.
        it: Number of gradient-descent iterations; 0 returns the initial
            weights.
        lr: Learning rate.

    Returns:
        A tuple ``(model, loss_list)``. ``model`` maps 'W', 'b', 'U', 'z' to
        the final parameters. ``loss_list`` holds the loss reported by
        ``loss_cal`` after the update of every 100th iteration, starting
        with iteration 0.
    """
    N = len(X)
    # Seeds NumPy's *global* generator so the initial weights are repeatable.
    # This also affects any later user of the global generator.
    np.random.seed(0)
    # Size the first layer from the data instead of assuming two features.
    n_features = X.shape[1]
    W = np.random.randn(n_features, num_hide_node)  # standard-normal draws
    b = np.random.randn(1, num_hide_node)
    U = np.random.randn(num_hide_node, num_kind)
    z = np.random.randn(1, num_kind)
    rows = range(N)
    loss_list = []
    for i in range(it):
        # Forward pass.
        D = X.dot(W) + b
        G = 1 / (1 + np.exp(-D))
        F = G.dot(U) + z
        # Subtract the row maximum before exponentiating so large logits
        # cannot overflow to inf and poison the gradients with NaN.
        E = np.exp(F - np.max(F, axis=1, keepdims=True))
        dF = E / np.sum(E, axis=1, keepdims=True)
        # Softmax + cross-entropy gradient w.r.t. the logits: probs - onehot.
        dF[rows, y] = dF[rows, y] - 1
        # Gradient w.r.t. the hidden pre-activations (sigmoid' = G * (1 - G)).
        dD = (G * (1 - G)) * (dF.dot(U.T))
        GU = 1 / N * (G.T).dot(dF)
        Gz = 1 / N * np.sum(dF, axis=0, keepdims=True)
        GW = 1 / N * (X.T).dot(dD)
        Gb = 1 / N * np.sum(dD, axis=0, keepdims=True)
        U = U - lr * GU
        z = z - lr * Gz
        W = W - lr * GW
        b = b - lr * Gb
        if i % 100 == 0:
            loss_list.append(loss_cal(X, y, {'W': W, 'b': b, 'U': U, 'z': z}))
    # Built after the loop so the result is defined even when it == 0.
    model = {'W': W, 'b': b, 'U': U, 'z': z}
    return model, loss_list
def five_ford_cross(X, y, num_hide_node, num_kind, it, lr):
    """Estimate accuracy with shuffled 5-fold cross-validation.

    For each fold a fresh model is trained on the training part and scored
    on the held-out part; each fold's accuracy is printed.

    Args:
        X: Array of samples, one per row.
        y: Class index for each row of X.
        num_hide_node, num_kind, it, lr: Passed through to ``train``.

    Returns:
        The mean of the five per-fold accuracies.
    """
    n_splits = 5
    splitter = KFold(n_splits=n_splits, shuffle=True)  # set up the K-fold splitter
    total_acc = 0
    # split() yields the row indices of each train/test partition.
    for train_index, test_index in splitter.split(X):
        model, _ = train(X[train_index], y[train_index],
                         num_hide_node, num_kind, it, lr)
        predicted = predict(model, X[test_index])
        expected = y[test_index]
        hits = sum(1 for guess, truth in zip(predicted, expected) if guess == truth)
        fold_acc = hits / len(predicted)
        print("acc", fold_acc)
        total_acc += fold_acc
    return total_acc / n_splits
# Load the data set: column 0 holds the class label, columns 1-2 the features.
path = 'C:/Users/rshs/Desktop/GMM/GMM8.txt'
data = pd.read_table(path)
X = data.iloc[:, 1:3].values
Y = data.iloc[:, 0:1].values
y = Y.reshape(len(Y))  # flatten the (n, 1) label column to 1-D

# Hyper-parameters.
num_hide_node = 10
num_kind = 8
it = 20000
lr = 0.05

# Train on the full data set and plot the decision regions in the left panel.
model, loss_list = train(X, y, num_hide_node, num_kind, it, lr)
plot_decision_boundary(model)

# Right panel: training loss, one sample per 100 iterations.
ax2 = plt.subplot(1, 2, 2)
ix = [100 * k for k in range(len(loss_list))]
ax2.plot(ix, loss_list)
plt.show()

print("5折交叉验证得到正确率:", five_ford_cross(X, y, num_hide_node, num_kind, it, lr))
Result
Derivation