基于神经网络来实现二分类问题
网络结构图如下:
输入层包含两个节点(B1是 bias 不属于输入层)
本网络包含两个隐藏层,每层有两个节点
输出层的输出 y_ 是对二分类中标签为1的预测概率,标签为0的预测概率是 1 - y_
神经网络的前向传播:
z1 = X1 * W1 + B1
a1 = sigmoid( z1 )
z2 = a1 * W2 + B2
a2 = sigmoid(z2)
z3 = a2 * W3 + B3
y_ = sigmoid(z3)
通过前向传播我们会得到对标签的预测值y_ ,定义 损失函数为 loss = - (y * log(y_) + (1-y) * log(1-y_))
通过最小化 loss 反向传播就可以得到一个优化解。
如何优化呢,可将loss看做是以 W1 、W2、W3、B1、B2、B3 为变量的函数,loss分别对其求偏导,便可以得到 loss 与每一个参数的关系。
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 21 22:17:36 2019
@author: Icheng
"""
import numpy as np
from openpyxl import load_workbook
import matplotlib.pyplot as plt
filename = 'testdata.xlsx'
lr = 0.01
reg_lambda = 0.01
iter_num=20000
def loadData(filename):
    """Read every row of the 'testdata' worksheet into a numpy array.

    Each row of the sheet becomes one row of the returned matrix; cell
    values are taken as-is from openpyxl.
    """
    workbook = load_workbook(filename)
    sheet = workbook['testdata']
    rows = [[cell.value for cell in row] for row in sheet.rows]
    return np.array(rows)
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + e^(-x))."""
    return 1.0 / (1.0 + np.exp(-x))
def normalization(X):
    """Mean-normalize each column of X: (X - mean) / (max - min).

    Fix over the original: a constant column has max == min, so the
    original divided by zero and produced NaN/inf. Such columns are now
    mapped to 0 instead.

    X : (n_samples, n_features) numeric array.
    Returns an array of the same shape, each column roughly in [-0.5, 0.5].
    """
    Xmean = X.mean(axis=0)
    Xrange = X.max(axis=0) - X.min(axis=0)
    # Guard constant columns: substitute 1 for a zero range so the
    # division yields 0 (the numerator is already 0 there).
    Xrange = np.where(Xrange == 0, 1, Xrange)
    return (X - Xmean) / Xrange
def plotData(X, y, filename='temp.png'):
    """Scatter-plot the two feature columns of X, colored by binary label y.

    Label 1 is drawn as red squares, label 0 as blue circles. The figure
    is saved to `filename`, shown, then closed.
    """
    fig = plt.figure(1)
    pos = np.where(y == 1)
    neg = np.where(y == 0)
    p1 = plt.scatter(X[pos, 0], X[pos, 1], marker='s', color='red')
    p2 = plt.scatter(X[neg, 0], X[neg, 1], marker='o', color='blue')
    plt.legend((p1, p2), ('Like', "Don't like"), loc='upper right')
    fig.savefig(filename)
    # Fix: plt.show() takes no figure argument — the original passed the
    # Figure object, which show() interprets as its `block` flag.
    plt.show()
    plt.close(fig)
def loss(y, y_):
    """Mean binary cross-entropy between labels y and predicted probabilities y_.

    Fix over the original: predictions are clipped away from exact 0/1 so
    that np.log never returns -inf/NaN when the sigmoid saturates. For
    predictions inside (eps, 1-eps) the value is unchanged.
    """
    eps = 1e-12
    p = np.clip(y_, eps, 1 - eps)
    return (-(y * np.log(p) + (1 - y) * np.log(1 - p))).mean()
def plot_decision_boundary(X, y, model):
    """Color the feature plane by the model's predicted class and overlay the data."""
    pad = 0.2
    xs = np.arange(X[:, 0].min() - pad, X[:, 0].max() + pad, 0.005)
    ys = np.arange(X[:, 1].min() - pad, X[:, 1].max() + pad, 0.005)
    xx, yy = np.meshgrid(xs, ys)
    # Classify every grid point, then reshape back to the grid for contourf.
    grid = np.c_[xx.ravel(), yy.ravel()]
    Z = predict(grid, model).reshape(xx.shape)
    plt.figure()
    # red / yellow / blue colormap
    plt.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu, alpha=0.6)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu)
    plt.show()
def predict(X, model):
    """Run the 3-layer forward pass and threshold the output at 0.5.

    X     : (n_samples, 2) feature matrix.
    model : dict with weights 'W1','W2','W3' and biases 'b1','b2','b3'.
    Returns an (n_samples, 1) array of hard 0.0/1.0 class labels.
    """
    W1, b1 = model['W1'], model['b1']
    W2, b2 = model['W2'], model['b2']
    W3, b3 = model['W3'], model['b3']
    a1 = sigmoid(np.dot(X, W1) + b1)
    a2 = sigmoid(np.dot(a1, W2) + b2)
    y_ = sigmoid(np.dot(a2, W3) + b3)
    # Hard-threshold the probabilities into class labels, in place.
    y_[y_ >= 0.5] = 1
    y_[y_ < 0.5] = 0
    return y_
def build_model(X,y):
    """Train the 2-3-3-1 sigmoid network with full-batch gradient descent.

    Parameters
    ----------
    X : (n_samples, 2) feature matrix (assumed already normalized).
    y : (n_samples,) array of 0/1 labels.

    Returns
    -------
    dict with keys 'W1','b1','W2','b2','W3','b3' holding the learned parameters.

    Uses module-level hyperparameters: lr (learning rate), reg_lambda
    (L2 strength) and iter_num (iterations).
    """
    # Column vector so it broadcasts against the (n, 1) network output.
    y = y.reshape(-1,1)
    model = {}
    # Random initialization for the 2 -> 3 -> 3 -> 1 architecture.
    W1 = np.random.randn(6).reshape(2,3)
    b1 = np.random.randn(3)
    W2 = np.random.randn(9).reshape(3,3)
    b2 = np.random.randn(3)
    W3 = np.random.randn(3).reshape(3,1)
    b3 = np.random.randn(1)
    model = { 'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2, 'W3': W3, 'b3': b3}
    for i in range(iter_num):
        # Forward propagation
        z1 = np.dot(X,W1) + b1
        a1 = sigmoid(z1)
        z2 = np.dot(a1,W2) + b2
        a2 = sigmoid(z2)
        z3 = np.dot(a2,W3) + b3
        y_ = sigmoid(z3)
        # Backpropagation: with sigmoid output + cross-entropy loss,
        # dL/dz3 simplifies to (y_ - y).
        db3 = y_ - y
        dW3 = np.dot(a2.T, db3)
        # Hidden-layer deltas use the sigmoid derivative a * (1 - a).
        db2 = np.dot(db3, W3.T) * a2 * (1 - a2)
        dW2 = np.dot(a1.T, db2)
        db1 = np.dot(db2,W2.T) * a1 * (1 - a1)
        dW1 = np.dot(X.T, db1)
        # Add L2 regularization gradient (weights only, biases excluded).
        dW3 += reg_lambda * W3
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1
        # Update parameters; bias gradients are summed over the batch.
        b3 = b3 - lr * np.sum(db3,axis=0)
        W3 = W3 - lr * dW3
        b2 = b2 - lr * np.sum(db2,axis=0)
        W2 = W2 - lr * dW2
        b1 = b1 - lr * np.sum(db1,axis=0)
        W1 = W1 - lr * dW1
        model = { 'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2, 'W3': W3, 'b3': b3}
        # NOTE(review): y_ was computed before this iteration's update,
        # so the printed loss lags the parameters by one step.
        l = loss(y,y_)
        if i % 1000 == 0:
            print("{1} loss {0}".format(l,i))
    return model
if __name__ == '__main__':
    # Columns 0-1 are the features, column 2 is the 0/1 label.
    data = loadData(filename)
    X, y = data[:, :2], data[:, 2]
    # plotData(X,y)
    X = normalization(X)
    model = build_model(X, y)
    # plotData(X,y)
    # W,b = BGD(X,y,iter_num,lr)
    predictions = predict(X, model).reshape(1, -1)
    # Broadcasting (n,) against (1, n) compares element-wise.
    acc = (y == predictions).mean()
    plot_decision_boundary(X, y, model)
    print("acc:{0}".format(acc))
运行结果图: