Introduction
A single-layer softmax regression network implemented entirely in numpy. The code is mainly meant for learning how to compute gradients and perform backpropagation.
Model Code
import numpy as np

from ._base_network import _baseNetwork


class SoftmaxRegression(_baseNetwork):
    def __init__(self, input_size=28 * 28, num_classes=10):
        # super().__init__ already stores input_size/num_classes and creates
        # the weights/gradients dicts, so no extra bookkeeping is needed here.
        # Since we inherit from _baseNetwork, the activation and loss helpers
        # are called directly on self instead of on a second instance.
        super().__init__(input_size, num_classes)
        self._weight_init()

    def _weight_init(self):
        # Fixed seed for reproducibility; small random weights, zero gradients
        np.random.seed(1024)
        self.weights['W1'] = 0.001 * np.random.randn(self.input_size, self.num_classes)
        self.gradients['W1'] = np.zeros((self.input_size, self.num_classes))

    def forward(self, X, y, mode='train'):
        # Flatten each input into a vector: (N, input_size)
        X = X.reshape(-1, self.input_size)
        N = X.shape[0]
        # Linear layer -> ReLU -> Softmax
        X1 = np.dot(X, self.weights['W1'])  # (N, num_classes)
        X2 = self.ReLU(X1)
        Y = self.softmax(X2)
        loss = self.cross_entropy_loss(Y, y)
        accuracy = self.compute_accuracy(Y, y)
        if mode == 'train':
            # Chain rule: dL/dW1 = X^T @ (dL/dX2 * dX2/dX1), averaged over the batch.
            # cross_entropy_dev fuses softmax + cross-entropy, so it takes the
            # pre-softmax activations X2; ReLU_dev masks units where X1 <= 0.
            self.gradients['W1'] = np.dot(X.T, self.cross_entropy_dev(X2, y) * self.ReLU_dev(X1)) / N
        return loss, accuracy
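As a quick illustration of how the class might be used, here is a minimal training-loop sketch. The synthetic data, the learning rate lr, and the plain gradient-descent update are all assumptions for illustration; they are not part of the original code.

import numpy as np

# Hypothetical usage sketch: fit the model on random data with vanilla
# gradient descent. Assumes SoftmaxRegression (above) is importable; all
# hyperparameters below are assumed, not prescribed.
model = SoftmaxRegression(input_size=28 * 28, num_classes=10)

rng = np.random.RandomState(0)
X = rng.randn(64, 28 * 28)        # fake batch of 64 flattened "images"
y = rng.randint(0, 10, size=64)   # fake integer labels in [0, 10)

lr = 0.1                          # assumed learning rate
for step in range(100):
    loss, acc = model.forward(X, y, mode='train')      # fills gradients['W1']
    model.weights['W1'] -= lr * model.gradients['W1']  # gradient-descent step

print(loss, acc)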
Backbone Network
import numpy as np


class _baseNetwork:
    def __init__(self, input_size=28 * 28, num_classes=10):
        self.input_size = input_size
        self.num_classes = num_classes
        self.weights = dict()
        self.gradients = dict()

    def _weight_init(self):
        pass

    def forward(self):
        pass

    def softmax(self, scores):
        # Row-wise softmax. Shifting each row by its max does not change the
        # result but keeps np.exp from overflowing for large scores.
        prob = np.zeros(scores.shape)
        for i in range(scores.shape[0]):
            shifted = scores[i] - np.max(scores[i])
            denom = np.sum(np.exp(shifted))
            for j in range(scores.shape[1]):
                prob[i, j] = np.exp(shifted[j]) / denom
        return prob

    def cross_entropy_loss(self, x_pred, y):
        # Mean negative log-likelihood of the true class
        loss = 0
        for i in range(x_pred.shape[0]):
            loss += -np.log(x_pred[i, y[i]])
        loss = loss / x_pred.shape[0]
        return loss

    def compute_accuracy(self, x_pred, y):
        # Fraction of samples whose argmax prediction matches the label
        right = 0
        for i in range(x_pred.shape[0]):
            if y[i] == np.argmax(x_pred[i]):
                right += 1
        acc = right / x_pred.shape[0]
        return acc

    def sigmoid(self, X):
        # Element-wise logistic function
        out = np.zeros(X.shape)
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                out[i, j] = 1 / (1 + np.exp(-X[i, j]))
        return out

    def sigmoid_dev(self, x):
        """
        The analytical derivative of the sigmoid function at x
        :param x: Input data
        :return: The derivative of the sigmoid function at x
        """
        ds = self.sigmoid(x) * (1 - self.sigmoid(x))
        return ds

    def ReLU(self, X):
        # Element-wise max(0, x)
        out = np.zeros(X.shape)
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                if X[i, j] > 0:
                    out[i, j] = X[i, j]
        return out

    def ReLU_dev(self, X):
        # Derivative of ReLU: 1 where the input is positive, 0 elsewhere
        out = np.zeros(X.shape)
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                if X[i, j] > 0:
                    out[i, j] = 1.
        return out

    def cross_entropy_dev(self, x_pred, y):
        # One-hot encode the integer labels
        yy = []
        for i in range(x_pred.shape[0]):
            y0 = np.zeros(x_pred.shape[1])
            y0[y[i]] = 1
            yy.append(y0)
        yy = np.array(yy)
        sm_out = self.softmax(x_pred)
        # Note: cross-entropy and softmax are treated as one fused module, so
        # the gradient is taken through both at once -- which is also simpler.
        out = sm_out - yy
        return out
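As a sanity check, the loop-based softmax and cross_entropy_loss above can be compared against the standard vectorized NumPy expressions of the same formulas. This is a minimal sketch on toy random data, assuming _baseNetwork from the code above is in scope; the vectorized one-liners are textbook equivalents, not part of the original code.

import numpy as np

net = _baseNetwork()
rng = np.random.RandomState(0)
scores = rng.randn(5, 10)          # toy logits: 5 samples, 10 classes
y = rng.randint(0, 10, size=5)

# Vectorized softmax: shift each row by its max, exponentiate, normalize
shifted = scores - scores.max(axis=1, keepdims=True)
probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
assert np.allclose(probs, net.softmax(scores))

# Vectorized cross-entropy: mean negative log-probability of the true class
loss = -np.log(probs[np.arange(len(y)), y]).mean()
assert np.isclose(loss, net.cross_entropy_loss(probs, y))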
Notes
- Forward propagation is straightforward. During backpropagation, keep in mind that the gradient of the loss with respect to any weight or bias must have exactly the same shape as that weight or bias!
- Consequently, when the shapes disagree, there are generally two ways to reconcile them: ① matrix-multiply the two adjacent gradient matrices from the chain rule so that the shapes line up; ② for a bias, if the computed gradient has the wrong shape, say (128, 10), while by the point above it must match the bias shape of (128,), average or sum away the extra axis (e.g. with np.mean or np.sum).
- Pay special attention to the backward pass through the cross-entropy loss + Softmax module: the two are conventionally differentiated together as one unit (see the derivation below), so there is no need to compute the gradients of the cross-entropy module and the Softmax module separately. Instead, use the formula gradient of module = sm_out - yy, where "gradient of module" is the gradient of cross-entropy + Softmax with respect to the logits, yy is the one-hot embedding of the labels, and sm_out is the result of passing the logits through Softmax. Both points are checked numerically in the sketch after this list.
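The sketch below (toy shapes and the step size eps are assumptions, with _baseNetwork from above in scope) verifies the fused sm_out - yy formula against a central finite-difference gradient of the composed loss, and then applies the axis-reduction trick from point ② to obtain a bias-shaped gradient.

import numpy as np

net = _baseNetwork()
rng = np.random.RandomState(0)
logits = rng.randn(4, 10)          # toy logits: batch of 4, 10 classes
y = rng.randint(0, 10, size=4)

def loss_fn(z):
    # Cross-entropy of softmax(z), summed over the batch so that the exact
    # per-sample gradient is softmax(z) - one_hot(y)
    return net.cross_entropy_loss(net.softmax(z), y) * z.shape[0]

analytic = net.cross_entropy_dev(logits, y)   # sm_out - yy

# Central finite differences, one coordinate at a time
numeric = np.zeros_like(logits)
eps = 1e-5
for i in range(logits.shape[0]):
    for j in range(logits.shape[1]):
        zp, zm = logits.copy(), logits.copy()
        zp[i, j] += eps
        zm[i, j] -= eps
        numeric[i, j] = (loss_fn(zp) - loss_fn(zm)) / (2 * eps)
assert np.allclose(analytic, numeric, atol=1e-6)

# Point ②: a per-sample bias gradient of shape (N, C) is reduced over the
# batch axis so it matches a bias of shape (C,)
bias_grad = analytic.sum(axis=0)   # or .mean(axis=0) for an averaged loss
assert bias_grad.shape == (10,)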
Derivation of the Simplified Cross-Entropy + Softmax Gradient