The softmax loss is computed with the cross-entropy function.
The cross-entropy between a true distribution $p$ and an estimated distribution $q$ is computed as:
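$$H(p, q) = -\sum_{x} p(x)\,\log q(x)$$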
In the CS231n assignment the true distribution places probability 1 on the correct label, so, writing $f = XW$ for the matrix of class scores and $y_i$ for the correct class of example $i$, the cross-entropy loss of example $i$ becomes:
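$$L_i = -\log\!\left(\frac{e^{f_{y_i}}}{\sum_{j} e^{f_{j}}}\right)$$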
which can also be written as:
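$$L_i = -f_{y_i} + \log\sum_{j} e^{f_{j}}$$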
Differentiating the loss function gives the gradient of the loss with respect to the weights:
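With $p_j = \dfrac{e^{f_j}}{\sum_k e^{f_k}}$ the softmax probability of class $j$ for example $i$ (input $x_i$), the standard result is

$$\frac{\partial L_i}{\partial f_j} = p_j - \mathbb{1}(j = y_i), \qquad \frac{\partial L_i}{\partial W_{:,\,j}} = \bigl(p_j - \mathbb{1}(j = y_i)\bigr)\, x_i$$

so every column of $dW$ accumulates $p_j\,x_i$, and the column of the correct class additionally subtracts $x_i$.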
The Python code for the naive, loop-based implementation is as follows:
import numpy as np

def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs and outputs are the same as softmax_loss_vectorized.
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    num_train = X.shape[0]
    num_classes = W.shape[1]
    F = X.dot(W)
    # Subtract each row's maximum score for numerical stability;
    # each row corresponds to one training example.
    normalized_F = F - np.max(F, axis=1).reshape(num_train, 1)
    exp_normalized_F = np.exp(normalized_F)

    # Compute the loss.
    for i in range(num_train):
        s_yi = exp_normalized_F[i][y[i]]
        sum_i = np.sum(exp_normalized_F[i])
        loss -= np.log(s_yi * 1.0 / sum_i)
    loss /= num_train
    loss += reg * np.sum(np.square(W))

    # Compute dW.
    for i in range(num_train):
        sum_i = np.sum(exp_normalized_F[i])
        for j in range(num_classes):
            dW[:, j] += (exp_normalized_F[i][j] * 1.0 / sum_i) * X[i]
            if j == y[i]:
                # For j == y[i], the formula requires subtracting an extra X[i].
                dW[:, j] -= X[i]
    dW /= num_train
    dW += 2 * reg * W

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW
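A quick way to sanity-check the analytic gradient is to compare it against a centered-difference numerical gradient on a small random problem. The snippet below is only an illustration, not part of the assignment template: the toy shapes follow the convention X: (N, D), W: (D, C), y: (N,), and numerical_grad is a hypothetical helper defined here just for the check.

def numerical_grad(f, W, h=1e-5):
    # Centered finite differences over every entry of W (hypothetical helper).
    grad = np.zeros_like(W)
    it = np.nditer(W, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        old = W[idx]
        W[idx] = old + h
        fp = f(W)
        W[idx] = old - h
        fm = f(W)
        W[idx] = old
        grad[idx] = (fp - fm) / (2 * h)
        it.iternext()
    return grad

np.random.seed(0)
X = np.random.randn(5, 4)
y = np.random.randint(3, size=5)
W = 0.01 * np.random.randn(4, 3)

loss, dW = softmax_loss_naive(W, X, y, reg=0.1)
dW_num = numerical_grad(lambda W_: softmax_loss_naive(W_, X, y, reg=0.1)[0], W)
print(np.max(np.abs(dW - dW_num)))  # expected to be very small, roughly 1e-8 to 1e-7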
Vectorized implementation
def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, vectorized version.

    Inputs and outputs are the same as softmax_loss_naive.
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    num_train = X.shape[0]
    F = X.dot(W)
    # Subtract each row's maximum score for numerical stability before exponentiating.
    exp_normalized_F = np.exp(F - np.max(F, axis=1).reshape(num_train, 1))

    # Compute the loss.
    sum_i = np.sum(exp_normalized_F, axis=1)
    p_i = exp_normalized_F[range(num_train), y] / sum_i
    L_i = -np.log(p_i)
    loss = np.sum(L_i)
    loss /= num_train
    loss += reg * np.sum(W * W)

    # Compute the gradient.
    acc_effect = exp_normalized_F / sum_i.reshape(num_train, 1)
    # Subtract an extra 1 at each example's correct-label position.
    acc_effect[range(num_train), y] -= 1.0
    dW = X.T.dot(acc_effect)
    dW /= num_train
    dW += 2 * reg * W

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW
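On random data the two implementations should agree up to floating-point error. A minimal check, again just an illustration with hypothetical toy shapes following the X: (N, D), W: (D, C), y: (N,) convention:

np.random.seed(1)
X = np.random.randn(10, 6)
y = np.random.randint(4, size=10)
W = 0.01 * np.random.randn(6, 4)

loss_naive, dW_naive = softmax_loss_naive(W, X, y, reg=0.1)
loss_vec, dW_vec = softmax_loss_vectorized(W, X, y, reg=0.1)

print(abs(loss_naive - loss_vec))         # expected: ~0
print(np.linalg.norm(dW_naive - dW_vec))  # expected: ~0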