# SOFTMAX - ALL IN ONE
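# Multinomial logistic (softmax) regression on MNIST, trained with full-batch
# gradient descent until the training loss stops improving.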
from keras.datasets import mnist
import numpy as np
# load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape((X_train.shape[0], np.prod(X_train.shape[1:]))).T / 255.0
X_test = X_test.reshape((X_test.shape[0], np.prod(X_test.shape[1:]))).T / 255.0
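# Columns are individual flattened images scaled to [0, 1]:
# X_train is (784, 60000), X_test is (784, 10000).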
# parameters
(D, N) = X_train.shape         # D = pixels per image, N = number of training images
C = len(np.unique(y_train))    # number of classes (10 digits)
eps = 1e-03                    # stop when the loss changes by less than eps
step = 0.5                     # learning rate for gradient descent
# init W
W = np.zeros((D, C))
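# Softmax probabilities: p[k, i] = exp(w_k . x_i) / sum_j exp(w_j . x_i),
# so each column of p (shape (C, N)) is a distribution over the C classes.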
scores = W.T.dot(X_train)                      # raw class scores, shape (C, N)
p = np.exp(scores - np.max(scores, axis=0))    # shift by the per-column max for numerical stability
p /= np.sum(p, axis=0)                         # normalize each column so the class probabilities sum to 1
# optimization
loss = 1e10
last_loss = 0
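# Loss: L(W) = -(1/N) * sum_i log p[y_i, i]  (average cross-entropy).
# Gradient: dL/dW = X (P - Y)^T / N, where Y is the (C, N) one-hot label matrix;
# the per-class loop below handles the one-hot term by summing the columns of X for each class.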
while np.abs(loss - last_loss) > eps:
    # compute gradient: dW = X (P - Y)^T / N, done class by class
    dW = X_train.dot(p.T) / N
    for k in range(C):
        dW[:, k] -= np.sum(X_train[:, y_train == k], axis=1) / N
    # gradient descent step
    W -= step * dW
    # recompute softmax probabilities with the updated weights
    scores = W.T.dot(X_train)
    p = np.exp(scores - np.max(scores, axis=0))
    p /= np.sum(p, axis=0)
    # compute the average cross-entropy loss
    last_loss = loss
    loss = 0
    for k in range(C):
        loss += -np.sum(np.log(p[k, y_train == k])) / N
    print('Loss:', loss)
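# Note: the per-class gradient loop above can also be written in a fully
# vectorized form (a sketch using the same variable names): build
# Y = np.zeros((C, N)); Y[y_train, np.arange(N)] = 1 once, then
# dW = X_train.dot((p - Y).T) / N.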
# test
scores = W.T.dot(X_test)
p = np.exp(scores - np.max(scores, axis=0))    # stable softmax on the test set
p /= np.sum(p, axis=0)
y = np.argmax(p, axis=0)                       # predicted class = most probable row per column
acc = np.sum(y == y_test) * 1.0 / len(y)
print('Accuracy:', acc)