Softmax regression on MNIST, following the softmax exercise from cs231n assignment 1.
import numpy as np
import matplotlib.pyplot as plt
def load_data(path):
    with np.load(path) as f:
        x_train, y_train = f['x_train'].astype('float32').reshape(60000, 784), f['y_train']
        x_test, y_test = f['x_test'].astype('float32').reshape(10000, 784), f['y_test']
    return x_train, y_train, x_test, y_test
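If dataset/mnist.npz is not present locally, one way to produce an archive with the same keys (x_train, y_train, x_test, y_test) is via Keras. This is only a sketch and assumes tensorflow is installed; fetch_mnist is a hypothetical helper, not part of the original script.
import os

def fetch_mnist(path='dataset/mnist.npz'):
    # Hypothetical helper: download MNIST through Keras and cache it in the
    # npz layout that load_data above expects.
    if os.path.exists(path):
        return path
    from tensorflow.keras.datasets import mnist  # assumes tensorflow is available
    (x_tr, y_tr), (x_te, y_te) = mnist.load_data()
    os.makedirs(os.path.dirname(path), exist_ok=True)
    np.savez(path, x_train=x_tr, y_train=y_tr, x_test=x_te, y_test=y_te)
    return path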
x_train, y_train, x_test, y_test = load_data('dataset/mnist.npz')
print(type(x_train),x_train.shape)
print(type(x_test),x_test.shape)
print(type(y_train),y_train.shape)
print(type(y_test),y_test.shape)
#<class 'numpy.ndarray'> (60000, 784)
#<class 'numpy.ndarray'> (10000, 784)
#<class 'numpy.ndarray'> (60000,)
#<class 'numpy.ndarray'> (10000,)
# Zero-center the pixels: subtract the per-pixel mean of the training set
# from both the training and the test data.
mean_image = np.mean(x_train, axis=0)
x_train -= mean_image
x_test -= mean_image
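matplotlib is imported above but not used yet; as a quick sanity check on the preprocessing, the mean image can be displayed (an optional sketch):
plt.imshow(mean_image.reshape(28, 28), cmap='gray')  # average digit over the training set
plt.title('training-set mean image')
plt.show()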
class SoftmaxReg(object):

    def train(self, w, x, y, lr=1e-3, reg=1e-5, batch_size=256):
        # Sample one minibatch and return its loss and gradient;
        # the SGD update on w is done by the caller.
        num_train, dim = x.shape
        num_classes = np.max(y) + 1
        indices = np.random.choice(num_train, batch_size)
        x_batch = x[indices]
        y_batch = y[indices]
        loss, grad = self.softmax_loss(w, x_batch, y_batch, reg)
        return loss, grad

    def predict(self, x, w):
        # Predicted label = index of the highest score in x.dot(w)
        y_pred = np.argmax(np.dot(x, w), axis=1)
        return y_pred
    def softmax_loss(self, W, X, y, reg):
        """
        Softmax loss function, vectorized version.

        Returns the cross-entropy loss (plus L2 regularization) and the
        gradient with respect to W, without explicit loops over the batch.
        """
        loss = 0.0
        dW = np.zeros_like(W)

        N = X.shape[0]
        f = np.dot(X, W)                      # scores, shape (N, C)
        f -= f.max(axis=1).reshape(N, 1)      # shift scores to avoid overflow in exp
        s = np.exp(f).sum(axis=1)
        loss = np.log(s).sum() - f[range(N), y].sum()
        counts = np.exp(f) / s.reshape(N, 1)  # softmax probabilities
        counts[range(N), y] -= 1              # subtract 1 at the correct classes
        dW = np.dot(X.T, counts)
        loss = loss / N + 0.5 * reg * np.sum(W * W)  # average + L2 regularization
        dW = dW / N + reg * W
        return loss, dW
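Before training, the analytic gradient can be compared against a centered finite-difference estimate on a small batch, in the spirit of the cs231n gradient check. numeric_grad_check below is an illustrative helper, not part of the original code:
def numeric_grad_check(model, W, X, y, reg, num_checks=5, h=1e-5):
    # Compare the analytic gradient from softmax_loss with a numeric
    # estimate at a few randomly chosen coordinates of W.
    _, grad = model.softmax_loss(W, X, y, reg)
    for _ in range(num_checks):
        ix = tuple(np.random.randint(d) for d in W.shape)
        old = W[ix]
        W[ix] = old + h
        loss_plus, _ = model.softmax_loss(W, X, y, reg)
        W[ix] = old - h
        loss_minus, _ = model.softmax_loss(W, X, y, reg)
        W[ix] = old
        numeric = (loss_plus - loss_minus) / (2 * h)
        rel_err = abs(numeric - grad[ix]) / max(1e-12, abs(numeric) + abs(grad[ix]))
        print('numeric: %f analytic: %f relative error: %e' % (numeric, grad[ix], rel_err))

# usage sketch: run on a small random batch with a small random W
# idx = np.random.choice(x_train.shape[0], 128)
# numeric_grad_check(SoftmaxReg(), 0.0001 * np.random.randn(784, 10),
#                    x_train[idx], y_train[idx], reg=1e-5)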
best_train = -1
best_test = -1
num_iters = 1500
best_w = None
softmax = SoftmaxReg()
lr = 5e-6
w = 0.0001 * np.random.randn(784, 10)

for it in range(num_iters):
    loss, grad = softmax.train(w, x_train, y_train, lr=lr, reg=1e-5, batch_size=256)
    if it % 50 == 0:
        y_pred_train = softmax.predict(x_train, w)
        acc_train = np.mean(y_pred_train == y_train)
        y_pred = softmax.predict(x_test, w)
        test_acc = np.mean(y_test == y_pred)
        print('iteration %d/%d: loss %f, train_acc %f, test_acc %f'
              % (it, num_iters, loss, acc_train, test_acc))
        if acc_train > best_train:
            best_train = acc_train
        if test_acc > best_test:
            best_test = test_acc
            best_w = w.copy()  # copy: otherwise best_w would keep tracking the updated w
    w -= lr * grad

y_pred = softmax.predict(x_test, best_w)
test_acc = np.mean(y_test == y_pred)
print("test_acc:", test_acc)