Adding a single layer of nonlinearity on top of plain softmax immediately makes a big difference: a linear softmax classifier cannot bend its decision boundary around the spiral arms of this toy dataset, while one hidden ReLU layer separates the three classes almost perfectly (training accuracy 0.99 below). A sketch of the linear baseline follows for comparison; the full two-layer script comes after it.
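For comparison, here is a minimal sketch of that plain softmax (linear) baseline. It reuses D, K, X and y from the spiral-data script below, so it has to be run after the data-generation part; the names W_lin, b_lin, step_lin and reg_lin are placeholders chosen for this sketch, and 200 iterations is an arbitrary budget.

# linear softmax baseline on the same spiral data (D, K, X, y come from the script below)
W_lin = 0.01 * np.random.randn(D, K)
b_lin = np.zeros((1, K))
step_lin = 1e-0   # learning rate
reg_lin = 1e-3    # L2 regularization strength
num_examples = X.shape[0]
for i in range(200):
    scores = np.dot(X, W_lin) + b_lin
    exp_scores = np.exp(scores)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # softmax cross-entropy gradient on the scores
    dscores = probs
    dscores[range(num_examples), y] -= 1
    dscores /= num_examples
    W_lin -= step_lin * (np.dot(X.T, dscores) + reg_lin * W_lin)
    b_lin -= step_lin * np.sum(dscores, axis=0, keepdims=True)
predicted = np.argmax(np.dot(X, W_lin) + b_lin, axis=1)
print('linear softmax training accuracy: %.2f' % np.mean(predicted == y))

Since each class gets only a single linear decision boundary, this baseline cannot follow the spirals, and its training accuracy stays far below the 0.99 that the two-layer network reaches below.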
import numpy as np
import matplotlib.pyplot as plt

# build the toy spiral dataset: K classes, N points per class, in 2D
N = 100  # points per class
D = 2    # dimensionality
K = 3    # number of classes
X = np.zeros((N*K, D))            # data matrix (each row is one example)
y = np.zeros(N*K, dtype='uint8')  # class labels
for j in range(K):
    ix = range(N*j, N*(j+1))
    r = np.linspace(0, 1, N)  # radius
    #t = np.linspace(j*np.pi*2/3, (j+1)*np.pi*2/3, N) + np.random.randn(N)*0.2
    t = np.linspace(j*4, (j+1)*4, N) + np.random.randn(N)*0.2  # angle, with noise
    X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
    y[ix] = j
# visualize the three intertwined spiral arms, one color per class
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
#plt.show()
# two-layer net: D inputs -> h hidden units (ReLU) -> K class scores (softmax)
h = 100  # size of the hidden layer
W = 0.01 * np.random.randn(D, h)
b = np.zeros((1, h))
W2 = 0.01 * np.random.randn(h, K)
b2 = np.zeros((1, K))

# hyperparameters
step_size = 1e-0
reg = 1e-3  # regularization strength

num_examples = X.shape[0]
# train the two-layer network with full-batch gradient descent
for i in range(20000):  # 20000 steps produce the loss log shown below

    # forward pass: ReLU hidden layer, then class scores
    hidden_layer = np.maximum(0, np.dot(X, W) + b)  # shape (N*K, h)
    scores = np.dot(hidden_layer, W2) + b2          # shape (N*K, K)

    # class probabilities via softmax
    exp_scores = np.exp(scores)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    # average cross-entropy loss plus L2 regularization
    correct_logprobs = -np.log(probs[range(num_examples), y])
    data_loss = np.sum(correct_logprobs) / num_examples
    reg_loss = 0.5 * reg * np.sum(W*W) + 0.5 * reg * np.sum(W2*W2)
    loss = data_loss + reg_loss
    if i % 1000 == 0:
        print('iteration %d: loss %f' % (i, loss))
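    # backward pass: the gradient of the softmax cross-entropy loss on the
    # scores has a closed form. With p = softmax(scores), the loss for example
    # i is -log p_{y_i}, and d(-log p_{y_i}) / d(scores_k) = p_k - 1{k == y_i};
    # averaging over the batch contributes the 1/num_examples factor below.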
    dscores = probs
    dscores[range(num_examples), y] -= 1
    dscores /= num_examples

    # backprop into W2 and b2
    dW2 = np.dot(hidden_layer.T, dscores)
    db2 = np.sum(dscores, axis=0, keepdims=True)
    # backprop through the hidden layer; ReLU kills the gradient where it was inactive
    dhidden = np.dot(dscores, W2.T)
    dhidden[hidden_layer <= 0] = 0
    # backprop into W and b
    dW = np.dot(X.T, dhidden)
    db = np.sum(dhidden, axis=0, keepdims=True)

    # add the gradient of the L2 regularization term
    dW2 += reg * W2
    dW += reg * W

    # vanilla gradient descent parameter update
    W -= step_size * dW
    b -= step_size * db
    W2 -= step_size * dW2
    b2 -= step_size * db2
# evaluate training set accuracy with a final forward pass
hidden_layer = np.maximum(0, np.dot(X, W) + b)
scores = np.dot(hidden_layer, W2) + b2
predicted_class = np.argmax(scores, axis=1)
print('training accuracy: %.2f' % (np.mean(predicted_class == y)))

Running the script prints:
iteration 0: loss 1.098560
iteration 1000: loss 0.309795
iteration 2000: loss 0.273101
iteration 3000: loss 0.253078
iteration 4000: loss 0.249456
iteration 5000: loss 0.248533
iteration 6000: loss 0.248164
iteration 7000: loss 0.247937
iteration 8000: loss 0.247742
iteration 9000: loss 0.247512
iteration 10000: loss 0.247349
iteration 11000: loss 0.247238
iteration 12000: loss 0.247140
iteration 13000: loss 0.247068
iteration 14000: loss 0.247005
iteration 15000: loss 0.246956
iteration 16000: loss 0.246909
iteration 17000: loss 0.246871
iteration 18000: loss 0.246841
iteration 19000: loss 0.246812
training accuracy: 0.99
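To see what the hidden layer buys, here is a minimal sketch of the usual decision-region plot. It assumes the trained W, b, W2, b2 and the data X, y from the script above; the grid step h_step is an arbitrary choice.

# visualize the decision regions of the trained two-layer network
h_step = 0.02  # resolution of the evaluation grid
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h_step),
                     np.arange(y_min, y_max, h_step))
grid = np.c_[xx.ravel(), yy.ravel()]
# forward pass of the trained network on every grid point
Z = np.dot(np.maximum(0, np.dot(grid, W) + b), W2) + b2
Z = np.argmax(Z, axis=1).reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.8)
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.show()

The filled contours bend along each spiral arm, which is exactly the kind of boundary a purely linear softmax classifier cannot produce.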