Neural network part. It helps to get the theory reasonably clear before attempting the assignment.
Gradient derivation reference:
http://www.jianshu.com/p/004c99623104
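The core of the derivation, restated here for convenience (this is the standard softmax-plus-ReLU backprop that the code below implements; see the linked post for the step-by-step version):

\[
p_{ij} = \frac{e^{s_{ij}}}{\sum_k e^{s_{ik}}},\qquad
L = -\frac{1}{N}\sum_{i}\log p_{i,y_i}
  + \frac{\lambda}{2}\left(\lVert W_1\rVert^2 + \lVert W_2\rVert^2\right)
\]
\[
\frac{\partial L}{\partial s_{ij}} = \frac{1}{N}\bigl(p_{ij} - \mathbb{1}[j=y_i]\bigr),\qquad
\frac{\partial L}{\partial W_2} = h_1^{\top}\frac{\partial L}{\partial s} + \lambda W_2,\qquad
\frac{\partial L}{\partial b_2} = \sum_i \frac{\partial L}{\partial s_i}
\]

The error flowing back into the hidden layer is \((\partial L/\partial s)\,W_2^{\top}\), zeroed wherever the ReLU input was non-positive; \(W_1\) and \(b_1\) then follow the same pattern with \(X\) in place of \(h_1\).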
For the hyperparameter-tuning part, I first tried to search all parameters jointly for one best combination, but a single training run turned out to take about an hour. So I switched to tuning one parameter at a time: sweep one parameter, keep the best value found, then move on to the next parameter. The final test accuracy was 54.8%.
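Below is a minimal sketch of that one-parameter-at-a-time search. The starting values, candidate lists, and the X_train/y_train/X_val/y_val arrays are placeholders rather than the ones I actually used, and it assumes the full assignment class with its standard train()/predict() methods (only __init__ and loss are shown in this post):

import numpy as np

best = {'hidden_size': 50, 'learning_rate': 1e-3, 'reg': 0.25}  # illustrative start
for name, candidates in [('hidden_size', [50, 100, 150]),
                         ('learning_rate', [5e-4, 1e-3, 2e-3]),
                         ('reg', [0.1, 0.25, 0.5])]:
    results = {}
    for value in candidates:
        trial = dict(best, **{name: value})  # vary one parameter, keep the rest fixed
        net = TwoLayerNet(32 * 32 * 3, trial['hidden_size'], 10)
        net.train(X_train, y_train, X_val, y_val,
                  learning_rate=trial['learning_rate'], reg=trial['reg'],
                  num_iters=1000)
        results[value] = np.mean(net.predict(X_val) == y_val)
    best[name] = max(results, key=results.get)  # lock in the winner before moving on
print(best)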
import numpy as np
import matplotlib.pyplot as plt
class TwoLayerNet(object):
    """
    A two-layer fully-connected neural network. The net has an input dimension of
    D, a hidden layer dimension of H, and performs classification over C classes.
    We train the network with a softmax loss function and L2 regularization on the
    weight matrices. The network uses a ReLU nonlinearity after the first fully
    connected layer.
    In other words, the network has the following architecture:
    input - fully connected layer - ReLU - fully connected layer - softmax
    The outputs of the second fully-connected layer are the scores for each class.
    """
    def __init__(self, input_size, hidden_size, output_size, std=1e-4):
        """
        Initialize the model. Weights are initialized to small random values and
        biases are initialized to zero. Weights and biases are stored in the
        variable self.params, which is a dictionary with the following keys:
        W1: First layer weights; has shape (D, H)
        b1: First layer biases; has shape (H,)
        W2: Second layer weights; has shape (H, C)
        b2: Second layer biases; has shape (C,)
        Inputs:
        - input_size: The dimension D of the input data.
        - hidden_size: The number of neurons H in the hidden layer.
        - output_size: The number of classes C.
        """
        self.params = {}
        self.params['W1'] = std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
    def loss(self, X, y=None, reg=0.0):
        """
        Compute the loss and gradients for a two-layer fully connected neural
        network.
        Inputs:
        - X: Input data of shape (N, D). Each X[i] is a training sample.
        - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
          an integer in the range 0 <= y[i] < C. This parameter is optional; if it
          is not passed then we only return scores, and if it is passed then we
          instead return the loss and gradients.
        - reg: Regularization strength.
        Returns:
        If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
        the score for class c on input X[i].
        If y is not None, instead return a tuple of:
        - loss: Loss (data loss and regularization loss) for this batch of training
          samples.
        - grads: Dictionary mapping parameter names to gradients of those parameters
          with respect to the loss function; has the same keys as self.params.
        """
        # Unpack variables from the params dictionary
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N, D = X.shape
        # Compute the forward pass
        #############################################################################
        # TODO: Perform the forward pass, computing the class scores for the input. #
        # Store the result in the scores variable, which should be an array of      #
        # shape (N, C).                                                             #
        #############################################################################
        h1 = np.maximum(0, np.dot(X, W1) + b1)  # first affine layer + ReLU, shape (N, H)
        scores = np.dot(h1, W2) + b2            # second affine layer, shape (N, C)
        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################
        # If the targets are not given then jump out, we're done
        if y is None:
            return scores
        # Compute the loss
        #############################################################################
        # TODO: Finish the forward pass, and compute the loss. This should include  #
        # both the data loss and L2 regularization for W1 and W2. Store the result  #
        # in the variable loss, which should be a scalar. Use the Softmax           #
        # classifier loss. So that your results match ours, multiply the            #
        # regularization loss by 0.5                                                #
        #############################################################################
        # Shift each row by its max so np.exp cannot overflow (softmax is invariant)
        scores = scores - np.reshape(np.max(scores, axis=1), (N, -1))
        p = np.exp(scores) / np.reshape(np.sum(np.exp(scores), axis=1), (N, -1))
        # Average cross-entropy over the batch, plus the 0.5 * reg * ||W||^2 terms
        loss = -np.sum(np.log(p[np.arange(N), y])) / N
        loss += 0.5 * reg * np.sum(W1 * W1) + 0.5 * reg * np.sum(W2 * W2)
        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################
        # Backward pass: compute gradients
        grads = {}
        # Gradient of the averaged loss w.r.t. the scores: (p - one_hot(y)) / N
        dscores = p.copy()
        dscores[np.arange(N), y] -= 1.0
        dscores /= N
        # Second layer: weights get h1^T * dscores, biases sum over the batch
        dW2 = np.dot(h1.T, dscores)
        db2 = np.sum(dscores, axis=0)
        # Backprop into the hidden layer, zeroing gradients where ReLU was inactive
        dh1 = np.dot(dscores, W2.T)
        dh1[h1 <= 0] = 0
        # First layer, same pattern with X in place of h1
        dW1 = np.dot(X.T, dh1)
        db1 = np.sum(dh1, axis=0)
        # Add the gradient of the 0.5 * reg * ||W||^2 regularization terms
        dW2 += reg * W2
        dW1 += reg * W1
        grads['W1'] = dW1
        grads['b1'] = db1
        grads['W2'] = dW2
        grads['b2'] = db2
        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################
        return loss, grads
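As a sanity check on the analytic gradients above, here is a small centered-difference numeric gradient check on toy data. This is my own sketch, not the grad-check utility the assignment ships with; it only assumes loss() returns (loss, grads) as documented:

import numpy as np

def numeric_gradient(f, w, h=1e-5):
    # Centered difference (f(w+h) - f(w-h)) / 2h, one coordinate at a time
    grad = np.zeros_like(w)
    it = np.nditer(w, flags=['multi_index'])
    while not it.finished:
        ix = it.multi_index
        old = w[ix]
        w[ix] = old + h
        fxph = f()
        w[ix] = old - h
        fxmh = f()
        w[ix] = old  # restore the original value
        grad[ix] = (fxph - fxmh) / (2 * h)
        it.iternext()
    return grad

np.random.seed(0)
net = TwoLayerNet(input_size=4, hidden_size=10, output_size=3, std=1e-1)
X = np.random.randn(5, 4)
y = np.array([0, 1, 2, 2, 1])
loss, grads = net.loss(X, y, reg=0.05)
for name in sorted(grads):
    num = numeric_gradient(lambda: net.loss(X, y, reg=0.05)[0], net.params[name])
    # Max absolute difference should be tiny, on the order of 1e-8
    print(name, np.max(np.abs(num - grads[name])))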