functions
from common.np import *


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def relu(x):
    return np.maximum(0, x)


def softmax(x):
    if x.ndim == 2:
        x = x - x.max(axis=1, keepdims=True)  # subtract the row max for numerical stability
        x = np.exp(x)
        x /= x.sum(axis=1, keepdims=True)
    elif x.ndim == 1:
        x = x - np.max(x)
        x = np.exp(x) / np.sum(np.exp(x))
    return x
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    # if t is one-hot, convert it to label indices
    if t.size == y.size:
        t = t.argmax(axis=1)
    batch_size = y.shape[0]
    # 1e-7 avoids log(0)
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
if __name__ == '__main__':
    a = np.array([0.3, 3, 5])
    print(sum(softmax(a)))
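
A slightly richer self-test (a sketch, not in the original file): check that softmax rows sum to 1 in the batched case too, and that cross_entropy_error agrees with a hand-computed value for one-hot targets.

if __name__ == '__main__':
    y = softmax(np.array([[0.3, 3.0, 5.0], [1.0, 1.0, 1.0]]))
    print(y.sum(axis=1))  # -> [1. 1.]
    t = np.array([[0, 0, 1], [1, 0, 0]])  # one-hot targets
    expected = -np.mean(np.log(y[np.arange(2), t.argmax(axis=1)] + 1e-7))
    print(np.isclose(cross_entropy_error(y, t), expected))  # -> True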
layers
from functions import *
import numpy as np


class Sigmoid:
    '''
    sigmoid = 1 / (1 + exp(-x))
    '''
    def __init__(self):
        self.params = []
        self.grads = []
        self.out = None

    def forward(self, x):
        self.out = sigmoid(x)
        return self.out

    def backward(self, dout):
        '''
        y = 1 / (1 + exp(-x)), so dy/dx = y * (1 - y)
        :param dout: gradient flowing in from the layer above
        :return: dx, this layer's gradient, passed on to the layer below
        '''
        return dout * (1 - self.out) * self.out
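
A centered-difference check (a sketch, not part of the original file) confirms that Sigmoid.backward matches the numerical derivative of its forward pass:

if __name__ == '__main__':
    layer = Sigmoid()
    x = np.random.randn(3, 4)
    layer.forward(x)
    analytic = layer.backward(np.ones((3, 4)))  # dL/dx with dL/dy = 1
    eps = 1e-5
    numeric = (sigmoid(x + eps) - sigmoid(x - eps)) / (2 * eps)
    print(np.allclose(analytic, numeric))       # -> True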
class Affine:
    def __init__(self, W, b):
        self.params = [W, b]
        self.grads = [np.zeros_like(W), np.zeros_like(b)]
        self.matmul = MatMul(W)  # delegate x @ W to the MatMul layer

    def forward(self, x):
        W, b = self.params
        out = self.matmul.forward(x) + b
        return out

    def backward(self, dout):
        '''
        Equivalent direct form (without the MatMul layer):
            dW = np.dot(x.T, dout)
            db = np.sum(dout, axis=0)
            dx = np.dot(dout, W.T)
        '''
        dx = self.matmul.backward(dout)
        self.grads[0][...] = self.matmul.grads[0]  # dW, copied in place so the array stays aliased
        self.grads[1][...] = np.sum(dout, axis=0)  # db: sum over the batch axis
        return dx
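
A quick shape walk-through (illustrative sizes, not from the original): with a batch of N=2, x of shape (2, 3) and W of shape (3, 4), forward returns (2, 4); backward maps the (2, 4) upstream gradient back to dx of shape (2, 3) while filling dW (3, 4) and db (4,):

if __name__ == '__main__':
    affine = Affine(np.random.randn(3, 4), np.zeros(4))
    out = affine.forward(np.random.randn(2, 3))
    dx = affine.backward(np.ones_like(out))
    print(out.shape, dx.shape)                           # (2, 4) (2, 3)
    print(affine.grads[0].shape, affine.grads[1].shape)  # (3, 4) (4,)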
class Softmax:
    def __init__(self):
        self.params = []
        self.grads = []
        self.out = None

    def forward(self, x):
        self.out = softmax(x)
        return self.out

    def backward(self, dout):
        # full softmax Jacobian, not just its diagonal:
        # dx_i = y_i * (dout_i - sum_j(dout_j * y_j))
        dx = self.out * dout
        sumdx = np.sum(dx, axis=1, keepdims=True)
        dx -= self.out * sumdx
        return dx
class SoftmaxWithLoss:
    def __init__(self):
        self.params = []
        self.grads = []
        self.out = None
        self.table = None
        self.loss = None

    def forward(self, x, table):
        self.table = table
        self.out = softmax(x)
        # if the labels are one-hot, convert them to label indices
        if self.table.size == self.out.size:
            self.table = self.table.argmax(axis=1)
        self.loss = cross_entropy_error(self.out, self.table)
        return self.loss

    def backward(self, dout):
        batch_size = self.table.shape[0]
        dx = self.out.copy()
        # combined softmax + cross-entropy gradient: (y - t) / batch_size
        dx[np.arange(batch_size), self.table] -= 1
        dx *= dout
        dx = dx / batch_size
        return dx
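
Why backward is so short: for softmax followed by cross-entropy, the combined gradient collapses to (y - t) / batch_size with t one-hot, so no softmax Jacobian is needed. A quick check (a sketch):

if __name__ == '__main__':
    layer = SoftmaxWithLoss()
    x = np.random.randn(4, 3)
    t = np.eye(3)[np.random.randint(0, 3, size=4)]  # random one-hot labels
    layer.forward(x, t)
    print(np.allclose(layer.backward(1), (softmax(x) - t) / 4))  # -> True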
class MatMul:
    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.x = None

    def forward(self, x):
        W, = self.params
        out = np.dot(x, W)
        self.x = x  # cache the input for backward
        return out

    def backward(self, dout):
        W, = self.params
        dx = np.dot(dout, W.T)       # (N, out) @ (out, in) -> (N, in)
        dW = np.dot(self.x.T, dout)  # (in, N) @ (N, out) -> (in, out)
        self.grads[0][...] = dW      # write in place to keep the array aliased
        return dx
optimizer
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for i in range(len(params)):
            params[i] -= self.lr * grads[i]
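
Note that update modifies each parameter array in place (-=). TwoLayerNet below gathers the very same ndarray objects into model.params, so an in-place step is immediately visible inside every layer. A tiny demonstration of that aliasing (a sketch):

if __name__ == '__main__':
    import numpy as np
    W = np.ones(3)  # pretend this is a layer's weight
    SGD(lr=0.1).update([W], [np.full(3, 0.5)])
    print(W)        # [0.95 0.95 0.95] -- no copy was made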
TwoLayerNet
import numpy as np
from utils.layers import *
from utils.functions import *
from utils.optimizer import *


class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, init_weight=0.01):
        I, H, O = input_size, hidden_size, output_size
        # small zero-mean random initial weights (standard normal scaled by init_weight)
        W1 = init_weight * np.random.randn(I, H).astype('f')
        b1 = np.zeros(H).astype('f')
        W2 = init_weight * np.random.randn(H, O).astype('f')
        b2 = np.zeros(O).astype('f')

        self.layers = [
            Affine(W1, b1),
            Sigmoid(),
            Affine(W2, b2)
        ]
        self.loss_layer = SoftmaxWithLoss()

        # collect every layer's params and grads into flat lists;
        # the ndarrays are shared, so SGD's in-place updates reach the layers
        self.params = []
        self.grads = []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads
        self.losses = None

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def loss(self, x, y):
        score = self.predict(x)
        self.losses = self.loss_layer.forward(score, y)
        return self.losses

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
import sys
sys.path.append('..')
from dataset import spiral
import matplotlib.pyplot as plt

if __name__ == '__main__':
    model = TwoLayerNet(2, 10, 3)
    optimizer = SGD(lr=0.5)
    print(len(model.params))

    # load the spiral dataset once, outside the training loop
    x, t = spiral.load_data()
    epoch = 500
    steps = 100  # full-batch gradient steps per epoch
    for e in range(epoch):
        for step in range(steps):
            loss = model.loss(x, t)
            model.backward()
            optimizer.update(model.params, model.grads)
            print("epoch {}/{} | step {}/{} | loss {}".format(e + 1, epoch, step + 1, steps, loss))
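
Before trusting backward, model.grads can be compared against finite differences on a small batch. The helper below is a sketch introduced here (numerical_grads is not part of the original files; in practice it would sit above the __main__ block) and is slow, so it is for one-off verification only:

def numerical_grads(model, x, t, eps=1e-3):
    # central-difference gradient of the loss w.r.t. every parameter
    num_grads = []
    for p in model.params:
        g = np.zeros_like(p)
        it = np.nditer(p, flags=['multi_index'])
        while not it.finished:
            i = it.multi_index
            orig = p[i]
            p[i] = orig + eps
            l1 = model.loss(x, t)
            p[i] = orig - eps
            l2 = model.loss(x, t)
            p[i] = orig
            g[i] = (l1 - l2) / (2 * eps)
            it.iternext()
        num_grads.append(g)
    return num_grads

# usage: call model.loss(x, t) and model.backward() first, then
#     for g1, g2 in zip(model.grads, numerical_grads(model, x, t)):
#         print(np.abs(g1 - g2).max())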
Trainer
import matplotlib.pyplot as plt
from two_layer_net import TwoLayerNet
from dataset import spiral
from optimizer import SGD
import numpy as np
import time


class Trainer(object):
    def __init__(self, model, optimizer):
        self.model = model
        self.optim = optimizer
        self.loss_list = []

    def fit(self, x, t, num_epoch, batch_size, eval_interval=10):
        total_loss = 0
        total_count = 0
        max_iter = len(x) // batch_size
        start_time = time.time()
        for epoch in range(num_epoch):
            # reshuffle the data at the start of every epoch
            idx = np.random.permutation(len(x))
            x, t = x[idx], t[idx]
            for iters in range(max_iter):
                batch_x = x[iters * batch_size:(iters + 1) * batch_size]
                batch_t = t[iters * batch_size:(iters + 1) * batch_size]
                loss = self.model.loss(batch_x, batch_t)
                total_loss += loss
                total_count += 1
                self.model.backward()
                self.optim.update(self.model.params, self.model.grads)
                if iters % eval_interval == 0:
                    avg_loss = total_loss / total_count
                    self.loss_list.append(avg_loss)
                    print("| epoch %d/%d | iter %d/%d | avg_loss %.2f | loss %.2f |" % (
                        epoch + 1, num_epoch, iters + 1, max_iter, avg_loss, loss
                    ))
                    total_loss = 0
                    total_count = 0
        time_cost = time.time() - start_time
        h = time_cost // 3600
        m = time_cost % 3600 // 60
        s = time_cost % 60 // 1
        ms = time_cost % 1 * 1000
        print("time: %02d:%02d:%02d.%03d" % (h, m, s, ms))

    def plot(self):
        plt.figure()
        plt.plot(self.loss_list)
        plt.show()
if __name__ == '__main__':
    x, t = spiral.load_data()
    input_size = 2
    hidden_size = 16
    output_size = 3
    lr = 1.0
    num_epoch = 3000
    batch_size = 30

    optimizer = SGD(lr=lr)
    model = TwoLayerNet(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
    trainer = Trainer(model, optimizer)
    trainer.fit(x=x, t=t, num_epoch=num_epoch, batch_size=batch_size, eval_interval=10)
    trainer.plot()
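
After fit returns, the learned decision regions can be inspected by evaluating model.predict on a grid over the input plane. A sketch to append inside the __main__ block above (the grid range assumes the spiral data lies roughly in [-1, 1] on both axes):

    # visualize the decision boundary over the spiral data (sketch)
    h = 0.02
    xs, ys = np.meshgrid(np.arange(-1.1, 1.1, h), np.arange(-1.1, 1.1, h))
    grid = np.c_[xs.ravel(), ys.ravel()].astype('f')
    pred = np.argmax(model.predict(grid), axis=1).reshape(xs.shape)
    plt.contourf(xs, ys, pred)
    plt.scatter(x[:, 0], x[:, 1], c=np.argmax(t, axis=1), s=10)
    plt.show()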