BP Neural Network Source Code
One-hot
import random
import numpy as np

def one_hot(labels, classes):
    n = len(labels)
    output = np.zeros((n, classes), dtype=np.int32)  # build an n x classes matrix
    for row, label in enumerate(labels):
        output[row, label] = 1
    return output
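A quick sanity check of one_hot on a small, made-up label list:

labels = [0, 2, 1]
print(one_hot(labels, classes=3))
# [[1 0 0]
#  [0 0 1]
#  [0 1 0]]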
Dataset and DataLoader
class Dataset:
    def __init__(self, images, labels):
        self.images = images
        self.labels = labels
    def __getitem__(self, index):
        return self.images[index], self.labels[index]
    def __len__(self):
        return len(self.images)
class DataLoaderIterator:
    def __init__(self, dataloader):
        self.dataloader = dataloader
        self.cursor = 0
        self.indexs = list(range(self.dataloader.count_data))
        if self.dataloader.shuffle:
            random.shuffle(self.indexs)
    def __next__(self):
        if self.cursor >= self.dataloader.count_data:
            raise StopIteration()
        batch_data = []
        # the last batch may be smaller than batch_size
        remain = min(self.dataloader.batch_size, self.dataloader.count_data - self.cursor)
        for n in range(remain):
            index = self.indexs[self.cursor]
            data = self.dataloader.dataset[index]
            # lazily create one list per component (e.g. images, labels)
            if len(batch_data) == 0:
                batch_data = [[] for _ in range(len(data))]
            for col, item in enumerate(data):
                batch_data[col].append(item)
            self.cursor += 1
        # stack each component list into a single array
        for col in range(len(batch_data)):
            batch_data[col] = np.vstack(batch_data[col])
        return batch_data
class DataLoader:
    def __init__(self, dataset, batch_size, shuffle):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.count_data = len(self.dataset)
    def __iter__(self):
        return DataLoaderIterator(self)
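A minimal sketch of how Dataset and DataLoader work together, using random toy data (the shapes here are arbitrary):

images = np.random.randn(10, 4)  # 10 samples, 4 features
labels = one_hot(np.random.randint(0, 3, size=10), classes=3)
loader = DataLoader(Dataset(images, labels), batch_size=4, shuffle=True)
for batch_images, batch_labels in loader:
    print(batch_images.shape, batch_labels.shape)  # (4, 4) (4, 3); the last batch is (2, 4) (2, 3)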
Module
class Module:
    def __init__(self, name):
        self.name = name
        self.train_mode = False
    def __call__(self, *args):
        return self.forward(*args)
    def train(self):
        self.train_mode = True
        for m in self.modules():
            m.train()
    def eval(self):
        self.train_mode = False
        for m in self.modules():
            m.eval()
    def modules(self):
        # collect the direct child modules from the instance attributes
        ms = []
        for attr in self.__dict__:
            m = self.__dict__[attr]
            if isinstance(m, Module):
                ms.append(m)
        return ms
    def params(self):
        # collect this module's own parameters, then recurse into children
        ps = []
        for attr in self.__dict__:
            p = self.__dict__[attr]
            if isinstance(p, Parameter):
                ps.append(p)
        for m in self.modules():
            ps.extend(m.params())
        return ps
    def info(self, n):
        output = f"{self.name}\n"
        for m in self.modules():
            output += (' ' * (n + 1)) + f"{m.info(n + 1)}\n"
        return output[:-1]
    def __repr__(self):
        return self.info(0)
ModuleList
class ModuleList(Module):
    def __init__(self, *args):
        super().__init__("ModuleList")
        self.ms = list(args)
    def modules(self):
        return self.ms
    def forward(self, x):
        for m in self.ms:
            x = m(x)
        return x
    def backward(self, G):
        # propagate the gradient through the submodules in reverse order
        for i in range(len(self.ms) - 1, -1, -1):
            G = self.ms[i].backward(G)
        return G
Parameter
class Parameter:
    def __init__(self, value):
        self.value = value
        self.delta = np.zeros(value.shape)
    def zero_grad(self):
        self.delta[...] = 0
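How Parameter interacts with Module.params(): gradients accumulate in delta across backward calls, so they must be cleared before each optimizer step. A tiny sketch with a hypothetical Affine module (the name and shapes are just for illustration):

class Affine(Module):
    def __init__(self):
        super().__init__("Affine")
        self.w = Parameter(np.ones((2, 2)))

m = Affine()
m.w.delta += 1.0        # simulate a gradient accumulated by backward()
print(len(m.params()))  # 1: params() finds self.w via isinstance
m.w.zero_grad()
print(m.w.delta)        # back to all zeros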
Initializer
class Initializer:
    def __init__(self, name):
        self.name = name
    def __call__(self, *args):
        return self.apply(*args)

class GaussInitializer(Initializer):
    def __init__(self, mu, sigma):
        super().__init__("GaussInitializer")
        self.mu = mu
        self.sigma = sigma
    def apply(self, value):
        value[...] = np.random.normal(self.mu, self.sigma, value.shape)
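GaussInitializer fills an existing array in place; calling the instance forwards to apply. For example:

w = np.zeros((100, 100))
GaussInitializer(mu=0, sigma=0.1)(w)
print(w.std())  # roughly 0.1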
Linear
class Linear(Module):
    def __init__(self, input_feature, output_feature):
        super().__init__("Linear")
        self.input_feature = input_feature
        self.output_feature = output_feature
        self.weights = Parameter(np.zeros((input_feature, output_feature)))
        self.bias = Parameter(np.zeros((1, output_feature)))
        # He (Kaiming) initialization: N(0, sqrt(2 / fan_in)), suited to ReLU networks
        initer = GaussInitializer(0, np.sqrt(2 / input_feature))
        initer.apply(self.weights.value)
    def forward(self, x):
        self.x_save = x.copy()
        return x @ self.weights.value + self.bias.value
    def backward(self, G):
        # dL/dW = x^T G, dL/db = sum over the batch, dL/dx = G W^T
        self.weights.delta += self.x_save.T @ G
        self.bias.delta += np.sum(G, 0)
        return G @ self.weights.value.T
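Since backward both accumulates parameter gradients and returns the gradient with respect to the input, a finite-difference check is a useful sanity test. A sketch with toy shapes, taking the loss to be the plain sum of the outputs so that G is all ones:

lin = Linear(3, 2)
x = np.random.randn(5, 3)
y = lin(x)
lin.backward(np.ones_like(y))           # analytic gradients into .delta

h = 1e-5
lin.weights.value[0, 0] += h            # perturb one weight
num = (np.sum(lin(x)) - np.sum(y)) / h  # numerical dL/dW[0, 0]
print(num, lin.weights.delta[0, 0])     # the two should agree closely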
Activation Functions
class ReLU(Module):
    def __init__(self, inplace=True):
        super().__init__("ReLU")
        self.inplace = inplace
    def forward(self, x):
        self.negative_position = x < 0
        if not self.inplace:
            x = x.copy()
        x[self.negative_position] = 0
        return x
    def backward(self, G):
        if not self.inplace:
            G = G.copy()
        G[self.negative_position] = 0
        return G
class SWish(Module):
    def __init__(self):
        super().__init__("SWish")
    @staticmethod
    def sigmoid(x):
        # numerically stable sigmoid: avoid exp overflow for large |x|
        p0 = x < 0
        p1 = ~p0
        x = x.copy()
        x[p0] = np.exp(x[p0]) / (1 + np.exp(x[p0]))
        x[p1] = 1 / (1 + np.exp(-x[p1]))
        return x
    def forward(self, x):
        self.x_save = x.copy()
        self.sx = self.sigmoid(x)
        return x * self.sx
    def backward(self, G):
        # d(swish)/dx = sigmoid(x) + x * sigmoid(x) * (1 - sigmoid(x))
        return G * (self.sx + self.x_save * self.sx * (1 - self.sx))
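The Swish derivative is easy to get wrong, so a quick finite-difference check against backward is worthwhile (toy input; Swish is elementwise, so a uniform perturbation checks every entry):

act = SWish()
x = np.random.randn(4, 4)
y = act(x)
g = act.backward(np.ones_like(x))  # analytic dy/dx
h = 1e-6
num = (act(x + h) - y) / h         # numerical dy/dx
print(np.abs(num - g).max())       # should be close to zero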
Dropout
class Dropout(Module):
    def __init__(self, prob_keep=0.5, inplace=True):
        super().__init__("Dropout")
        self.prob_keep = prob_keep
        self.inplace = inplace
    def forward(self, x):
        if not self.train_mode:
            return x
        # mask marks the units to drop: each unit is dropped with probability 1 - prob_keep
        self.mask = np.random.binomial(size=x.shape, p=1 - self.prob_keep, n=1) == 1
        if not self.inplace:
            x = x.copy()
        x[self.mask] = 0
        x *= 1 / self.prob_keep  # inverted dropout: keep the expected activation unchanged
        return x
    def backward(self, G):
        if not self.inplace:
            G = G.copy()
        G[self.mask] = 0
        G *= 1 / self.prob_keep
        return G
class DropoutMul(Module):
    def __init__(self, prob_keep=0.5, inplace=True):
        super().__init__("DropoutMul")
        self.prob_keep = prob_keep
        self.inplace = inplace
    def forward(self, x):
        if not self.train_mode:
            return x
        # multiplicative variant: mask entries are 1 with probability prob_keep, else 0
        self.mask = np.random.binomial(size=x.shape, p=self.prob_keep, n=1)
        if not self.inplace:
            x = x.copy()
        x *= self.mask
        x *= 1 / self.prob_keep
        return x
    def backward(self, G):
        if not self.inplace:
            G = G.copy()
        G *= self.mask
        G *= 1 / self.prob_keep
        return G
BatchNorm
class BatchNormalization(Module):
    def __init__(self, in_feature, momentum=0.9, eps=1e-8):
        super().__init__("BatchNormalization")
        self.mu = 0   # running mean, updated during training
        self.var = 1  # running variance
        self.momentum = momentum
        self.eps = eps
        self.in_feature = in_feature
        self.gamma = Parameter(np.ones(in_feature))
        self.beta = Parameter(np.zeros(in_feature))
    def forward(self, x):
        # inference: normalize with the running statistics
        if not self.train_mode:
            y = (x - self.mu) / np.sqrt(self.var + self.eps)
            return y * self.gamma.value.reshape(1, -1, 1, 1) + self.beta.value.reshape(1, -1, 1, 1)
        # training: normalize with per-channel batch statistics
        self.b_mu = np.mean(x, axis=(0, 2, 3), keepdims=True)
        self.b_var = np.var(x, axis=(0, 2, 3), keepdims=True)
        self.y = (x - self.b_mu) / np.sqrt(self.b_var + self.eps)
        self.mu = self.b_mu * self.momentum + self.mu * (1 - self.momentum)
        n = x.size / x.shape[1]
        unbiased_var = self.b_var * n / (n - 1)  # the running variance uses the unbiased estimate
        self.var = unbiased_var * self.momentum + self.var * (1 - self.momentum)
        return self.y * self.gamma.value.reshape(1, -1, 1, 1) + self.beta.value.reshape(1, -1, 1, 1)
    def backward(self, G):
        # note: overwrites .delta rather than accumulating like Linear
        self.gamma.delta = np.sum(G * self.y, axis=(0, 2, 3))
        self.beta.delta = np.sum(G, axis=(0, 2, 3))
        # simplified input gradient: treats the batch mean and variance as constants
        return G * self.gamma.value.reshape(1, -1, 1, 1) / np.sqrt(self.b_var + self.eps)
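Note that BatchNormalization as written reduces over axes (0, 2, 3), so it expects 4-D NCHW input (per-channel statistics), even though the rest of this post works with 2-D batches; 2-D activations would need reshaping to (N, C, 1, 1) first. A toy training-mode forward pass:

bn = BatchNormalization(in_feature=3)
bn.train_mode = True                     # normally set via model.train()
x = np.random.randn(8, 3, 4, 4) * 5 + 2  # N=8, C=3, H=W=4
y = bn(x)
print(y.mean(axis=(0, 2, 3)))            # per-channel mean, approximately 0
print(y.var(axis=(0, 2, 3)))             # per-channel variance, approximately 1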
Loss
class SigmoidCrossEntropy(Module):
    def __init__(self, params, weight_decay=1e-5):
        super().__init__("SigmoidCrossEntropy")
        self.params = params
        self.weight_decay = weight_decay
    def sigmoid(self, x):
        # numerically stable sigmoid (same trick as in SWish)
        p0 = x < 0
        p1 = ~p0
        x = x.copy()
        x[p0] = np.exp(x[p0]) / (1 + np.exp(x[p0]))
        x[p1] = 1 / (1 + np.exp(-x[p1]))
        return x
    def decay_loss(self):
        # weight decay based on the L2 norm of each parameter tensor
        loss = 0
        for p in self.params:
            loss += np.sqrt(np.sum(p.value ** 2)) / (2 * p.value.size) * self.weight_decay
        return loss
    def decay_backward(self):
        # gradient of decay_loss with respect to each parameter
        eps = 1e-8
        for p in self.params:
            p.delta += 1 / (2 * np.sqrt(np.sum(p.value ** 2)) + eps) / (2 * p.value.size) * self.weight_decay * 2 * p.value
    def forward(self, x, label_onehot):
        eps = 1e-6
        self.label_onehot = label_onehot
        self.predict = self.sigmoid(x)
        self.predict = np.clip(self.predict, a_max=1 - eps, a_min=eps)  # avoid log(0)
        self.batch_size = self.predict.shape[0]
        return -np.sum(label_onehot * np.log(self.predict) + (1 - label_onehot) *
                       np.log(1 - self.predict)) / self.batch_size + self.decay_loss()
    def backward(self):
        self.decay_backward()
        return (self.predict - self.label_onehot) / self.batch_size
class SoftmaxCrossEntropy(Module):
    def __init__(self):
        super().__init__("SoftmaxCrossEntropy")
    def softmax(self, x):
        # subtract the row-wise max for numerical stability
        max_x = np.max(x, axis=1, keepdims=True)
        exp_x = np.exp(x - max_x)
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)
    def forward(self, x, label_onehot):
        eps = 1e-6
        self.label_onehot = label_onehot
        self.predict = self.softmax(x)
        self.predict = np.clip(self.predict, a_max=1 - eps, a_min=eps)
        self.batch_size = self.predict.shape[0]
        return -np.sum(label_onehot * np.log(self.predict)) / self.batch_size
    def backward(self):
        # softmax + cross-entropy combine into the simple gradient (p - y) / batch_size
        return (self.predict - self.label_onehot) / self.batch_size
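Both losses follow the same contract: forward takes the raw logits plus one-hot labels and returns a scalar, and backward returns dL/dlogits, which is then fed into the network's backward. For example:

logits = np.random.randn(4, 3)
targets = one_hot([0, 2, 1, 1], classes=3)
loss_fn = SoftmaxCrossEntropy()
loss = loss_fn(logits, targets)
G = loss_fn.backward()  # dL/dlogits, shape (4, 3)
print(loss, G.shape)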
Optimizer
class Optimizer:
    def __init__(self, name, model, lr):
        self.name = name
        self.model = model
        self.lr = lr
        self.params = model.params()
    def zero_grad(self):
        for param in self.params:
            param.zero_grad()
    def set_lr(self, lr):
        self.lr = lr

class SGD(Optimizer):
    def __init__(self, model, lr=1e-3):
        super().__init__("SGD", model, lr)
    def step(self):
        for param in self.params:
            param.value -= self.lr * param.delta

class SGDMomentum(Optimizer):
    def __init__(self, model, lr=1e-3, momentum=0.9):
        super().__init__("SGDMomentum", model, lr)
        self.momentum = momentum
        for param in self.params:
            param.v = 0  # velocity
    def step(self):
        for param in self.params:
            param.v = self.momentum * param.v - self.lr * param.delta
            param.value += param.v

class Adam(Optimizer):
    def __init__(self, model, lr=1e-3, beta1=0.9, beta2=0.999, l2_regularization=0):
        super().__init__("Adam", model, lr)
        self.beta1 = beta1
        self.beta2 = beta2
        self.l2_regularization = l2_regularization
        self.t = 0
        for param in self.params:
            param.m = 0  # first-moment estimate
            param.v = 0  # second-moment estimate
    def step(self):
        eps = 1e-8
        self.t += 1
        for param in self.params:
            g = param.delta
            param.m = self.beta1 * param.m + (1 - self.beta1) * g
            param.v = self.beta2 * param.v + (1 - self.beta2) * g ** 2
            # bias-corrected moment estimates
            mt_ = param.m / (1 - self.beta1 ** self.t)
            vt_ = param.v / (1 - self.beta2 ** self.t)
            param.value -= self.lr * mt_ / (np.sqrt(vt_) + eps) + self.l2_regularization * param.value
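Putting all the pieces together, here is a minimal end-to-end training sketch on random data; the shapes, epoch count and hyperparameters are illustrative, not from any particular experiment:

np.random.seed(0)
images = np.random.randn(100, 20)
labels = np.random.randint(0, 5, size=100)
loader = DataLoader(Dataset(images, one_hot(labels, 5)), batch_size=16, shuffle=True)

model = ModuleList(Linear(20, 32), ReLU(), Linear(32, 5))
loss_fn = SoftmaxCrossEntropy()
optim = Adam(model, lr=1e-3)

model.train()
for epoch in range(10):
    for x, y in loader:
        loss = loss_fn(model(x), y)
        optim.zero_grad()
        model.backward(loss_fn.backward())  # push dL/dlogits back through the stack
        optim.step()
    print(f"epoch {epoch}: loss {loss:.4f}")
model.eval()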