![dropout illustration](https://i-blog.csdnimg.cn/blog_migrate/5aa13f6e730008b7bc3c810257102ba2.png)
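This post builds a minimal NumPy implementation of (inverted) dropout: a `Dropout` wrapper around an existing layer, a fully connected layer with optional regularization, and a two-layer network (`DFN`) trained on MNIST. During training each activation is kept with probability $p$ and rescaled by $1/p$,

$$\tilde{x} = \frac{m}{p}\odot x,\qquad m_i\sim\mathrm{Bernoulli}(p),$$

so that $\mathbb{E}[\tilde{x}] = x$ and no extra scaling is needed at test time.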
```python
import numpy as np
from abc import ABC, abstractmethod


class Dropout(ABC):
    """
    Dropout wrapper.

    wrapped_layer: the layer that dropout is applied to.
    p: probability of keeping a neuron active.
    """
    def __init__(self, wrapped_layer, p):
        super().__init__()
        self._base_layer = wrapped_layer
        self.p = p
        self._init_wrapper_params()

    def _init_wrapper_params(self):
        self._wrapper_derived_variables = {"dropout_mask": None}
        self._wrapper_hyperparams = {"wrapper": "Dropout", "p": self.p}

    def flush_gradients(self):
        """Delegate to the wrapped layer."""
        self._base_layer.flush_gradients()

    def update(self):
        """Delegate the parameter update to the wrapped layer."""
        self._base_layer.update()

    def forward(self, x, is_train=True):
        # At evaluation time the mask is all ones, i.e. dropout is a no-op.
        mask = np.ones_like(x).astype(bool)
        if is_train:
            # Inverted dropout: keep each unit with probability p and
            # rescale by 1/p so the expected activation is unchanged.
            mask = (np.random.rand(*x.shape) < self.p) / self.p
            x = mask * x
        self._wrapper_derived_variables["dropout_mask"] = mask
        return self._base_layer.forward(x)

    def backward(self, dLda):
        # Backprop through the wrapped layer, then through the dropout mask.
        dLdx = self._base_layer.backward(dLda)
        return dLdx * self._wrapper_derived_variables["dropout_mask"]

    @property
    def params(self):
        return self._base_layer.params

    @property
    def hyperparams(self):
        hp = self._base_layer.hyperparams
        hpw = self._wrapper_hyperparams
        if "wrappers" in hp:
            hp["wrappers"].append(hpw)
        else:
            hp["wrappers"] = [hpw]
        return hp
```
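A quick self-contained check (not from the original post) of the inverted-dropout scaling used in `forward`: the masked and rescaled activations match the originals in expectation.

```python
import numpy as np

np.random.seed(0)
x = np.ones((1000, 100))
p = 0.5
mask = (np.random.rand(*x.shape) < p) / p  # entries are 0 or 1/p
x_drop = mask * x
print(x_drop.mean())  # close to 1.0, since E[mask * x] = x
```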
```python
from abc import ABC, abstractmethod
from collections import OrderedDict  # used by DFN below (may already be re-exported by utils)
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
import math
import sys
import os
import time
import re
import progressbar
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from utils import *
from activation import *
from Regular_Class import *
```
```python
class FullyConnected_Regular(LayerBase):
    """Fully connected layer with optional regularization:
    forward, backward, update, flush_gradients, _init_params."""
    def __init__(self, n_out, acti_fn, init_w, optimizer=None):
        super(FullyConnected_Regular, self).__init__(optimizer)
        self.n_out = n_out
        self.acti_fn = ActivationInitializer(acti_fn)()
        self.init_w = init_w
        self.init_weights = WeightInitializer(mode=init_w)
        self.n_in = None
        self.is_initialized = False

    def _init_params(self, **kwargs):
        b = np.zeros((1, self.n_out))
        W = self.init_weights((self.n_in, self.n_out))
        self.params = {"W": W, "b": b}
        self.gradients = {"W": np.zeros_like(W), "b": np.zeros_like(b)}
        self.is_initialized = True

    def forward(self, X, retain_derived=True):
        """Forward pass of the fully connected layer."""
        if not self.is_initialized:
            self.n_in = X.shape[1]
            self._init_params()
        W = self.params["W"]
        b = self.params["b"]
        z = X @ W + b
        a = self.acti_fn.forward(z)
        if retain_derived:
            self.X.append(X)
        return a

    def backward(self, dLda, retain=True, regular=None):
        """Backward pass with an optional regularization term."""
        if not isinstance(dLda, list):
            dLda = [dLda]
        dX = []
        X = self.X
        for da, x in zip(dLda, X):
            dx, dw, db = self._bwd(da, x, regular)
            dX.append(dx)
            if retain:
                self.gradients["W"] += dw
                self.gradients["b"] += db
        return dX[0] if len(X) == 1 else dX

    def _bwd(self, dLda, X, regular):
        W = self.params["W"]
        b = self.params["b"]
        z = X @ W + b
        dz = dLda * self.acti_fn.grad(z)
        dX = dz @ W.T
        dW = X.T @ dz
        db = dz.sum(axis=0, keepdims=True)
        if regular is not None:
            # Add the gradient of the regularization term w.r.t. W.
            dW += regular.grad(W)
        return dX, dW, db

    @property
    def hyperparams(self):
        return {
            "layer": "Fully_connected_Regularizer",
            "init_w": self.init_w,
            "n_in": self.n_in,
            "n_out": self.n_out,
            "acti_fn": str(self.acti_fn),
            "optimizer": {
                "hyperparams": self.optimizer.hyperparams,
            },
            "components": {
                k: v for k, v in self.params.items()
            },
        }
```
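For reference, the gradients computed in `_bwd` follow from $z = XW + b$ and $a = f(z)$, with $\partial L/\partial a$ the incoming gradient:

$$
\frac{\partial L}{\partial z}=\frac{\partial L}{\partial a}\odot f'(z),\qquad
\frac{\partial L}{\partial W}=X^{\top}\frac{\partial L}{\partial z},\qquad
\frac{\partial L}{\partial b}=\mathbf{1}^{\top}\frac{\partial L}{\partial z},\qquad
\frac{\partial L}{\partial X}=\frac{\partial L}{\partial z}\,W^{\top},
$$

and `regular.grad(W)` is added to $\partial L/\partial W$ when a regularizer is supplied.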
```python
def minibatch(x, batchsize=256, shuffle=True):
    """Yield index arrays that split x into mini-batches."""
    N = x.shape[0]
    idx = np.arange(N)
    n_batches = int(np.ceil(N / batchsize))
    if shuffle:
        np.random.shuffle(idx)

    def mb_generator():
        for i in range(n_batches):
            yield idx[i * batchsize:(i + 1) * batchsize]

    return mb_generator(), n_batches
```
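A small usage sketch for `minibatch`, using the numpy import and definition above (the array and sizes here are made up for illustration):

```python
X = np.random.randn(1000, 20)
gen, n_batches = minibatch(X, batchsize=256, shuffle=True)
print(n_batches)              # 4 batches for 1000 samples at batch size 256
for batch_idx in gen:
    X_batch = X[batch_idx]    # rows of this mini-batch, at most 256 of them
```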
""" 双层全连接层的模型"""
class DFN(object):
def __init__(self,
hidden_dims_1 = None,
hidden_dims_2 = None,
optimizer = "sgd(lr=0.1)",
init_w = "std_normal",
regular_act=None,
loss = CrossEntropy(),
p = 1.0
):
self.optimizer = optimizer
self.hidden_dims_1 = hidden_dims_1
self.hidden_dims_2 = hidden_dims_2
self.loss =loss
self.regular = None
self.regular_act = regular_act
self.is_initialized = False
self.init_w = init_w
self.p = p
def _set_params(self):
""" 模型初始化: FC1-> sigmoid -> FC2 -> softmax"""
self.layers = OrderedDict()
self.layers["FC1"] =Dropout( FullyConnected_Regular(n_out=self.hidden_dims_1,
acti_fn="sigmoid",
init_w=self.init_w,
optimizer=self.optimizer), p=self.p)
self.layers["FC2"] =Dropout( FullyConnected_Regular(n_out=self.hidden_dims_2,
acti_fn="affine(slope=1.,intercept=0)",
init_w=self.init_w,
optimizer=self.optimizer), p=self.p)
self.layers["Softmax"] = Softmax(optimizer=self.optimizer)
if self.regular_act is not None:
self.regular = RegularizerInitializer(self.regular_act)()
self.is_initialized = True
def forward(self,X, is_train=True):
Xs = {}
out = X
for k,v in self.layers.items():
Xs[k] = out
try:
out = v.froward(out, is_train=is_train)
except:
out = v.forward(out)
return out, Xs
def backward(self,grad):
dXs = {}
out = grad
for k,v in reversed(self.layers.items()):
dXs[k] = out
try:
out = v.backward(out, regular=self.regular)
except:
out = v.backward(out)
return out, dXs
def update(self):
""" 参数更新"""
for k,v in reversed(list(self.layers.items())):
v.update()
self.flush_gradients()
def flush_gradients(self, curr_loss=None):
for k,v in self.layers.items():
v.flush_gradients()
def fit(self, X_train,y_train,n_epochs=20, batch_size=64, verbose=False):
"""
:param X_train:
:param y_train:
:param n_epochs:
:param batch_size:
:param verbose:
:return:
"""
self.verbose = verbose
self.n_epochs = n_epochs
self.batch_size = batch_size
if not self.is_initialized:
self.n_features = X_train.shape[1]
self._set_params()
prev_loss = np.inf
for i in range(n_epochs):
loss, epoch_start = 0.0, time.time()
batch_generator, n_batch = minibatch(X_train, self.batch_size, shuffle=True)
for j, batch_idx in enumerate(batch_generator):
batch_len, batch_start = len(batch_idx), time.time()
X_batch, y_batch = X_train[batch_idx], y_train[batch_idx]
out,_ = self.forward(X_batch)
batch_loss = self.loss(y_batch, out)
if self.regular is not None:
for _,layerparams in self.hyperparams["components"].items():
assert type(layerparams) is dict
batch_loss += self.regular.loss(layerparams)
grad = self.loss.grad(y_batch, out)
_,_ = self.backward(grad)
self.update()
loss += batch_loss
if self.verbose:
fstr = f"\t [Batch {j+1}/{n_batch} Train loss :{batch_loss:.3f} ({(time.time() - batch_start):.1f}s/batch) ]"
print(fstr)
loss /= n_batch
fstr2 = f"[Epoch {i+1}/{n_epochs} avg.loss :{loss:.3f}, Delta:{(prev_loss-loss):.3f} ({(time.time() - epoch_start):.1f}s/epoch)]"
print(fstr2)
prev_loss = loss
def evaluate(self, X_test, y_test, batch_size=128):
acc = 0.0
batch_generator, n_batch = minibatch(X_test, batchsize=batch_size, shuffle=True)
for j, batch_idx in enumerate(batch_generator):
batch_len, batch_start = len(batch_idx), time.time()
X_batch, y_batch = X_test[batch_idx], y_test[batch_idx]
out,_ = self.forward(X_batch)
y_pred = np.argmax(out, axis=1)
y_batch = np.argmax(y_batch,axis=1)
acc += np.sum(y_pred==y_batch)
return acc / X_test.shape[0]
@property
def hyperparams(self):
return {
"init_w": self.init_w,
"loss": str(self.loss),
"optimizer": self.optimizer,
"regular": str(self.regular_act),
"hidden_dims_1": self.hidden_dims_1,
"hidden_dims_2": self.hidden_dims_2,
"components": {k: v.params for k, v in self.layers.items()}
}
""" 测试训练"""
def load_data(path = "..\data/mnist/mnist.npz"):
f = np.load(path)
X_train,y_train = f["x_train"], f["y_train"]
X_test, y_test = f["x_test"], f["y_test"]
f.close()
return (X_train,y_train),(X_test,y_test)
(X_train, y_train), (X_test, y_test) = load_data()
y_train = np.eye(10)[y_train.astype(int)]
y_test = np.eye(10)[y_test.astype(int)]
X_train = X_train.reshape(-1, X_train.shape[1]*X_train.shape[2]).astype('float32')
X_test = X_test.reshape(-1, X_test.shape[1]*X_test.shape[2]).astype('float32')
print(X_train.shape, y_train.shape)
N = 20000
indices = np.random.permutation(range(X_train.shape[0]))[:N]
X_train, y_train = X_train[indices], y_train[indices]
print(X_train.shape, y_train.shape)
X_train /= 255
X_train = (X_train - 0.5) * 2
X_test /= 255
X_test = (X_test - 0.5) * 2
model = DFN(hidden_dims_1=200, hidden_dims_2=10, p=0.5)
model.fit(X_train, y_train, n_epochs=20, batch_size=64)
print("with L1 regularization -- accuracy:{}".format(model.evaluate(X_test, y_test)))
Output:

```
(60000, 784) (60000, 10)
(20000, 784) (20000, 10)
[Epoch 1/20 avg.loss :2.288, Delta:inf (2.0s/epoch)]
[Epoch 2/20 avg.loss :2.208, Delta:0.080 (2.0s/epoch)]
[Epoch 3/20 avg.loss :1.940, Delta:0.268 (2.1s/epoch)]
[Epoch 4/20 avg.loss :1.550, Delta:0.390 (2.1s/epoch)]
[Epoch 5/20 avg.loss :1.207, Delta:0.343 (2.1s/epoch)]
[Epoch 6/20 avg.loss :1.000, Delta:0.208 (2.1s/epoch)]
[Epoch 7/20 avg.loss :0.866, Delta:0.134 (2.0s/epoch)]
[Epoch 8/20 avg.loss :0.791, Delta:0.075 (2.1s/epoch)]
[Epoch 9/20 avg.loss :0.736, Delta:0.055 (2.4s/epoch)]
[Epoch 10/20 avg.loss :0.704, Delta:0.032 (2.3s/epoch)]
[Epoch 11/20 avg.loss :0.673, Delta:0.031 (2.2s/epoch)]
[Epoch 12/20 avg.loss :0.659, Delta:0.014 (2.2s/epoch)]
[Epoch 13/20 avg.loss :0.649, Delta:0.010 (2.4s/epoch)]
[Epoch 14/20 avg.loss :0.631, Delta:0.018 (2.1s/epoch)]
[Epoch 15/20 avg.loss :0.627, Delta:0.004 (2.1s/epoch)]
[Epoch 16/20 avg.loss :0.618, Delta:0.010 (2.1s/epoch)]
[Epoch 17/20 avg.loss :0.608, Delta:0.010 (2.0s/epoch)]
[Epoch 18/20 avg.loss :0.595, Delta:0.013 (2.0s/epoch)]
[Epoch 19/20 avg.loss :0.590, Delta:0.005 (2.1s/epoch)]
[Epoch 20/20 avg.loss :0.590, Delta:-0.001 (2.0s/epoch)]
with dropout (p=0.5) -- accuracy:0.8296
```