Preface
This article builds a minimal image-classification network from scratch, using only basic Python syntax and numpy; no deep learning framework is involved.
I. Importing the required third-party libraries
import numpy as np
import cv2, os, random, pickle
The workhorse here is numpy: the network boils down to matrix computations, so numpy is the natural choice. cv2 handles image loading, while os, random, and pickle cover file listing, shuffling, and model serialization.
II. Building the basic classes
1. The Tensor class
This class bundles a parameter matrix together with its gradient, so that every array used during training can be managed in one place.
class Tensor:
    def __init__(self, shape):
        self.data = np.zeros(shape=shape, dtype=np.float32)
        self.grad = np.zeros(shape=shape, dtype=np.float32)

    def clear_grad(self):
        self.grad = np.zeros_like(self.grad)

    def __str__(self):
        return "Tensor shape: {}, data: {}".format(self.data.shape, self.data)
2. The Initializer class
This is the base class for weight initializers; it stores the target shape and a name that identifies the initializer within the network.
class Initializer:
    def __init__(self, shape=None, name='initializer'):
        self.shape = shape
        self.name = name

    def __call__(self, *args, **kwargs):
        raise NotImplementedError

    def __str__(self):
        return self.name
3. The Constant class
This initializer fills a tensor of the given shape with a constant value; it is used below to initialize the biases.
class Constant(Initializer):
    def __init__(self, value=0., name='constant initializer', *args, **kwargs):
        super().__init__(name=name, *args, **kwargs)
        self.value = value

    def __call__(self, shape=None, *args, **kwargs):
        if shape:
            self.shape = shape
        # check self.shape (not the argument) so a shape given at construction time also passes
        assert self.shape is not None, "the shape of initializer must not be None."
        return self.value + np.zeros(shape=self.shape)
4. The Normal class
This initializer draws the initial weights from a normal distribution with the given mean and standard deviation.
class Normal(Initializer):
    def __init__(self, mean=0., std=0.01, name='normal initializer', *args, **kwargs):
        super().__init__(name=name, *args, **kwargs)
        self.mean = mean
        self.std = std

    def __call__(self, shape=None, *args, **kwargs):
        if shape:
            self.shape = shape
        assert self.shape is not None, "the shape of initializer must not be None."
        return np.random.normal(self.mean, self.std, size=self.shape)
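A brief usage sketch of the two initializers (variable names here are chosen only for illustration):

bias_init = Constant(value=0.)
weight_init = Normal(mean=0., std=0.01)
b = bias_init(shape=(1, 4))    # zeros of shape (1, 4)
w = weight_init(shape=(8, 4))  # samples from N(0, 0.01^2), shape (8, 4)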
5. The Layer class
This is the base class for all layers. It declares the abstract forward and backward methods that every concrete layer must implement.
class Layer:
    def __init__(self, name='layer', *args, **kwargs):
        self.name = name

    def forward(self, *args, **kwargs):
        raise NotImplementedError

    def backward(self):
        raise NotImplementedError

    def parameters(self):
        return []

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def __str__(self):
        return self.name
6. The Linear class
The fully connected (linear) layer, the basic building block of this network.
class Linear(Layer):
    def __init__(self, in_features, out_features, name='linear',
                 weight_attr=Normal(), bias_attr=Constant(), *args, **kwargs):
        super().__init__(name=name, *args, **kwargs)
        self.weights = Tensor((in_features, out_features))
        self.weights.data = weight_attr(self.weights.data.shape)
        self.bias = Tensor((1, out_features))
        self.bias.data = bias_attr(self.bias.data.shape)
        self.input = None

    def forward(self, x):
        self.input = x
        output = np.dot(x, self.weights.data) + self.bias.data
        return output

    def backward(self, gradient):
        self.weights.grad += np.dot(self.input.T, gradient)        # dL/dW
        self.bias.grad += np.sum(gradient, axis=0, keepdims=True)  # dL/db
        input_grad = np.dot(gradient, self.weights.data.T)         # dL/dx
        return input_grad

    def parameters(self):
        return [self.weights, self.bias]

    def __str__(self):
        return "linear layer, weight shape: {}, bias shape: {}".format(
            self.weights.data.shape, self.bias.data.shape)
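To see that the forward and backward shapes line up, here is a small hedged check (shapes chosen only for illustration):

layer = Linear(4, 3)
x = np.random.randn(2, 4)        # a batch of 2 samples
y = layer(x)                     # (2, 3)
upstream = np.ones_like(y)       # stand-in gradient from the next layer
dx = layer.backward(upstream)    # (2, 4), same shape as x
print(layer.weights.grad.shape)  # (4, 3), same shape as the weights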
7. The ReLU activation
class ReLU(Layer):
    def __init__(self, name='relu', *args, **kwargs):
        super().__init__(name=name, *args, **kwargs)
        self.activated = None

    def forward(self, x):
        # np.maximum instead of in-place masking, so the caller's array is left untouched
        self.activated = np.maximum(x, 0)
        return self.activated

    def backward(self, gradient):
        return gradient * (self.activated > 0)
8. The optimizer base class
The base class stores the parameter list, learning rate, and weight-decay settings, and computes the decay gradient for L1 or L2 regularization:
class Optimizer:
    def __init__(self, parameters, learning_rate=0.001, weight_decay=0.0, decay_type='l2'):
        assert decay_type in ['l1', 'l2'], "only support decay_type 'l1' and 'l2', but got {}.".format(decay_type)
        self.parameters = parameters
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.decay_type = decay_type

    def step(self):
        raise NotImplementedError

    def clear_grad(self):
        for p in self.parameters:
            p.clear_grad()

    def get_decay(self, param):
        # gradient of the regularization penalty w.r.t. the parameter values
        if self.decay_type == 'l1':
            return self.weight_decay * np.sign(param)
        elif self.decay_type == 'l2':
            return self.weight_decay * param
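The decay term is simply the gradient of the regularization penalty with respect to the parameters, which is why get_decay above is fed the parameter values:

$$\frac{\partial}{\partial w}\,\lambda\lVert w\rVert_1 = \lambda\,\mathrm{sign}(w), \qquad \frac{\partial}{\partial w}\,\frac{\lambda}{2}\lVert w\rVert_2^2 = \lambda w$$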
9. The SGD optimizer
Below is an SGD optimizer with momentum:
class SGD(Optimizer):
    def __init__(self, momentum=0.9, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.momentum = momentum
        self.velocity = []
        for p in self.parameters:
            self.velocity.append(np.zeros_like(p.grad))

    def step(self):
        for i, p in enumerate(self.parameters):
            decay = self.get_decay(p.data)
            # write the momentum update back into self.velocity so it persists across steps
            self.velocity[i] = self.momentum * self.velocity[i] + p.grad + decay
            p.data = p.data - self.learning_rate * self.velocity[i]
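step implements the classic momentum update, with d the decay term from above:

$$v \leftarrow \mu v + g + d, \qquad w \leftarrow w - \eta\,v$$

where $\mu$ is the momentum coefficient, $g$ the accumulated gradient, and $\eta$ the learning rate.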
10. The SoftmaxWithLogits loss
This is the final stage of the classifier: it combines softmax with cross-entropy, turning raw logits into class probabilities and a scalar loss.
class SoftmaxWithLogits(Layer):
    def __init__(self, reduction='mean', name='softmaxwithlogits', *args, **kwargs):
        super().__init__(name=name, *args, **kwargs)
        assert reduction in ['mean', 'none', 'sum'], "reduction only support 'mean', 'none' and 'sum', but got {}.".format(reduction)
        self.reduction = reduction
        self.logits = None
        self.target = None

    def forward(self, logits, target):
        assert logits.shape[0] == target.shape[0], "The first dimension of logits and target differ: logits shape {} can't match target shape {}.".format(logits.shape, target.shape)
        self.logits = logits
        self.target = target
        loss = []
        for i in range(logits.shape[0]):
            # cross entropy for sample i: -z_y + log(sum_j exp(z_j))
            loss_i = -logits[i, target.squeeze(-1)[i]] + np.log(np.sum(np.exp(logits[i])))
            loss.append(loss_i)
        loss = np.array(loss).reshape(target.shape)
        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:
            return loss

    def backward(self):
        soft_denominator = np.sum(np.exp(self.logits), axis=1, keepdims=True)  # [N, 1]
        eq_grad = np.zeros_like(self.logits)
        for i in range(self.logits.shape[0]):
            eq_grad[i, self.target.squeeze(-1)[i]] = -1
        gradient = np.exp(self.logits) / soft_denominator + eq_grad
        if self.reduction == 'mean':
            gradient = gradient / self.logits.shape[0]  # match the averaged forward loss
        return gradient
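The backward method follows from differentiating the per-sample loss. For logits $z$ and true class $y$:

$$L = -z_{y} + \log\sum_j e^{z_j}, \qquad \frac{\partial L}{\partial z_k} = \frac{e^{z_k}}{\sum_j e^{z_j}} - \mathbf{1}[k = y]$$

which is exactly the np.exp(self.logits) / soft_denominator + eq_grad expression in the code. Note that np.exp can overflow for large logits; production implementations subtract the row-wise maximum first (the log-sum-exp trick), which this simple version omits.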
11. A Sequential container, mimicking other deep learning frameworks
Layers can be passed at construction time or appended later via add; forward runs them in order and backward in reverse:
class Sequential:
    def __init__(self, *args, **kwargs):
        self.graphs = []
        self._parameters = []
        for arg_layer in args:
            if isinstance(arg_layer, Layer):
                self.graphs.append(arg_layer)
                self._parameters += arg_layer.parameters()

    def add(self, layer):
        assert isinstance(layer, Layer), "The type of added layer must be Layer, but got {}.".format(type(layer))
        self.graphs.append(layer)
        self._parameters += layer.parameters()

    def parameters(self):
        # needed by the optimizer (model.parameters() in the training code below)
        return self._parameters

    def forward(self, x):
        for graph in self.graphs:
            x = graph(x)
        return x

    def backward(self, grad):
        # propagate gradients through the layers in reverse order
        for graph in self.graphs[::-1]:
            grad = graph.backward(grad)

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def __str__(self):
        string = 'Sequential:\n'
        for graph in self.graphs:
            string += graph.__str__() + '\n'
        return string
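A minimal usage sketch (layer sizes chosen only for illustration):

net = Sequential(
    Linear(8, 16, name='fc1'),
    ReLU(name='act1'),
)
net.add(Linear(16, 2, name='fc2'))  # layers can also be appended after construction
out = net(np.random.randn(4, 8))    # runs fc1 -> act1 -> fc2
print(out.shape)                    # (4, 2)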
III. Building the data loader
1. The Dataset base class
This base class defines the iteration interface a dataset must provide: __getitem__ and __len__.
class Dataset:
    def __init__(self, *args, **kwargs):
        pass

    def __getitem__(self, idx):
        raise NotImplementedError("'{}' not implemented in class {}".format('__getitem__', self.__class__.__name__))

    def __len__(self):
        raise NotImplementedError("'{}' not implemented in class {}".format('__len__', self.__class__.__name__))
2. The BatchSampler class, which groups sample indices into batches for training
class BatchSampler:
    def __init__(self, dataset=None, shuffle=False, batch_size=1, drop_last=False):
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.shuffle = shuffle
        self.num_data = len(dataset)
        if self.drop_last or (self.num_data % batch_size == 0):
            self.num_samples = self.num_data // batch_size
        else:
            self.num_samples = self.num_data // batch_size + 1
        indices = np.arange(self.num_data)
        if shuffle:
            np.random.shuffle(indices)
        if drop_last:
            indices = indices[:self.num_samples * batch_size]
        self.indices = indices

    def __len__(self):
        return self.num_samples

    def __iter__(self):
        batch_indices = []
        for i in range(self.num_samples):
            if (i + 1) * self.batch_size <= self.num_data:
                for idx in range(i * self.batch_size, (i + 1) * self.batch_size):
                    batch_indices.append(self.indices[idx])
                yield batch_indices
                batch_indices = []
            else:
                for idx in range(i * self.batch_size, self.num_data):
                    batch_indices.append(self.indices[idx])
        if not self.drop_last and len(batch_indices) > 0:
            yield batch_indices
3. The DataLoader class
class DataLoader:
    def __init__(self, dataset, sampler=BatchSampler, shuffle=False, batch_size=1, drop_last=False):
        self.dataset = dataset
        self.batch_sampler = sampler
        self.sampler = self.batch_sampler(dataset, shuffle, batch_size, drop_last)
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.batch_size = batch_size

    def __len__(self):
        return len(self.sampler)

    def __call__(self):
        return self.__iter__()

    def __iter__(self):
        for sample_indices in self.sampler:
            data_list = []
            label_list = []
            for indice in sample_indices:
                data, label = self.dataset[indice]
                data_list.append(data)
                label_list.append(label)
            yield np.stack(data_list, axis=0), np.stack(label_list, axis=0)
        # rebuild the sampler so the next epoch gets a fresh shuffle
        self.sampler = self.batch_sampler(self.dataset, self.shuffle, self.batch_size, self.drop_last)
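To see the loader in action, here is a hedged example with a toy dataset defined only for illustration:

class ToyDataset(Dataset):
    def __len__(self):
        return 10

    def __getitem__(self, idx):
        return np.full((2,), idx, dtype=np.float32), np.array([idx % 2])

loader = DataLoader(ToyDataset(), batch_size=4, shuffle=True, drop_last=True)
for data, label in loader:
    print(data.shape, label.shape)  # (4, 2) (4, 1), two full batches per pass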
4. A dataset class that loads images from local paths (despite its name, CifarDataset works with any folder of images)
class CifarDataset(Dataset):
    def __init__(self, X, Y, image_size):
        self.X = X  # list of image file paths
        self.Y = Y  # list of integer class labels
        self.image_size = image_size

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # np.fromfile + cv2.imdecode also reads paths containing non-ASCII characters on Windows
        img_cv = cv2.imdecode(np.fromfile(self.X[idx], dtype=np.uint8), 1)
        img_cv = cv2.resize(img_cv, (self.image_size, self.image_size))
        return img_cv / 255.0, np.array([self.Y[idx]])
IV. Building the evaluation helper
1. A running-average meter, used to track metrics such as loss and accuracy
class AverageMeter:
    def __init__(self):
        self.val = 0.
        self.count = 0

    def update(self, value, n=1):
        self.val += value
        self.count += n

    def __call__(self):
        return self.val / self.count

    def reset(self):
        self.val = 0.
        self.count = 0

    def __str__(self):
        return str(self.__call__())
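For example (numbers are illustrative):

meter = AverageMeter()
meter.update(2.0)
meter.update(4.0)
print(meter())  # 3.0, the running mean of the values so far
meter.reset()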
V. Training
1. A preprocessing helper that scans the dataset directory and splits it into training and validation sets
def get_data_iter(TrainDir, split_train=0.9):
    name_list = os.listdir(TrainDir)  # one sub-directory per class
    train_data_x = []
    train_data_y = []
    val_data_x = []
    val_data_y = []
    for name in name_list:
        pics_dir = os.path.join(TrainDir, name)
        pic_list = os.listdir(pics_dir)
        random.shuffle(pic_list)
        train_l = pic_list[0:int(len(pic_list) * split_train)]
        val_l = pic_list[int(len(pic_list) * split_train):]
        for pic in train_l:
            train_data_x.append(os.path.join(pics_dir, pic))
            train_data_y.append(name_list.index(name))
        for pic in val_l:
            val_data_x.append(os.path.join(pics_dir, pic))
            val_data_y.append(name_list.index(name))
    return train_data_x, train_data_y, val_data_x, val_data_y, name_list
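The function assumes one sub-directory per class, with the folder names serving as class names; the layout below is only an illustration:

DataImage/
    class_a/
        img001.jpg
        img002.jpg
    class_b/
        img101.jpg
        img102.jpg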
2. Model construction
The network is a stack of fully connected layers with ReLU activations. Note that image_size, class_names, and learning_rate are defined in the hyperparameter section below, so in a single script that section must run first.
model = Sequential(
    Linear(3 * image_size * image_size, 96, name='linear1'),
    ReLU(name='relu1'),
    Linear(96, 256, name='linear2'),
    ReLU(name='relu2'),
    Linear(256, 512, name='linear3'),
    ReLU(name='relu3'),
    Linear(512, 512, name='linear4'),
    ReLU(name='relu4'),
    Linear(512, 1024, name='linear5'),
    ReLU(name='relu5'),
    Linear(1024, 1024, name='linear6'),
    ReLU(name='relu6'),
    Linear(1024, len(class_names), name='linear7'),
)
opt = SGD(parameters=model.parameters(), learning_rate=learning_rate, weight_decay=0.0005, decay_type='l2')
loss_fn = SoftmaxWithLogits()
3. The validation function
def evaluate(model, val_dataloader):  # named evaluate to avoid shadowing the builtin eval
    predict_labels = []
    labels = []
    for x, y in val_dataloader:
        x = x.reshape((1, -1))  # the validation loader uses batch_size=1
        logits = model(x)
        pred = np.argmax(logits, axis=1)
        predict_labels.append(pred)
        labels.append(y.squeeze(1))
    pred = np.array(predict_labels)
    labels = np.array(labels)
    acc = np.sum(pred == labels) / len(labels)
    print("val dataset accuracy:", acc)
    return acc
4. Hyperparameter settings
epoches = 500
batch_size = 4
learning_rate = 0.001
image_size = 224
train_X, train_Y, val_X, val_Y, class_names = get_data_iter(r'D:\Ctu\Ctu_Project_DL\DataSet\DataSet_Classification_Chess\DataImage', 0.9)
train_dataset = CifarDataset(train_X, train_Y, image_size)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
val_dataset = CifarDataset(val_X, val_Y, image_size)
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False, drop_last=False)
5. The training loop
lddl_acc = []
loss_avg = AverageMeter()
for epoch in range(1, epoches + 1):
    acc = evaluate(model, val_dataloader=val_dataloader)
    lddl_acc.append(acc)
    loss_avg.reset()  # track the average loss of this epoch only
    for idx, (x, y) in enumerate(train_dataloader):
        x = x.reshape((batch_size, -1))  # flatten images; drop_last=True keeps the batch size fixed
        logits = model(x)
        loss = loss_fn(logits, y)
        loss_avg.update(loss)
        grad = loss_fn.backward()
        model.backward(grad)
        opt.step()
        opt.clear_grad()
        print("epoch {} iter {}: loss: {}".format(epoch, idx, loss_avg))
    print("epoch: {}. loss: {}".format(epoch, loss_avg))
    obj = pickle.dumps(model)
    with open('alexnet.ctu', 'wb') as f:
        f.write(obj)
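Because the whole Sequential object is pickled, it can later be restored for inference. A hedged sketch (the file name matches the training code above; the input here is random stand-in data rather than a real preprocessed image):

with open('alexnet.ctu', 'rb') as f:
    restored = pickle.loads(f.read())
img = np.random.rand(1, 3 * image_size * image_size)  # stand-in for a flattened, normalized image
pred = np.argmax(restored(img), axis=1)
print(class_names[pred[0]])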