BP Neural Network Source Code
One-hot
import random
import numpy as np

def one_hot(labels, classes):
    n = len(labels)
    output = np.zeros((n, classes), dtype=np.int32)  # build an n x classes matrix
    for row, label in enumerate(labels):
        output[row, label] = 1
    return output
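A quick sanity check of one_hot on a small, made-up label list:

labels = [0, 2, 1]
print(one_hot(labels, classes=3))
# [[1 0 0]
#  [0 0 1]
#  [0 1 0]]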
Dataset and DataLoader
class Dataset:
    def __init__(self, images, labels):
        self.images = images
        self.labels = labels
    def __getitem__(self, index):
        return self.images[index], self.labels[index]
    def __len__(self):
        return len(self.images)
class DataLoaderIterator:
    def __init__(self, dataloader):
        self.dataloader = dataloader
        self.cursor = 0
        self.indexs = list(range(self.dataloader.count_data))
        if self.dataloader.shuffle:
            random.shuffle(self.indexs)
    def __next__(self):
        if self.cursor >= self.dataloader.count_data:
            raise StopIteration()
        batch_data = []
        # the last batch may be smaller than batch_size
        remain = min(self.dataloader.batch_size, self.dataloader.count_data - self.cursor)
        for n in range(remain):
            index = self.indexs[self.cursor]
            data = self.dataloader.dataset[index]
            # lazily create one list per component (e.g. images, labels)
            if len(batch_data) == 0:
                batch_data = [[] for _ in range(len(data))]
            for col, item in enumerate(data):
                batch_data[col].append(item)
            self.cursor += 1
        # stack each component list into a single array
        for col in range(len(batch_data)):
            batch_data[col] = np.vstack(batch_data[col])
        return batch_data
class DataLoader:
    def __init__(self, dataset, batch_size, shuffle):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.count_data = len(self.dataset)
    def __iter__(self):
        return DataLoaderIterator(self)
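A minimal sketch of how Dataset and DataLoader work together, using random toy data (the shapes here are arbitrary):

images = np.random.randn(10, 4)  # 10 samples, 4 features
labels = one_hot(np.random.randint(0, 3, size=10), classes=3)
loader = DataLoader(Dataset(images, labels), batch_size=4, shuffle=True)
for batch_images, batch_labels in loader:
    print(batch_images.shape, batch_labels.shape)  # (4, 4) (4, 3); the last batch is (2, 4) (2, 3)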
Module
class Module:
    def __init__(self, name):
        self.name = name
        self.train_mode = False
    def __call__(self, *args):
        return self.forward(*args)
    def train(self):
        self.train_mode = True
        for m in self.modules():
            m.train()
    def eval(self):
        self.train_mode = False
        for m in self.modules():
            m.eval()
    def modules(self):
        # collect the direct child modules from the instance attributes
        ms = []
        for attr in self.__dict__:
            m = self.__dict__[attr]
            if isinstance(m, Module):
                ms.append(m)
        return ms
    def params(self):
        # collect this module's own parameters, then recurse into children
        ps = []
        for attr in self.__dict__:
            p = self.__dict__[attr]
            if isinstance(p, Parameter):
                ps.append(p)
        for m in self.modules():
            ps.extend(m.params())
        return ps
    def info(self, n):
        output = f"{self.name}\n"
        for m in self.modules():
            output += (' ' * (n + 1)) + f"{m.info(n + 1)}\n"
        return output[:-1]
    def __repr__(self):
        return self.info(0)
ModuleList
class ModuleList(Module):
    def __init__(self, *args):
        super().__init__("ModuleList")
        self.ms = list(args)
    def modules(self):
        return self.ms
    def forward(self, x):
        for m in self.ms:
            x = m(x)
        return x
    def backward(self, G):
        # propagate the gradient through the submodules in reverse order
        for i in range(len(self.ms) - 1, -1, -1):
            G = self.ms[i].backward(G)
        return G
Parameter
class Parameter:
    def __init__(self, value):
        self.value = value
        self.delta = np.zeros(value.shape)
    def zero_grad(self):
        self.delta[...] = 0
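How Parameter interacts with Module.params(): gradients accumulate in delta across backward calls, so they must be cleared before each optimizer step. A tiny sketch with a hypothetical Affine module (the name and shapes are just for illustration):

class Affine(Module):
    def __init__(self):
        super().__init__("Affine")
        self.w = Parameter(np.ones((2, 2)))

m = Affine()
m.w.delta += 1.0        # simulate a gradient accumulated by backward()
print(len(m.params()))  # 1: params() finds self.w via isinstance
m.w.zero_grad()
print(m.w.delta)        # back to all zeros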
Initializer
class Initializer:
    def __init__(self, name):
        self.name = name
    def __call__(self, *args):
        return self.apply(*args)

class GaussInitializer(Initializer):
    def __init__(self, mu, sigma):
        super().__init__("GaussInitializer")
        self.mu = mu
        self.sigma = sigma
    def apply(self, value):
        value[...] = np.random.normal(self.mu, self.sigma, value.shape)
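GaussInitializer fills an existing array in place; calling the instance forwards to apply. For example:

w = np.zeros((100, 100))
GaussInitializer(mu=0, sigma=0.1)(w)
print(w.std())  # roughly 0.1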
Linear
class Linear(Module):
    def __init__(self, input_feature, output_feature):
        super().__init__("Linear")
        self.input_feature = input_feature
        self.output_feature = output_feature
        self.weights = Parameter(np.zeros((input_feature, output_feature)))
        self.bias = Parameter(np.zeros((1, output_feature)))
        # He (Kaiming) initialization: N(0, sqrt(2 / fan_in)), suited to ReLU networks
        initer = GaussInitializer(0, np.sqrt(2 / input_feature))
        initer.apply(self.weights.value)
    def forward(self, x):
        self.x_save = x.copy()
        return x @ self.weights.value + self.bias.value
    def backward(self, G):
        # dL/dW = x^T G, dL/db = sum over the batch, dL/dx = G W^T
        self.weights.delta += self.x_save.T @ G
        self.bias.delta += np.sum(G, 0)
        return G @ self.weights.value.T
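Since backward both accumulates parameter gradients and returns the gradient with respect to the input, a finite-difference check is a useful sanity test. A sketch with toy shapes, taking the loss to be the plain sum of the outputs so that G is all ones:

lin = Linear(3, 2)
x = np.random.randn(5, 3)
y = lin(x)
lin.backward(np.ones_like(y))           # analytic gradients into .delta

h = 1e-5
lin.weights.value[0, 0] += h            # perturb one weight
num = (np.sum(lin(x)) - np.sum(y)) / h  # numerical dL/dW[0, 0]
print(num, lin.weights.delta[0, 0])     # the two should agree closely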
Activation Functions
class ReLU(Module):
    def __init__(self, inplace=True):
        super().__init__("ReLU")
        self.inplace = inplace
    def forward(self, x):
        self.negative_position = x < 0
        if not self.inplace:
            x = x.copy()
        x[self.negative_position] = 0
        return x
    def backward(self, G):
        if not self.inplace:
            G = G.copy()
        G[self.negative_position] = 0
        return G
class SWish(Module):
    def __init__(self):
        super().__init__("SWish")
    @staticmethod
    def sigmoid(x):
        # numerically stable sigmoid: avoid exp overflow for large |x|
        p0 = x < 0
        p1 = ~p0
        x = x.copy()
        x[p0] = np.exp(x[p0]) / (1 + np.exp(x[p0]))
        x[p1] = 1 / (1 + np.exp(-x[p1]))
        return x
    def forward(self, x):
        self.x_save = x.copy()
        self.sx = self.sigmoid(x)
        return x * self.sx
    def backward(self, G):
        # d(swish)/dx = sigmoid(x) + x * sigmoid(x) * (1 - sigmoid(x))
        return G * (self.sx + self.x_save * self.sx * (1 - self.sx))
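The Swish derivative is easy to get wrong, so a quick finite-difference check against backward is worthwhile (toy input; Swish is elementwise, so a uniform perturbation checks every entry):

act = SWish()
x = np.random.randn(4, 4)
y = act(x)
g = act.backward(np.ones_like(x))  # analytic dy/dx
h = 1e-6
num = (act(x + h) - y) / h         # numerical dy/dx
print(np.abs(num - g).max())       # should be close to zero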
Dropout
class Dropout(Module):
    def __init__(self, prob_keep=0.5, inplace=True):
        super().__init__("Dropout")
        self.prob_keep = prob_keep
        self.inplace = inplace
    def forward(self, x):
        if not self.train_mode:
            return x
        # mask marks the units to drop: each unit is dropped with probability 1 - prob_keep
        self.mask = np.random.binomial(size=x.shape, p=1 - self.prob_keep, n=1) == 1
        if not self.inplace:
            x = x.copy()
        x[self.mask] = 0
        x *= 1 / self.prob_keep  # inverted dropout: keep the expected activation unchanged
        return x
    def backward(self, G):
        if not self.inplace:
            G = G.copy()
        G[self.mask] = 0
        G *= 1 / self.prob_keep
        return G
class DropoutMul(Module):
    def __init__(self, prob_keep=0.5, inplace=True):
        super().__init__("DropoutMul")
        self.prob_keep = prob_keep
        self.inplace = inplace
    def forward(self, x):
        if not self.train_mode:
            return x
        # multiplicative variant: mask entries are 1 with probability prob_keep, else 0
        self.mask = np.random.binomial(size=x.shape, p=self.prob_keep, n=1)
        if not self.inplace:
            x = x.copy()
        x *= self.mask
        x *= 1 / self.prob_keep
        return x
    def backward(self, G):
        if not self.inplace:
            G = G.copy()
        G *= self.mask
        G *= 1 / self.prob_keep
        return G
BatchNorm
class BatchNormalization(Module):
    def __init__(self, in_feature, momentum=0.9, eps=1e-8):
        super().__init__("BatchNormalization")
        self.mu = 0   # running mean, updated during training
        self.var = 1  # running variance
        self.momentum = momentum
        self.eps = eps
        self.in_feature = in_feature
        self.gamma = Parameter(np.ones(in_feature))
        self.beta = Parameter(np.zeros(in_feature))
    def forward(self, x):
        # inference: normalize with the running statistics
        if not self.train_mode:
            y = (x - self.mu) / np.sqrt(self.var + self.eps)
            return y * self.gamma.value.reshape(1, -1, 1, 1) + self.beta.value.reshape(1, -1, 1, 1)
        # training: normalize with per-channel batch statistics
        self.b_mu = np.mean(x, axis=(0, 2, 3), keepdims=True)
        self.b_var = np.var(x, axis=(0, 2, 3), keepdims=True)
        self.y = (x - self.b_mu) / np.sqrt(self.b_var + self.eps)
        self.mu = self.b_mu * self.momentum + self.mu * (1 - self.momentum)
        n = x.size / x.shape[1]
        unbiased_var = self.b_var * n / (n - 1)  # the running variance uses the unbiased estimate
        self.var = unbiased_var * self.momentum + self.var * (1 - self.momentum)
        return self.y * self.gamma.value.reshape(1, -1, 1, 1) + self.beta.value.reshape(1, -1, 1, 1)
    def backward(self, G):
        # note: overwrites .delta rather than accumulating like Linear
        self.gamma.delta = np.sum(G * self.y, axis=(0, 2, 3))
        self.beta.delta = np.sum(G, axis=(0, 2, 3))
        # simplified input gradient: treats the batch mean and variance as constants
        return G * self.gamma.value.reshape(1, -1, 1, 1) / np.sqrt(self.b_var + self.eps)
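Note that BatchNormalization as written reduces over axes (0, 2, 3), so it expects 4-D NCHW input (per-channel statistics), even though the rest of this post works with 2-D batches; 2-D activations would need reshaping to (N, C, 1, 1) first. A toy training-mode forward pass:

bn = BatchNormalization(in_feature=3)
bn.train_mode = True                     # normally set via model.train()
x = np.random.randn(8, 3, 4, 4) * 5 + 2  # N=8, C=3, H=W=4
y = bn(x)
print(y.mean(axis=(0, 2, 3)))            # per-channel mean, approximately 0
print(y.var(axis=(0, 2, 3)))             # per-channel variance, approximately 1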
Loss
class SigmoidCrossEntropy(Module):
    def __init__(self, params, weight_decay=1e-5):
        super().__init__("SigmoidCrossEntropy")
        self.params = params
        self.weight_decay = weight_decay
    def sigmoid(self, x):
        # numerically stable sigmoid (same trick as in SWish)
        p0 = x < 0
        p1 = ~p0
        x = x.copy()
        x[p0] = np.exp(x[p0]) / (1 + np.exp(x[p0]))
        x[p1] = 1 / (1 + np.exp(-x[p1]))
        return x
    def decay_loss(self):
        # weight decay based on the L2 norm of each parameter tensor
        loss = 0
        for p in self.params:
            loss += np.sqrt(np.sum(p.value ** 2)) / (2 * p.value.size) * self.weight_decay
        return loss
    def decay_backward(self):
        # gradient of decay_loss with respect to each parameter
        eps = 1e-8
        for p in self.params:
            p.delta += 1 / (2 * np.sqrt(np.sum(p.value ** 2)) + eps) / (2 * p.value.size) * self.weight_decay * 2 * p.value
    def forward(self, x, label_onehot):
        eps = 1e-6
        self.label_onehot = label_onehot
        self.predict = self.sigmoid(x)
        self.predict = np.clip(self.predict, a_max=1 - eps, a_min=eps)  # avoid log(0)
        self.batch_size = self.predict.shape[0]
        return -np.sum(label_onehot * np.log(self.predict) + (1 - label_onehot) *
                       np.log(1 - self.predict)) / self.batch_size + self.decay_loss()
    def backward(self):
        self.decay_backward()
        return (self.predict - self.label_onehot) / self.batch_size
class SoftmaxCrossEntropy(Module):
    def __init__(self):
        super().__init__("SoftmaxCrossEntropy")
    def softmax(self, x):
        # subtract the row-wise max for numerical stability
        max_x = np.max(x, axis=1, keepdims=True)
        exp_x = np.exp(x - max_x)
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)
    def forward(self, x, label_onehot):
        eps = 1e-6
        self.label_onehot = label_onehot
        self.predict = self.softmax(x)
        self.predict = np.clip(self.predict, a_max=1 - eps, a_min=eps)
        self.batch_size = self.predict.shape[0]
        return -np.sum(label_onehot * np.log(self.predict)) / self.batch_size
    def backward(self):
        # softmax + cross-entropy combine into the simple gradient (p - y) / batch_size
        return (self.predict - self.label_onehot) / self.batch_size
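Both losses follow the same contract: forward takes the raw logits plus one-hot labels and returns a scalar, and backward returns dL/dlogits, which is then fed into the network's backward. For example:

logits = np.random.randn(4, 3)
targets = one_hot([0, 2, 1, 1], classes=3)
loss_fn = SoftmaxCrossEntropy()
loss = loss_fn(logits, targets)
G = loss_fn.backward()  # dL/dlogits, shape (4, 3)
print(loss, G.shape)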
Optimizer
class Optimizer:
    def __init__(self, name, model, lr):
        self.name = name
        self.model = model
        self.lr = lr
        self.params = model.params()
    def zero_grad(self):
        for param in self.params:
            param.zero_grad()
    def set_lr(self, lr):
        self.lr = lr

class SGD(Optimizer):
    def __init__(self, model, lr=1e-3):
        super().__init__("SGD", model, lr)
    def step(self):
        for param in self.params:
            param.value -= self.lr * param.delta

class SGDMomentum(Optimizer):
    def __init__(self, model, lr=1e-3, momentum=0.9):
        super().__init__("SGDMomentum", model, lr)
        self.momentum = momentum
        for param in self.params:
            param.v = 0  # velocity
    def step(self):
        for param in self.params:
            param.v = self.momentum * param.v - self.lr * param.delta
            param.value += param.v

class Adam(Optimizer):
    def __init__(self, model, lr=1e-3, beta1=0.9, beta2=0.999, l2_regularization=0):
        super().__init__("Adam", model, lr)
        self.beta1 = beta1
        self.beta2 = beta2
        self.l2_regularization = l2_regularization
        self.t = 0
        for param in self.params:
            param.m = 0  # first-moment estimate
            param.v = 0  # second-moment estimate
    def step(self):
        eps = 1e-8
        self.t += 1
        for param in self.params:
            g = param.delta
            param.m = self.beta1 * param.m + (1 - self.beta1) * g
            param.v = self.beta2 * param.v + (1 - self.beta2) * g ** 2
            # bias-corrected moment estimates
            mt_ = param.m / (1 - self.beta1 ** self.t)
            vt_ = param.v / (1 - self.beta2 ** self.t)
            param.value -= self.lr * mt_ / (np.sqrt(vt_) + eps) + self.l2_regularization * param.value
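Putting all the pieces together, here is a minimal end-to-end training sketch on random data; the shapes, epoch count and hyperparameters are illustrative, not from any particular experiment:

np.random.seed(0)
images = np.random.randn(100, 20)
labels = np.random.randint(0, 5, size=100)
loader = DataLoader(Dataset(images, one_hot(labels, 5)), batch_size=16, shuffle=True)

model = ModuleList(Linear(20, 32), ReLU(), Linear(32, 5))
loss_fn = SoftmaxCrossEntropy()
optim = Adam(model, lr=1e-3)

model.train()
for epoch in range(10):
    for x, y in loader:
        loss = loss_fn(model(x), y)
        optim.zero_grad()
        model.backward(loss_fn.backward())  # push dL/dlogits back through the stack
        optim.step()
    print(f"epoch {epoch}: loss {loss:.4f}")
model.eval()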