深度学习代码

HandsomeDr

已于 2024-04-23 20:00:19 修改

阅读量141

点赞数 2

文章标签： python 深度学习

于 2024-04-20 17:46:48 首次发布

本文链接：https://blog.csdn.net/HandsomeWey/article/details/138005535

版权

声明：

打算最近把之前读研时候写(bushi，ctrl+c，ctrl+v)的代码整理一下

包括手势识别的3种方案，肤色处理的10种方案

这些也是之前别人在网上开源的模型，经过修改和适配的代码。

不过最近公司项目多，平常时间少，有空整理一下发上来吧

其中，主要是根据B站UP主太阳花的小绿豆-CSDN博客（我实际导师）的代码，简单修改跑通的

其中，也遇到了一些问题，主要是环境配置，还有运行时候的问题，我现在上传的主要是我之前跑过的代码(在导师的基础上做了一些小适配)，有遇到相同问题的可以和我沟通交流一下。

1.残差网络

1.1.model.py，模型代码

from tensorflow.keras import layers, Model, Sequential
#这是resnet网络模型

class BasicBlock(layers.Layer):
    expansion = 1

    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):          #out_channel针对的就是我们残差结构当中卷积层所使用卷积核的个数，strides默认等于1的，downsample指的是下采样函数，**kwargs可变长度字典
        super(BasicBlock, self).__init__(**kwargs)
        self.conv1 = layers.Conv2D(out_channel, kernel_size=3, strides=strides,
                                   padding="SAME", use_bias=False)                  #当padding="SAME"时，output=input/stride(向上取整)，use_bias=False即不使用偏置，因为下面使用BN层的时候是不需要使用偏置bias的
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)            #momentum=0.99是动量，epsilon是防止计算过程中分母为0的小数
        # -----------------------------------------
        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, strides=1,
                                   padding="SAME", use_bias=False)
        self.bn2 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)
        # -----------------------------------------
        self.downsample = downsample
        self.relu = layers.ReLU()               #激活函数
        self.add = layers.Add()

    def call(self, inputs, training=False):     #inputs就是传入的特征矩阵
        identity = inputs
        if self.downsample is not None:
            identity = self.downsample(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)      #training用来控制Batchnormalization训练过程和验证过程展现出来不同的状态
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x, training=training)

        x = self.add([identity, x])
        x = self.relu(x)

        return x


class Bottleneck(layers.Layer):         #两边粗中间细的瓶颈结构
    expansion = 4           #卷积核变化的系数

    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):          #这里的out_channel是残差结构主分支上第一个卷积层使用的卷积核个数
        super(Bottleneck, self).__init__(**kwargs)
        self.conv1 = layers.Conv2D(out_channel, kernel_size=1, use_bias=False, name="conv1")    #这里传入的名字是为了和一会在进行迁移学习时能够与我们预训练模型权重参数的名字保持一致
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv1/BatchNorm")
        # -----------------------------------------
        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, use_bias=False,
                                   strides=strides, padding="SAME", name="conv2")
        self.bn2 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv2/BatchNorm")
        # -----------------------------------------
        self.conv3 = layers.Conv2D(out_channel * self.expansion, kernel_size=1, use_bias=False, name="conv3")
        self.bn3 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv3/BatchNorm")
        # -----------------------------------------
        self.relu = layers.ReLU()
        self.downsample = downsample
        self.add = layers.Add()

    def call(self, inputs, training=False):
        identity = inputs
        if self.downsample is not None:
            identity = self.downsample(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = self.relu(x)

        x = self.conv3(x)
        x = self.bn3(x, training=training)

        x = self.add([x, identity])
        x = self.relu(x)

        return x


def _make_layer(block, in_channel, channel, block_num, name, strides=1):
    downsample = None
    if strides != 1 or in_channel != channel * block.expansion:
        downsample = Sequential([
            layers.Conv2D(channel * block.expansion, kernel_size=1, strides=strides,
                          use_bias=False, name="conv1"),
            layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="BatchNorm")
        ], name="shortcut")

    layers_list = []
    layers_list.append(block(channel, downsample=downsample, strides=strides, name="unit_1"))

    for index in range(1, block_num):
        layers_list.append(block(channel, name="unit_" + str(index + 1)))

    return Sequential(layers_list, name=name)

#这里使用的是keras的functional API方法搭建的模型，如果想使用subclass方法搭建的模型参见项目中的subclassed_model.py文件

#block是数字组成的列表，include_top
def _resnet(block, blocks_num, im_width=224, im_height=224, num_classes=1000, include_top=True):
    # tensorflow中的tensor通道排序是NHWC
    # (None, 224, 224, 3)
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")
    x = layers.Conv2D(filters=64, kernel_size=7, strides=2,
                      padding="SAME", use_bias=False, name="conv1")(input_image)
    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name="conv1/BatchNorm")(x)
    x = layers.ReLU()(x)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME")(x)
    #每调用一次_make_layer函数就生成对应conv_x的一系列残差结构
    x = _make_layer(block, x.shape[-1], 64, blocks_num[0], name="block1")(x)        #生成conv2.x,  x.shape对应上一层输出特征矩阵的shape，对应[batch,height,weight,channel],所以[-1]代表channel深度,64为该层残差结构所对应第一层卷积层的卷积核个数
    x = _make_layer(block, x.shape[-1], 128, blocks_num[1], strides=2, name="block2")(x)
    x = _make_layer(block, x.shape[-1], 256, blocks_num[2], strides=2, name="block3")(x)
    x = _make_layer(block, x.shape[-1], 512, blocks_num[3], strides=2, name="block4")(x)

    if include_top:
        x = layers.GlobalAvgPool2D()(x)  # pool + flatten
        x = layers.Dense(num_classes, name="logits")(x)
        predict = layers.Softmax()(x)
    else:
        predict = x

    model = Model(inputs=input_image, outputs=predict)

    return model


def resnet34(im_width=224, im_height=224, num_classes=1000):
    return _resnet(BasicBlock, [3, 4, 6, 3], im_width, im_height, num_classes)


def resnet50(im_width=224, im_height=224, num_classes=1000, include_top=True):
    return _resnet(Bottleneck, [3, 4, 6, 3], im_width, im_height, num_classes, include_top)


def resnet101(im_width=224, im_height=224, num_classes=1000, include_top=True):
    return _resnet(Bottleneck, [3, 4, 23, 3], im_width, im_height, num_classes, include_top)

1.2.predict.py，训练结束后预测调用

from model import resnet50
from PIL import Image
import numpy as np
import json
import matplotlib.pyplot as plt
import tensorflow as tf

im_height = 224
im_width = 224

# load image
img = Image.open("img4.jpg")                #这里是放置在文件夹下的
# img = Image.open("img.png")
# 将非3通道的img1.png转为3通道的
# img = Image.open("img.png").convert("RGB")


img = img.resize((im_width, im_height))
plt.imshow(img)

# scaling pixel value to (0-1)
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94
img = np.array(img).astype(np.float32)
img = img - [_R_MEAN, _G_MEAN, _B_MEAN]

# Add the image to a batch where it's the only member.
img = (np.expand_dims(img, 0))

# read class_indict
try:
    json_file = open('./class_indices.json', 'r')
    class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)

feature = resnet50(num_classes=6, include_top=False)
feature.trainable = False
model = tf.keras.Sequential([feature,
                             tf.keras.layers.GlobalAvgPool2D(),
                             tf.keras.layers.Dropout(rate=0.5),
                             tf.keras.layers.Dense(1024),
                             tf.keras.layers.Dropout(rate=0.5),
                             tf.keras.layers.Dense(6),
                             tf.keras.layers.Softmax()])
# model.build((None, 224, 224, 3))  # when using subclass model
model.load_weights('./save_weights/resNet_50.ckpt')
result = model.predict(img)
prediction = np.squeeze(result)
predict_class = np.argmax(result)
print(class_indict[str(predict_class)], prediction[predict_class])
plt.show()

1.3.read_ckpt.py，读取ckpt权重文件使用

import tensorflow as tf


def rename_var(ckpt_path, new_ckpt_path, num_classes=6):            #num_classes是需要的节点数
    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:
        var_list = tf.train.list_variables(ckpt_path)
        new_var_list = []

        for var_name, shape in var_list:
            print(var_name)
            if var_name in except_list:
                continue
            var = tf.train.load_variable(ckpt_path, var_name)
            new_var_name = var_name.replace('resnet_v1_50/', "")        #修改模型权重的名称
            new_var_name = new_var_name.replace("bottleneck_v1/", "")       #修改模型权重的名称
            new_var_name = new_var_name.replace("shortcut/weights", "shortcut/conv1/kernel")        #修改模型权重的名称
            new_var_name = new_var_name.replace("weights", "kernel")        #修改模型权重的名称
            new_var_name = new_var_name.replace("biases", "bias")       #修改模型权重的名称
            re_var = tf.Variable(var, name=new_var_name)
            new_var_list.append(re_var)
        #下面重新定义了全连接层                                     #这里的2048是50层的全连接层输出节点
        re_var = tf.Variable(tf.keras.initializers.he_uniform()([2048, num_classes]), name="logits/kernel")
        new_var_list.append(re_var)
        re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name="logits/bias")
        new_var_list.append(re_var)
        saver = tf.compat.v1.train.Saver(new_var_list)
        sess.run(tf.compat.v1.global_variables_initializer())
        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)


except_list = ['global_step', 'resnet_v1_50/mean_rgb', 'resnet_v1_50/logits/biases', 'resnet_v1_50/logits/weights']
#删去不需要的层结构
ckpt_path = './resnet_v1_50.ckpt'
new_ckpt_path = './pretrain_weights.ckpt'
num_classes = 6
rename_var(ckpt_path, new_ckpt_path, num_classes)

1.4.read_h5.py，读取权重文件(h5类型)使用

import h5py

f = h5py.File('./save_weights/resNet_1.h5', 'r')
for root_name, g in f.items():
    print(root_name)
    for _, weights_dirs in g.attrs.items():
        for i in weights_dirs:
            name = root_name + "/" + str(i, encoding="utf-8")
            data = f[name]
            print(data.value)

1.5.subclassed_model.py

from tensorflow.keras import layers, Model, Sequential


class BasicBlock(layers.Layer):
    expansion = 1

    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):
        super(BasicBlock, self).__init__(**kwargs)
        self.conv1 = layers.Conv2D(out_channel, kernel_size=3, strides=strides,
                                   padding="SAME", use_bias=False)
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)
        # -----------------------------------------
        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, strides=1,
                                   padding="SAME", use_bias=False)
        self.bn2 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)
        # -----------------------------------------
        self.downsample = downsample
        self.relu = layers.ReLU()
        self.add = layers.Add()

    def call(self, inputs, training=False, **kwargs):
        identity = inputs
        if self.downsample is not None:
            identity = self.downsample(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x, training=training)

        x = self.add([identity, x])
        x = self.relu(x)

        return x


class Bottleneck(layers.Layer):
    expansion = 4

    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):
        super(Bottleneck, self).__init__(**kwargs)
        self.conv1 = layers.Conv2D(out_channel, kernel_size=1, use_bias=False, name="conv1")
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="conv1/BatchNorm")
        # -----------------------------------------
        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, use_bias=False,
                                   strides=strides, padding="SAME", name="conv2")
        self.bn2 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="conv2/BatchNorm")
        # -----------------------------------------
        self.conv3 = layers.Conv2D(out_channel * self.expansion, kernel_size=1, use_bias=False, name="conv3")
        self.bn3 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="conv3/BatchNorm")
        # -----------------------------------------
        self.relu = layers.ReLU()
        self.downsample = downsample
        self.add = layers.Add()

    def call(self, inputs, training=False, **kwargs):
        identity = inputs
        if self.downsample is not None:
            identity = self.downsample(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = self.relu(x)

        x = self.conv3(x)
        x = self.bn3(x, training=training)

        x = self.add([x, identity])
        x = self.relu(x)

        return x


class ResNet(Model):
    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.include_top = include_top
        self.conv1 = layers.Conv2D(filters=64, kernel_size=7, strides=2, padding="SAME",
                                   use_bias=False, name="conv1")
        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="conv1/BatchNorm")
        self.relu1 = layers.ReLU(name="relu1")
        self.maxpool1 = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool1")

        self.block1 = self._make_layer(block, True, 64, blocks_num[0], name="block1")
        self.block2 = self._make_layer(block, False, 128, blocks_num[1], strides=2, name="block2")
        self.block3 = self._make_layer(block, False, 256, blocks_num[2], strides=2, name="block3")
        self.block4 = self._make_layer(block, False, 512, blocks_num[3], strides=2, name="block4")

        if self.include_top:
            self.avgpool = layers.GlobalAvgPool2D(name="avgpool1")
            self.fc = layers.Dense(num_classes, name="logits")
            self.softmax = layers.Softmax()

    def call(self, inputs, training=False, **kwargs):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self.relu1(x)
        x = self.maxpool1(x)

        x = self.block1(x, training=training)
        x = self.block2(x, training=training)
        x = self.block3(x, training=training)
        x = self.block4(x, training=training)

        if self.include_top:
            x = self.avgpool(x)
            x = self.fc(x)
            x = self.softmax(x)

        return x

    def _make_layer(self, block, first_block, channel, block_num, name=None, strides=1):
        downsample = None
        if strides != 1 or first_block is True:
            downsample = Sequential([
                layers.Conv2D(channel * block.expansion, kernel_size=1, strides=strides,
                              use_bias=False, name="conv1"),
                layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name="BatchNorm")
            ], name="shortcut")

        layers_list = []
        layers_list.append(block(channel, downsample=downsample, strides=strides, name="unit_1"))

        for index in range(1, block_num):
            layers_list.append(block(channel, name="unit_" + str(index + 1)))

        return Sequential(layers_list, name=name)


def resnet34(num_classes=1000, include_top=True):
    block = BasicBlock
    block_num = [3, 4, 6, 3]
    return ResNet(block, block_num, num_classes, include_top)


def resnet101(num_classes=1000, include_top=True):
    block = Bottleneck
    blocks_num = [3, 4, 23, 3]
    return ResNet(block, blocks_num, num_classes, include_top)

1.6.train.py，训练使用

from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from model import resnet50
import tensorflow as tf
import json
import os
import PIL.Image as im
import numpy as np

#data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
#image_path = data_root + "/data_set/flower_data/"  # flower data set path
#data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path


# train_dir = image_path + "train"
# validation_dir = image_path + "test"
train_dir = "./train"
validation_dir = "./test"


im_height = 224
im_width = 224
batch_size = 16
epochs = 200

_R_MEAN = 123.68    #对应R通道，G通道，B通道的一个均值
_G_MEAN = 116.78
_B_MEAN = 103.94


def pre_function(img):
    # img = im.open('test.jpg')
    # img = np.array(img).astype(np.float32)
    img = img - [_R_MEAN, _G_MEAN, _B_MEAN]

    return img


# data generator with data augmentation
train_image_generator = ImageDataGenerator(horizontal_flip=True,
                                           preprocessing_function=pre_function)     #pre_function函数，为什么要这么做呢？因为在讲迁移学习相关概念时强调，如果使用别人的预训练模型参数，就必须和别人使用相同的预处理方法

validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                           batch_size=batch_size,
                                                           shuffle=True,
                                                           target_size=(im_height, im_width),
                                                           class_mode='categorical')
total_train = train_data_gen.n

# get class dict
class_indices = train_data_gen.class_indices

# transform value and key of dict
inverse_dict = dict((val, key) for key, val in class_indices.items())
# write dict into json file
json_str = json.dumps(inverse_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)

val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                              batch_size=batch_size,
                                                              shuffle=False,
                                                              target_size=(im_height, im_width),
                                                              class_mode='categorical')
# img, _ = next(train_data_gen)
total_val = val_data_gen.n

feature = resnet50(num_classes=6, include_top=False)        #num_classes=5分类类别个数是5，include_top=False就不会创建最后面的平均池化下采样层和全连接层了
# feature.build((None, 224, 224, 3))  # when using subclass model
feature.load_weights('pretrain_weights.ckpt')
feature.trainable = False       #冻结所有feature这个模型当中的训练参数；当我们将trainable设置为False时，feature的所有权重都会被冻结，训练过程中也无法再训练这些参数，training设置为True也不行
feature.summary()

model = tf.keras.Sequential([feature,                       #再添加两个全连接层，这样的话就只用训练最后两个全连接层就行了
                             tf.keras.layers.GlobalAvgPool2D(),
                             tf.keras.layers.Dropout(rate=0.5),
                             tf.keras.layers.Dense(1024),       #全连接层1
                             tf.keras.layers.Dropout(rate=0.5),
                             tf.keras.layers.Dense(6),          #全连接层2,6是分类个数
                             tf.keras.layers.Softmax()])        #最后通过softmax输出得到一个概率分布
# model.build((None, 224, 224, 3))
model.summary()

# using keras low level api for training
loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002)

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')


@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        output = model(images, training=True)
        loss = loss_object(labels, output)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, output)


@tf.function
def test_step(images, labels):
    output = model(images, training=False)
    t_loss = loss_object(labels, output)

    test_loss(t_loss)
    test_accuracy(labels, output)


best_test_loss = float('inf')
for epoch in range(1, epochs + 1):
    train_loss.reset_states()  # clear history info
    train_accuracy.reset_states()  # clear history info
    test_loss.reset_states()  # clear history info
    test_accuracy.reset_states()  # clear history info

    # train
    for step in range(total_train // batch_size):
        images, labels = next(train_data_gen)
        train_step(images, labels)

        # print train process
        rate = (step + 1) / (total_train // batch_size)
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        acc = train_accuracy.result().numpy()
        print("\r[{}]train acc: {:^3.0f}%[{}->{}]{:.4f}".format(epoch, int(rate * 100), a, b, acc), end="")
    print()

    # validate
    for step in range(total_val // batch_size):
        test_images, test_labels = next(val_data_gen)
        test_step(test_images, test_labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch,
                          train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))
    if test_loss.result() < best_test_loss:
        best_test_loss = test_loss.result()
        model.save_weights("./save_weights/resNet_50.ckpt", save_format="tf")

整个代码的架构大约是下图这样的。其中，test和train分别对应你的验证数据集和训练数据集。

其中，rename.py是我自己写的一个简单的针对数据集重命名的代码，也一起贴出来吧。

import os
path = "E:/xxx"    # 目标路径

"""os.listdir(path) 操作效果为 返回指定路径(path)文件夹中所有文件名"""
filename_list = os.listdir(path)  # 扫描目标路径的文件,将文件名存入列表

a = 0000
for i in filename_list:
     used_name = path + os.sep + filename_list[a]
     new_name = path + os.sep + "img_" + str(a) + '.png'
     os.rename(used_name,new_name)
     print("文件%s重命名成功,新的文件名为%s" %(used_name,new_name))
     a += 1