[DeepLearning]:基于paddle的场景分类(作业)

人形灭睡机

已于 2022-04-19 20:32:20 修改

阅读量2.1k

点赞数 1

文章标签： paddle

于 2022-04-13 18:55:16 首次发布

本文链接：https://blog.csdn.net/qq_56297952/article/details/124143060

版权

作业内容：

1、搭建VGG神经网络模型(16 weight layers)

2、训练模型

3、验证模型

一、VGG net

注意：

1、图中最后一层全连接层(fully connected)的1000指分类数(class_num)，根据实际问题修改，本作业为5分类问题，故为5；

2、softmax层将预测概率利用指数函数映射为非负，再利用归一化方法将概率转化为0-1，且各个概率的预测总和为1，如果只想得到最大概率的索引，即分到哪一类了，则可忽略这一层；

3、VGGnet的标准输入为224x224的RGB图片，卷积核为3x3，步长为1，上图中可以看出，VGG共有5组卷积，每组的卷积层间图片尺寸不变，故padding为same或1；

4、池化层采用最大化池化，滤波器为2x2，步长为2，使得图片长宽均减半而不改变深度；

5、最后一次池化后，要将输出进行展平(paddle.nn.Flatten(); pytorch中为.view())再送给全连接层，shape的变化为 (BatchSize,深度,长,宽)--->(BatchSize,深度x长x宽)，本网络中即 (BatchSize,512,7,7)--->(BatchSize,25088)。

构建VGGnet

# 构建VGG网络
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
class VGGNet(nn.Layer):
    """VGG-16 style classifier: 13 convolution layers plus 3 fully connected layers.

    The network is made of five convolution stages (2, 2, 3, 3, 3 conv layers
    with 64, 128, 256, 512, 512 output channels). Every convolution uses a 3x3
    kernel, stride 1 and padding 1 and is followed by ReLU; every stage ends
    with a 2x2 max pooling that halves the spatial size.

    The first fully connected layer expects 7*7*512 features, i.e. the spatial
    size after the five poolings must be 7x7 (a 224x224 input gives exactly
    that: 224 / 2**5 = 7).

    Args:
        num_classes: Number of output classes (size of the last linear layer).
            Defaults to 5, the value this assignment uses.
    """

    def __init__(self, num_classes=5):
        super(VGGNet, self).__init__()
        # Attribute names and the layer order inside each Sequential are kept
        # stable so that previously saved state dicts still load.
        self.conv1 = self._make_stage(3, 64, 2)
        self.conv2 = self._make_stage(64, 128, 2)
        self.conv3 = self._make_stage(128, 256, 3)
        self.conv4 = self._make_stage(256, 512, 3)
        self.conv5 = self._make_stage(512, 512, 3)

        self.flatten = nn.Flatten()

        self.linear1 = nn.Sequential(nn.Linear(in_features=7 * 7 * 512, out_features=4096), nn.ReLU())
        self.linear2 = nn.Sequential(nn.Linear(4096, 4096), nn.ReLU())
        # No softmax here: the network returns raw class scores (logits).
        self.output = nn.Linear(4096, num_classes)

    @staticmethod
    def _make_stage(in_channels, out_channels, num_convs):
        """Build one stage: `num_convs` x (3x3 conv + ReLU) followed by a 2x2 max pool."""
        layers = []
        for _ in range(num_convs):
            layers.append(nn.Conv2D(in_channels, out_channels, 3, 1, 1))
            layers.append(nn.ReLU())
            # Only the first conv of a stage changes the channel count.
            in_channels = out_channels
        layers.append(nn.MaxPool2D(2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the five conv stages, flatten, and apply the three linear layers.

        Returns the output of the last linear layer (one score per class for
        each sample in the batch).
        """
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.linear2(x)
        output = self.output(x)
        return output

# Instantiate the network once and print its layer structure as a quick sanity check.
module = VGGNet()
print(module)

另一种构建方式：

import paddle.nn as nn
import paddle.nn.functional as F
class ConvPool(nn.Layer):    # convolution + pooling
    """VGG-16 style classifier written with one attribute per layer.

    Same topology as a 16-weight-layer VGG: five stages of 3x3 convolutions
    (stride 1, padding 1) with ReLU, each stage closed by a 2x2 max pooling,
    followed by three linear layers. ReLU is applied functionally in
    `forward`, so only convolution and linear layers hold parameters.

    `linear1` expects 25088 (= 512 * 7 * 7) input features, which is what a
    224x224 input produces after the five poolings.

    Args:
        num_classes: Number of output classes (size of the last linear layer).
            Defaults to 5, the value this assignment uses.
    """

    def __init__(self, num_classes=5):
        super(ConvPool, self).__init__()
        # Stage 1: 3 -> 64 channels.
        self.conv1_1 = nn.Conv2D(3, 64, 3, 1, 1)
        self.conv1_2 = nn.Conv2D(64, 64, 3, 1, 1)
        self.pool1 = nn.MaxPool2D(kernel_size=2, stride=2)

        # Stage 2: 64 -> 128 channels.
        self.conv2_1 = nn.Conv2D(64, 128, 3, 1, 1)
        self.conv2_2 = nn.Conv2D(128, 128, 3, 1, 1)
        self.pool2 = nn.MaxPool2D(2)

        # Stage 3: 128 -> 256 channels.
        self.conv3_1 = nn.Conv2D(128, 256, 3, 1, 1)
        self.conv3_2 = nn.Conv2D(256, 256, 3, 1, 1)
        self.conv3_3 = nn.Conv2D(256, 256, 3, 1, 1)
        self.pool3 = nn.MaxPool2D(2)

        # Stage 4: 256 -> 512 channels.
        self.conv4_1 = nn.Conv2D(256, 512, 3, 1, 1)
        self.conv4_2 = nn.Conv2D(512, 512, 3, 1, 1)
        self.conv4_3 = nn.Conv2D(512, 512, 3, 1, 1)
        self.pool4 = nn.MaxPool2D(2)

        # Stage 5: 512 -> 512 channels.
        self.conv5_1 = nn.Conv2D(512, 512, 3, 1, 1)
        self.conv5_2 = nn.Conv2D(512, 512, 3, 1, 1)
        self.conv5_3 = nn.Conv2D(512, 512, 3, 1, 1)
        self.pool5 = nn.MaxPool2D(2)

        self.flatten = nn.Flatten()

        self.linear1 = nn.Linear(in_features=25088, out_features=4096)
        self.linear2 = nn.Linear(4096, 4096)
        # No softmax here: the network returns raw class scores (logits).
        self.linear3 = nn.Linear(4096, num_classes)

    def forward(self, x):
        """Apply conv+ReLU stages with pooling, flatten, then the linear head.

        Returns the output of `linear3` (one score per class for each sample).
        """
        stages = (
            ((self.conv1_1, self.conv1_2), self.pool1),
            ((self.conv2_1, self.conv2_2), self.pool2),
            ((self.conv3_1, self.conv3_2, self.conv3_3), self.pool3),
            ((self.conv4_1, self.conv4_2, self.conv4_3), self.pool4),
            ((self.conv5_1, self.conv5_2, self.conv5_3), self.pool5),
        )
        for convs, pool in stages:
            for conv in convs:
                x = F.relu(conv(x))
            x = pool(x)

        x = self.flatten(x)

        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)

        return x

# Instantiate the alternative implementation and print its layer structure.
cnn = ConvPool()
print(cnn)

二、训练模型

### 定义画曲线函数
def draw_process(title,color,iters,data,label):
    """Plot a single curve of `data` against `iters` and show the figure.

    Args:
        title: Text shown above the plot.
        color: Line colour passed to `plt.plot`.
        iters: X-axis values (iteration numbers).
        data: Y-axis values, one per entry of `iters`.
        label: Used both as the y-axis caption and as the legend entry.
    """
    # Draw the curve first, then decorate the axes it was drawn on.
    plt.plot(iters, data, color=color, label=label)
    plt.title(title, fontsize=24)
    plt.xlabel('iter_num', fontsize=20)
    plt.ylabel(label, fontsize=20)
    plt.legend()
    plt.grid()
    plt.show()

## 训练代码
model = VGGNet()  # instantiate the network
# model = ConvPool()
model.train()  # make training mode explicit

### Training loop
import numpy
Iters = []
total_loss = []
total_acc = []
optimizer = paddle.optimizer.Adam(learning_rate=train_parameters["learning_rate"], parameters=model.parameters())

# NOTE(review): the original looped over train_parameters["train_batch_size"],
# which is a batch size, not an epoch count. Prefer an explicit "num_epochs"
# entry when the config provides one; fall back to the old value otherwise so
# existing configs keep running. Assumes train_parameters is a dict - confirm.
num_epochs = train_parameters.get("num_epochs", train_parameters["train_batch_size"])

global_step = 0  # running iteration index across all epochs (x-axis of the curves)
for epoch in range(num_epochs):
    for step, data in enumerate(train_loader()):
        x_data = data[0]
        y_data = data[1]

        prediction = model(x_data)
        loss = paddle.nn.functional.cross_entropy(prediction, y_data)
        loss.backward()
        optimizer.step()
        optimizer.clear_grad()

        correctness = paddle.metric.accuracy(prediction,y_data)

        # float() accepts any single-element tensor, so this works whether the
        # framework returns a shape-[1] or a 0-D tensor (indexing [0] does not).
        loss_value = float(loss)
        acc_value = float(correctness)

        # `step` restarts at 0 every epoch; use the global counter so that
        # curves from different epochs do not overlap on the plot.
        Iters.append(global_step)
        total_loss.append(loss_value)
        total_acc.append(acc_value)
        global_step += 1

        print('Step:', step, '|| Loss: %.4f' % loss_value, '|| Accuracy: %.2f' % acc_value)
# Save the trained parameters
paddle.save(model.state_dict(), "work/checkpoints/save_dir_final.pdparams")

draw_process("Trainning Loss","red",Iters,total_loss,"Trainning Loss")
draw_process("Trainning Acc","blue",Iters,total_acc,"Trainning Acc")        

print('done!')

注意：

1、画图所使用的数据需要为numpy类型

2、VGGnet的输出为一个 BatchSize x class_num 的tensor矩阵，每一行对应一个样本，包含该样本属于各个类别的预测得分([属于第一类的得分，属于第二类的得分，...])，由于网络中没有softmax层，这些得分是未归一化的logits；

3、批训练中的批次(本程序以step对应批次)由enumerate()产生，为Python的int型，可直接用于画图；

4、损失函数计算输出的loss为tensor类型，需用.numpy()进行转化；

5、paddle.metric.accuracy()能够计算某批次数个预测的正确率，输出为一个1x1的tensor，转化为numpy类型后是一个1x1的numpy矩阵，故在最后还要取[0]或者利用.squeeze()后缀降维才能得到一个数。

6、由于使用的是免费的算力，训练一轮就要花费一个半小时左右，所以我这里只训练了一轮，故准确率不是特别好(正确率在60%浮动)，但是可以看到loss在下降、准确度在上升的趋势，后续如果再重新训练（训练轮数上升到十几轮）会再更新。

实验结果

三、验证模型

##图像预处理
def unzip_infer_data(src_path,target_path):
    """Extract the inference data archive unless it was already extracted.

    The archive is considered extracted when a `test` directory exists inside
    `target_path`; in that case nothing is done.

    Args:
        src_path: Path of the zip archive to extract.
        target_path: Directory the archive is extracted into (with or without
            a trailing path separator).
    """
    # Imported locally so the function does not depend on import order in the
    # surrounding notebook cells.
    import os
    import zipfile

    # os.path.join handles a missing trailing separator, which plain string
    # concatenation (target_path + 'test') does not.
    if os.path.isdir(os.path.join(target_path, 'test')):
        return

    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(src_path, 'r') as archive:
        archive.extractall(path=target_path)

def load_image(image_path):
    """Load an image file and convert it to a normalised CHW float32 array.

    The image is converted to RGB if necessary, resized to 224x224 with
    bilinear interpolation, moved from HWC to CHW layout and divided by 255.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float32 numpy array of shape (3, 224, 224).
    """
    picture = Image.open(image_path)
    # Non-RGB inputs (grayscale, palette, RGBA, ...) are forced to three channels.
    picture = picture if picture.mode == 'RGB' else picture.convert('RGB')
    resized = picture.resize((224, 224), Image.BILINEAR)
    pixels = np.array(resized).astype('float32')
    # HWC -> CHW, then scale the 8-bit range down by 255.
    channels_first = pixels.transpose((2, 0, 1))
    return channels_first / 255

# Imports come first so that `os` is available before anything below uses it.
import os
import paddle.vision.transforms as T

infer_src_path = "/home/aistudio/work/test.zip"
infer_dst_path = "/home/aistudio/work/test/"
unzip_infer_data(infer_src_path,infer_dst_path)

label_dict = train_parameters['label_dict']

## Load the saved parameters into a fresh network
model_state_dict = paddle.load('work/checkpoints/save_dir_final.pdparams')
model_pred = VGGNet()   # instantiate the network
model_pred.set_state_dict(model_state_dict)
model_pred.eval()  # inference mode

# Maps the predicted class index (as a string) to its scene name.
class_num = {'0' : 'lawn', '1' : 'river', '2' : 'desert', '3' : 'church', '4' : 'ice'}
os.makedirs('/home/aistudio/work/test_result', exist_ok=True)

datanames = os.listdir(infer_dst_path)
# The with-statement closes the result file even if a prediction fails;
# no_grad() avoids building the autograd graph during inference.
with open('/home/aistudio/work/test_result/test_result.txt', 'w') as test_result, paddle.no_grad():
    for i in datanames:
        img_name = infer_dst_path + i
        # print(img_name)
        img_test = load_image(img_name)
        # load_image() already returns a CHW float32 array scaled by 1/255.
        # T.ToTensor() assumes HWC input and would transpose it again, which
        # (after the compensating transpose used before) fed the network an
        # image with height and width swapped. Convert the array directly.
        img_test_tensor = paddle.to_tensor(img_test)
        # Add the batch dimension: (3, 224, 224) -> (1, 3, 224, 224).
        img_test_tensor_ = paddle.unsqueeze(img_test_tensor, axis=0)
        img_pred = model_pred(img_test_tensor_)
        # print(img_pred)
        # int() accepts any single-element tensor (shape [1] or 0-D).
        img_num = int(paddle.argmax(img_pred))
        img_class = class_num[str(img_num)]
        print('||Name:', i, '||Class:', img_class, '||')
        test_result.write('||Name:' + i + '||Class:' + img_class + '||' + '\n')

注意：

1、os.listdir(/路径)可以读取指定路径下的文件名；

2、由于这里的测试集不经过批处理环节，无法自动转化为tensor格式以及增加BatchSize维度，所以这里手动转化格式(transform)，在0号位增加一个维度；

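3、出现问题：经过转换后的图片仍然放不进神经网络，发现是图片信息矩阵的维度顺序有误：load_image()已经输出CHW格式的数组，而T.ToTensor()默认输入为HWC并会再做一次转置，使维度顺序变成(宽,通道,长)。使用.transpose()更改维度顺序后就可以放进网络进行预测了，但要注意这样得到的图片长和宽是互换的；更稳妥的做法是直接用paddle.to_tensor()转换load_image()的输出，再在0号位增加一个维度；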

4、由于训练轮数比较低，实验结果不尽人意，暂时先不放在博客中。

人形灭睡机

关注

1
点赞
踩
5

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫