[DeepLearning]:基于paddle的场景分类(作业)

作业内容:

1、搭建VGG神经网络模型(16 weight layers)

2、训练模型

3、验证模型

一、VGG net

 注意:

1、图中最后一层全连接层(fully connected)的1000指分类数(class_num),根据实际问题修改,本作业为5分类问题,故为5;

2、softmax层将网络输出的各类别得分(logits)利用指数函数映射为正数,再通过归一化转化为0-1之间的概率,且各类别概率之和为1;如果只想得到最大概率的索引,即分到哪一类了,则可忽略这一层(softmax不改变各得分的大小顺序);

3、VGGnet的标准输入为224x224的RGB图片,卷积核为3x3,步长为1,上图中可以看出,VGG共有5组卷积,每组的卷积层间图片尺寸不变,故padding为same或1;

4、池化层采用最大化池化,滤波器为2x2,步长为2,使得图片长宽均减半而不改变深度;

5、最后一次池化后,要将输出进行展平(paddle.nn.Flatten(); pytorch中为.view())再送给全连接层,shape的变化为 (BatchSize,深度,长,宽)--->(BatchSize,深度x长x宽)。

构建VGGnet

# 构建VGG网络
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
class VGGNet(nn.Layer):
    """VGG-16 style classifier: 13 convolution layers and 3 linear layers.

    Every convolution is 3x3 / stride 1 / padding 1 and every stage ends
    with a 2x2 max-pool, so five stages shrink the spatial size by 32.
    The first linear layer expects ``7 * 7 * 512`` features, which
    corresponds to ``(N, 3, 224, 224)`` inputs.

    ``forward`` returns the raw class scores of shape ``(N, num_classes)``;
    no softmax is applied.

    Args:
        num_classes: Number of output classes. Defaults to 5, the value
            that was previously hard-coded in the last linear layer.
    """

    def __init__(self, num_classes=5):
        super(VGGNet, self).__init__()
        # Attribute names and the position of each layer inside its
        # Sequential form the state_dict keys, so both are kept unchanged
        # to stay compatible with checkpoints saved from this class.
        self.conv1 = self._conv_stage(3, 64, 2)
        self.conv2 = self._conv_stage(64, 128, 2)
        self.conv3 = self._conv_stage(128, 256, 3)
        self.conv4 = self._conv_stage(256, 512, 3)
        self.conv5 = self._conv_stage(512, 512, 3)

        self.flatten = nn.Flatten()

        self.linear1 = nn.Sequential(nn.Linear(in_features=7 * 7 * 512, out_features=4096), nn.ReLU())
        self.linear2 = nn.Sequential(nn.Linear(4096, 4096), nn.ReLU())
        self.output = nn.Linear(4096, num_classes)

    @staticmethod
    def _conv_stage(in_channels, out_channels, num_convs):
        """Build ``num_convs`` x (Conv2D 3x3 + ReLU) followed by one MaxPool2D(2)."""
        layers = []
        for _ in range(num_convs):
            layers.append(nn.Conv2D(in_channels, out_channels, 3, 1, 1))
            layers.append(nn.ReLU())
            # Only the first convolution of a stage changes the channel count.
            in_channels = out_channels
        layers.append(nn.MaxPool2D(2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the five conv stages, flatten, and apply the three linear layers."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.linear2(x)
        return self.output(x)

module = VGGNet()
print(module)

另一种构建方式:

import paddle.nn as nn
import paddle.nn.functional as F
class ConvPool(nn.Layer):
    """VGG-16 style network with every layer declared as its own attribute.

    Same topology as a stacked-Sequential VGG-16: five stages of 3x3
    convolutions (2, 2, 3, 3, 3 layers) each followed by a 2x2 max-pool,
    then three linear layers. ReLU is applied functionally in ``forward``.
    The first linear layer expects ``7 * 7 * 512`` features, which
    corresponds to ``(N, 3, 224, 224)`` inputs.

    Args:
        num_classes: Number of output classes. Defaults to 5, the value
            that was previously hard-coded in ``linear3``.
    """

    def __init__(self, num_classes=5):
        super(ConvPool, self).__init__()
        # Attribute names are the state_dict keys; keep them unchanged.
        self.conv1_1 = nn.Conv2D(3, 64, 3, 1, 1)
        self.conv1_2 = nn.Conv2D(64, 64, 3, 1, 1)
        self.pool1 = nn.MaxPool2D(kernel_size=2, stride=2)

        self.conv2_1 = nn.Conv2D(64, 128, 3, 1, 1)
        self.conv2_2 = nn.Conv2D(128, 128, 3, 1, 1)
        self.pool2 = nn.MaxPool2D(kernel_size=2, stride=2)

        self.conv3_1 = nn.Conv2D(128, 256, 3, 1, 1)
        self.conv3_2 = nn.Conv2D(256, 256, 3, 1, 1)
        self.conv3_3 = nn.Conv2D(256, 256, 3, 1, 1)
        self.pool3 = nn.MaxPool2D(kernel_size=2, stride=2)

        self.conv4_1 = nn.Conv2D(256, 512, 3, 1, 1)
        self.conv4_2 = nn.Conv2D(512, 512, 3, 1, 1)
        self.conv4_3 = nn.Conv2D(512, 512, 3, 1, 1)
        self.pool4 = nn.MaxPool2D(kernel_size=2, stride=2)

        self.conv5_1 = nn.Conv2D(512, 512, 3, 1, 1)
        self.conv5_2 = nn.Conv2D(512, 512, 3, 1, 1)
        self.conv5_3 = nn.Conv2D(512, 512, 3, 1, 1)
        self.pool5 = nn.MaxPool2D(kernel_size=2, stride=2)

        self.flatten = nn.Flatten()

        # 7 * 7 * 512 == 25088: spatial size after five halvings of 224,
        # times the channel count of the last conv stage.
        self.linear1 = nn.Linear(in_features=7 * 7 * 512, out_features=4096)
        self.linear2 = nn.Linear(4096, 4096)
        self.linear3 = nn.Linear(4096, num_classes)

    def forward(self, x):
        """Apply conv+ReLU stages with pooling, then the linear classifier."""
        stages = (
            ((self.conv1_1, self.conv1_2), self.pool1),
            ((self.conv2_1, self.conv2_2), self.pool2),
            ((self.conv3_1, self.conv3_2, self.conv3_3), self.pool3),
            ((self.conv4_1, self.conv4_2, self.conv4_3), self.pool4),
            ((self.conv5_1, self.conv5_2, self.conv5_3), self.pool5),
        )
        for convs, pool in stages:
            for conv in convs:
                x = F.relu(conv(x))
            x = pool(x)

        x = self.flatten(x)

        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        # The last layer returns raw scores; no activation is applied.
        return self.linear3(x)

cnn = ConvPool()
print(cnn)

二、训练模型

### 定义画曲线函数
def draw_process(title, color, iters, data, label):
    """Draw a single curve with matplotlib and show it.

    Args:
        title: Text used as the figure title.
        color: Line colour passed to ``plt.plot``.
        iters: X values of the curve.
        data: Y values of the curve.
        label: Used both as the legend entry and as the y-axis label.
    """
    plt.plot(iters, data, color=color, label=label)
    plt.title(title, fontsize=24)
    # Both axis labels share the same font size.
    for set_axis_label, text in ((plt.xlabel, 'iter_num'), (plt.ylabel, label)):
        set_axis_label(text, fontsize=20)
    plt.grid()
    plt.legend()
    plt.show()

## Training code
model = VGGNet()  # instantiate the network
# model = ConvPool()

import numpy
Iters = []
total_loss = []
total_acc = []
optimizer = paddle.optimizer.Adam(learning_rate=train_parameters["learning_rate"], parameters=model.parameters())

# The epoch count used to be read from "train_batch_size", i.e. the batch
# size decided how many passes were made over the data. Prefer a dedicated
# "num_epochs" entry and fall back to the old value when it is absent.
# NOTE(review): assumes train_parameters is a dict and that the epoch key is
# named "num_epochs" - confirm against the cell that defines it.
num_epochs = train_parameters.get("num_epochs", train_parameters["train_batch_size"])

model.train()
global_step = 0  # keeps increasing across epochs so the curves never fold back
for epoch in range(num_epochs):
    for step, data in enumerate(train_loader()):
        x_data = data[0]
        y_data = data[1]

        prediction = model(x_data)
        loss = paddle.nn.functional.cross_entropy(prediction, y_data)
        loss.backward()
        optimizer.step()
        optimizer.clear_grad()

        # float() works for any single-element tensor, whether it is 0-D or
        # has shape [1], unlike indexing the numpy array with [0].
        loss_value = float(loss)
        acc_value = float(paddle.metric.accuracy(prediction, y_data))

        Iters.append(global_step)
        total_loss.append(loss_value)
        total_acc.append(acc_value)
        global_step += 1

        print('Step:', step, '|| Loss: %.4f' % loss_value, '|| Accuracy: %.2f' % acc_value)
# Save the model parameters
paddle.save(model.state_dict(), "work/checkpoints/save_dir_final.pdparams")

draw_process("Trainning Loss","red",Iters,total_loss,"Trainning Loss")
draw_process("Trainning Acc","blue",Iters,total_acc,"Trainning Acc")

print('done!')

 注意:

1、画图所使用的数据需要为numpy类型

2、VGGnet的输出为一个 BatchSize x 5 的tensor矩阵,每一行对应一张图片,包含该图片对各个类别的预测得分([第一类的得分, 第二类的得分,...],未经softmax归一化);

3、批训练中的批次序号(本程序以step对应批次)由enumerate()给出,为Python的int型,可以直接作为画图的横坐标;

4、损失计算输出的loss为tensor类型,需用.numpy()进行转化;

5、paddle.metric.accuracy()能够计算某一批次中各个预测的正确率,输出为一个只含一个元素的tensor,转化为numpy类型后是一个只含一个元素的numpy数组,故在最后还要取[0]或者利用.squeeze()降维才能得到一个数。

6、由于使用的是免费的算力,训练一轮就要花费一个半小时左右,所以我这里只训练了一轮,故准确率不是特别好(正确率在60%上下浮动),但是可以看到loss在下降、准确度在上升的趋势,后续如果再重新训练(训练轮数上升到十几轮)会再更新

实验结果

三、验证模型

##图像预处理
def unzip_infer_data(src_path,target_path):
    """Extract the inference archive unless it has already been extracted.

    Extraction is skipped when ``<target_path>/test`` is an existing
    directory. The marker path is built with ``os.path.join`` so the check
    also works when ``target_path`` has no trailing separator.

    Args:
        src_path: Path of the zip archive to extract.
        target_path: Directory the archive is extracted into.
    """
    if os.path.isdir(os.path.join(target_path, 'test')):
        return
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(src_path, 'r') as archive:
        archive.extractall(path=target_path)

def load_image(image_path):
    """Read an image file and turn it into a network-ready array.

    The image is converted to RGB when needed, resized to 224x224 with
    bilinear resampling, cast to float32, reordered from HWC to CHW and
    divided by 255.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The preprocessed image as a numpy array in CHW order.
    """
    picture = Image.open(image_path)
    picture = picture if picture.mode == 'RGB' else picture.convert('RGB')
    picture = picture.resize((224, 224), Image.BILINEAR)
    pixels = np.array(picture).astype('float32')
    # Channel axis first, then scale by 1/255.
    channels_first = pixels.transpose((2, 0, 1))
    return channels_first / 255

# os is needed by unzip_infer_data() below, so import it before the first call.
import os
import paddle.vision.transforms as T

infer_src_path = "/home/aistudio/work/test.zip"
infer_dst_path = "/home/aistudio/work/test/"
unzip_infer_data(infer_src_path,infer_dst_path)

label_dict = train_parameters['label_dict']
## Load the saved model parameters
model_state_dict = paddle.load('work/checkpoints/save_dir_final.pdparams')
model_pred = VGGNet()   # instantiate the network
model_pred.set_state_dict(model_state_dict)
model_pred.eval()  # inference mode

# NOTE(review): label_dict is loaded above but not used; the mapping below is
# hard-coded - confirm that both agree.
class_num = {'0' : 'lawn', '1' : 'river', '2' : 'desert', '3' : 'church', '4' : 'ice'}
os.makedirs('/home/aistudio/work/test_result', exist_ok=True)

datanames = os.listdir(infer_dst_path)
# The context managers close the result file even if a prediction fails and
# turn off gradient tracking, which inference does not need.
with open('/home/aistudio/work/test_result/test_result.txt', 'w') as test_result, paddle.no_grad():
    for i in datanames:
        img_name = infer_dst_path + i
        # load_image() already returns a CHW float32 array. Passing it through
        # ToTensor() (which treats its input as HWC) and transposing back
        # afterwards left height and width swapped, so the array is converted
        # directly and only the batch axis is added.
        img_test = load_image(img_name)
        img_test_tensor_ = paddle.unsqueeze(paddle.to_tensor(img_test), axis=0)
        img_pred = model_pred(img_test_tensor_)
        # argmax over the class axis yields one index per image.
        img_num = int(paddle.argmax(img_pred, axis=1).numpy()[0])
        img_class = class_num[str(img_num)]
        print('||Name:', i, '||Class:', img_class, '||')
        test_result.write('||Name:' + i + '||Class:' + img_class + '||' + '\n')

注意:

1、os.listdir(/路径)可以读取指定路径下的文件名; 

2、由于这里的测试集不经过批处理环节,无法自动转化为tensor格式以及增加BatchSize维度,所以这里手动转化格式(transform),在0号位增加一个维度;

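3、出现问题:经过转换后的图片仍然放不进神经网络,发现是图片张量的维度顺序有误——load_image()已经把图片转成了CHW格式,而ToTensor()默认把输入当作HWC再转置一次,得到(宽, 通道, 高)的顺序;因此使用.transpose()调整维度顺序后,就可以放进网络进行预测了(注意这样调整后图片的高、宽相对原图是互换的,更稳妥的做法是直接用paddle.to_tensor()转换load_image()的输出,再在0号位增加一个维度);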

4、由于训练轮数比较低,实验结果不尽人意,暂时先不放在博客中。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值