PyTorch Learning (Part 6)

CIFAR-10 Image Classification with PyTorch

Basic Structure of a Classification Network


  • First load the data, then reorganize it into the form the neural network needs (data preprocessing / data augmentation).
  • Feed the data into a CNN to extract features. After feature extraction we obtain an N-dimensional vector, whose N dimensions correspond to the N classes we want to distinguish.
  • Use a loss function to compute the network's current loss, and backpropagate that loss through the network to adjust the parameters. For backpropagation (BP) we need to define the network's optimizer, e.g. gradient descent, to iterate the network parameters. Once the model converges, i.e. the loss is very small or barely changes, the parameters are trained.
  • With these parameters we can build a network and run inference on subsequent input data.
  • Training is the process of solving for the network's parameters.
  • Inference assumes the network structure is known and the parameters have been obtained through training, so we can build the forward-inference function; given input data x, this function computes the predicted result.
Data Loading Module


  • In PyTorch the channel dimension comes first, so we need to convert the data into channel * h * w layout (see the snippet below).
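A quick illustration of converting an HWC image array (as produced by OpenCV or PIL) to the CHW layout PyTorch expects; the array here is a dummy placeholder:

import numpy as np
import torch

img_hwc = np.zeros((32, 32, 3), dtype=np.uint8) # h * w * channel layout
img_chw = np.transpose(img_hwc, (2, 0, 1)) # channel * h * w layout
print(img_chw.shape) # (3, 32, 32)

# torchvision's transforms.ToTensor() performs this transposition
# (plus scaling to [0, 1]) automatically
tensor_chw = torch.from_numpy(img_chw)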
Data Augmentation
  • A technique for expanding an existing dataset, used to address the problem of insufficient data.
  • Common augmentation methods: random flips/rotations, adjusting image brightness, saturation, contrast, etc.
  • We use the transforms module from torchvision.
Network Structure


Class Probability Distribution
  • An N-dimensional vector corresponds to N classes.
  • Use an FC layer (sensitive to input image size) to flatten the feature map into a vector. A convolutional layer (also size-sensitive) or a pooling layer can be used instead (pooling has no parameters, so it is insensitive to shape).
  • Pass this N-dimensional vector through a softmax function to map it onto a probability distribution: $S_i = e^{z_i} / \sum_j e^{z_j}$, where $z$ is the vector of logits (see the sketch below).
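A quick numeric check of the formula with PyTorch's built-in softmax:

import torch
import torch.nn.functional as F

logits = torch.tensor([2.0, 1.0, 0.1]) # N-dimensional score vector (N = 3 here)
probs = F.softmax(logits, dim=0) # S_i = e^{z_i} / sum_j e^{z_j}
print(probs) # tensor([0.6590, 0.2424, 0.0986])
print(probs.sum()) # tensor(1.), a valid probability distribution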
Loss
  • Use the cross-entropy loss nn.CrossEntropyLoss.
  • Classification problems require defining labels; one-hot encoding is what converts a label into a vector.
  • [1,0,0] means class 1 and [0,1,0] means class 2, but such hard targets are too rigid, so label smoothing is used (see the sketch below).
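A minimal sketch of label smoothing, assuming a smoothing factor epsilon = 0.1 (the value is an illustrative choice, not from the original post):

import torch

def smooth_one_hot(labels, num_classes, epsilon = 0.1):
	# hard one-hot target: 1 for the true class, 0 elsewhere
	one_hot = torch.zeros(labels.size(0), num_classes)
	one_hot.scatter_(1, labels.unsqueeze(1), 1.0)
	# smoothed target: (1 - epsilon) for the true class, epsilon/(K-1) for the rest
	return one_hot * (1.0 - epsilon) + (1.0 - one_hot) * epsilon / (num_classes - 1)

print(smooth_one_hot(torch.tensor([0, 1]), num_classes = 3))
# tensor([[0.9000, 0.0500, 0.0500],
#         [0.0500, 0.9000, 0.0500]])

Recent PyTorch releases (1.10+) also expose this directly via nn.CrossEntropyLoss(label_smoothing=0.1).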
Common Metrics for Classification Problems


  • PR curve, ROC curve, and AUC (the area under the ROC curve); see the sketch below.
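A minimal sketch of computing these metrics, assuming scikit-learn is available (the toy labels and scores are made up for illustration):

from sklearn.metrics import precision_recall_curve, roc_curve, roc_auc_score

y_true = [0, 0, 1, 1, 1, 0, 1] # ground-truth binary labels
y_score = [0.1, 0.4, 0.35, 0.8, 0.7, 0.2, 0.9] # predicted probabilities

precision, recall, _ = precision_recall_curve(y_true, y_score) # PR curve points
fpr, tpr, _ = roc_curve(y_true, y_score) # ROC curve points
print("AUC:", roc_auc_score(y_true, y_score)) # area under the ROC curve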
Optimizer
  • Adam is recommended, with exponential decay for the learning rate (see the sketch below).
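A minimal sketch of this setup with torch.optim.Adam and an exponential learning-rate schedule (the stand-in model and the gamma value are illustrative assumptions):

import torch

model = torch.nn.Linear(10, 10) # stand-in model for illustration
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.95)

for epoch in range(3):
	# ...one epoch of training steps would go here...
	optimizer.step() # placeholder step so the scheduler call order is valid
	scheduler.step() # lr <- lr * gamma after each epoch
	print(epoch, optimizer.param_groups[0]["lr"])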

CIFAR-10 Data: Introduction, Reading, and Processing

CIFAR-10 Data Introduction
  • CIFAR-10 is a 10-class classification task; CIFAR-100 is a 100-class one.
  • The CIFAR-10 dataset contains 60,000 images in total: 50,000 for training (5,000 per class) and 10,000 for testing. Each image is 32*32.
  • Dataset download: https://www.cs.toronto.edu/~kriz/cifar.html (torchvision can also fetch it automatically; see the sketch below).
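For reference, torchvision ships a built-in CIFAR-10 loader that downloads and parses the dataset automatically; a minimal sketch (the root directory "./data" is an example path):

from torchvision import datasets, transforms

train_set = datasets.CIFAR10(root = "./data", train = True, download = True,
			transform = transforms.ToTensor())
test_set = datasets.CIFAR10(root = "./data", train = False, download = True,
			transform = transforms.ToTensor())
print(len(train_set), len(test_set)) # 50000 10000

The script below instead extracts the images to disk manually, which makes the custom Dataset class that follows easier to understand.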
Reading and Saving the CIFAR-10 Data
import os
import glob
import pickle

import cv2
import numpy as np

def unpickle(file):
	# each CIFAR-10 batch file is a pickled dict with byte-string keys
	with open(file, 'rb') as fo:
		batch = pickle.load(fo, encoding='bytes')
	return batch

label_name = ["airplane", "automobile", "bird", "cat", "deer",
			"dog", "frog", "horse", "ship", "truck"]

train_list = glob.glob("cifar-10-batches/data_batch_*")
test_list = glob.glob("cifar-10-batches/test_batch")
save_path = "cifar-10-batches/train"
test_path = "cifar-10-batches/test"

def save_images(batch_list, out_root):
	for l in batch_list:
		l_dict = unpickle(l)
		for im_idx, im_data in enumerate(l_dict[b'data']):
			im_label = l_dict[b'labels'][im_idx]
			im_name = l_dict[b'filenames'][im_idx]
			im_label_name = label_name[im_label]

			# each row is a flat 3072-dim vector: reshape to 3*32*32 (CHW),
			# then transpose to 32*32*3 (HWC) for OpenCV
			im_data = np.reshape(im_data, [3, 32, 32])
			im_data = np.transpose(im_data, (1, 2, 0))

			# CIFAR-10 stores pixels in RGB order, but cv2.imwrite expects BGR
			im_data = cv2.cvtColor(im_data, cv2.COLOR_RGB2BGR)

			# preview: cv2.imshow("im_data", cv2.resize(im_data, (200, 200))); cv2.waitKey(0)

			out_dir = "{}/{}".format(out_root, im_label_name)
			if not os.path.exists(out_dir):
				os.makedirs(out_dir)
			cv2.imwrite("{}/{}".format(out_dir, im_name.decode("utf-8")), im_data)

save_images(train_list, save_path)
save_images(test_list, test_path)
Loading and Processing the CIFAR-10 Data
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import os 
from PIL import Image
import numpy as np
import glob

label_name = ["airplane", "automobile", "bird", "cat",
			"deer","dog","frog","horse","ship","truck"]

label_dict = {}

for idx, name in enumerate(label_name):
	label_dict[name] = idx

def default_loader(path):
	# load an image from disk and force 3-channel RGB
	return Image.open(path).convert("RGB")

train_transform = transforms.Compose([
	transforms.RandomResizedCrop((28,28)), # random crop, resized to 28x28
	transforms.RandomHorizontalFlip(),
	transforms.RandomVerticalFlip(),
	transforms.RandomRotation(90), # random rotation within +-90 degrees
	transforms.RandomGrayscale(0.1), # convert to grayscale with probability 0.1
	transforms.ColorJitter(0.3, 0.3, 0.3, 0.3), # jitter brightness/contrast/saturation/hue
	transforms.ToTensor() # HWC uint8 [0,255] -> CHW float [0,1]
])

class MyDataset(Dataset):
	def __init__(self, im_list, transform = None, loader = default_loader):
		super(MyDataset,self).__init__()
		imgs = []

		for im_item in im_list:
			# the class name is the parent directory: .../train/<class>/<file>.png
			im_label_name = im_item.split("/")[-2]
			imgs.append([im_item, label_dict[im_label_name]])
		
		self.imgs = imgs
		self.transform = transform
		self.loader = loader


	# read one sample: load the image, apply augmentation, return (image data, label)
	def __getitem__(self, index):
		im_path, im_label = self.imgs[index]

		im_data = self.loader(im_path)

		if self.transform is not None:
			im_data = self.transform(im_data)

		return im_data, im_label


	def __len__(self):
		return len(self.imgs)


im_train_list = glob.glob("cifar-10-batches/train/*/*.png")
im_test_list = glob.glob("cifar-10-batches/test/*/*.png")

train_dataset = MyDataset( im_train_list, transform = train_transform )
test_dataset = MyDataset( im_test_list, transform = transforms.ToTensor() )


train_data_loader = DataLoader(dataset = train_dataset, batch_size = 66, shuffle = True, num_workers = 4)
test_data_loader = DataLoader(dataset = test_dataset, batch_size = 66, shuffle = False, num_workers = 4)

print("num_of_train", len(train_dataset))
print("num_of_test", len(test_dataset))
Implementing CIFAR-10 Classification with VGGNet

Network Definition Code

import torch
import torch.nn as nn
import torch.nn.functional as F

class VGGbase(nn.Module):
	def __init__(self):
		super(VGGbase, self).__init__()

		# input: batchsize * 3 * 28 * 28
		self.conv1 = nn.Sequential(
			nn.Conv2d(3, 64, kernel_size = 3, stride = 1, padding = 1),
			nn.BatchNorm2d(64),
			nn.ReLU()
		)
		self.max_pooling1 = nn.MaxPool2d(kernel_size = 2, stride = 2)

		self.conv2_1 = nn.Sequential(
			nn.Conv2d(64, 128, kernel_size = 3, stride = 1, padding = 1),
			nn.BatchNorm2d(128),
			nn.ReLU()
		)

		self.conv2_2 = nn.Sequential(
			nn.Conv2d(128, 128, kernel_size = 3, stride = 1, padding = 1),
			nn.BatchNorm2d(128),
			nn.ReLU()
		)

		self.max_pooling2 = nn.MaxPool2d(kernel_size = 2, stride = 2)

		self.conv3_1 = nn.Sequential(
			nn.Conv2d(128, 256, kernel_size = 3, stride = 1, padding = 1),
			nn.BatchNorm2d(256),
			nn.ReLU()
		)

		self.conv3_2 = nn.Sequential(
			nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding = 1),
			nn.BatchNorm2d(256),
			nn.ReLU()
		)

		# padding=1 keeps the odd-sized 7x7 map from shrinking too far (7 -> 4)
		self.max_pooling3 = nn.MaxPool2d(kernel_size = 2, stride = 2, padding = 1)

		self.conv4_1 = nn.Sequential(
			nn.Conv2d(256, 512, kernel_size = 3, stride = 1, padding = 1),
			nn.BatchNorm2d(512), # must match the conv output channels (512)
			nn.ReLU()
		)

		self.conv4_2 = nn.Sequential(
			nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding = 1),
			nn.BatchNorm2d(512),
			nn.ReLU()
		)

		self.max_pooling4 = nn.MaxPool2d(kernel_size = 2, stride = 2)

		# batchsize * 512 * 2 * 2 --> batchsize * (512 * 4)
		self.fc = nn.Linear( 512 * 4, 10 )
		
	def forward(self, x):
		batchsize = x.size(0)

		out = self.conv1(x)
		out = self.max_pooling1(out)

		out = self.conv2_1(out)
		out = self.conv2_2(out)
		out = self.max_pooling2(out)

		out = self.conv3_1(out)
		out = self.conv3_2(out)
		out = self.max_pooling3(out)

		out = self.conv4_1(out)
		out = self.conv4_2(out)
		out = self.max_pooling4(out)

		out = out.view(batchsize, -1)

		out = self.fc(out)
		# return raw logits: nn.CrossEntropyLoss in the training script
		# applies log-softmax internally, so no extra log-softmax here
		return out
def VGGNet():
	return VGGbase()
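A quick sanity check of the output shape (the 28*28 input matches the RandomResizedCrop size used in the data pipeline above):

if __name__ == "__main__":
	net = VGGNet()
	x = torch.randn(2, 3, 28, 28) # a dummy batch of two 28x28 RGB images
	print(net(x).shape) # torch.Size([2, 10])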

Training Code

import torch
import torch.nn as nn
import torchvision
from vggnet import VGGNet
# the loader script above defines train_data_loader / test_data_loader
from load_cifar10 import train_data_loader as train_loader, test_data_loader as test_loader
import os
import tensorboardX # practicing with TensorBoard logging


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

epoch_num = 200
lr = 0.01

net = VGGNet().to(device)

#loss

loss_func = nn.CrossEntropyLoss()

#optimizer

optimizer = torch.optim.Adam( net.parameters(), lr = lr )

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)

if not os.path.exists("log"):
		os.mkdir("log")
writer = tensorboardX.SummaryWriter("log")

step_n = 0

for epoch in range(epoch_num):
	print("epoch is", epoch)
	net.train() # put BN/dropout layers in training mode

	for i, data in enumerate(train_loader):
		
		inputs, labels = data
		inputs, labels = inputs.to(device), labels.to(device)

		outputs = net(inputs)
		loss = loss_func(outputs, labels)

		optimizer.zero_grad()
		loss.backward()
		optimizer.step()

		_, pred = torch.max(outputs.data, dim=1)
		correct = pred.eq(labels.data).cpu().sum()
		# divide by the actual batch size: the last batch may be smaller
		batch_acc = 100.0 * correct.item() / labels.size(0)

		print("step", i, "loss is:", loss.item(),
			"mini-batch correct is:", batch_acc)

		writer.add_scalar("train loss", loss.item(), global_step = step_n)
		writer.add_scalar("train correct", batch_acc, global_step = step_n)

		step_n += 1

	if not os.path.exists("models"):
		os.mkdir("models")

	torch.save(net.state_dict(), "models/{}.pth".format(epoch + 1))	
	scheduler.step()

	print("lr is ", optimizer.state_dict()["param_groups"][0]["lr"])


	# evaluate on the test set
	net.eval() # put BN/dropout layers in eval mode, once before the loop
	sum_loss = 0
	sum_correct = 0
	sum_total = 0
	with torch.no_grad(): # no gradients are needed for evaluation
		for i, data in enumerate(test_loader):
			inputs, labels = data
			inputs, labels = inputs.to(device), labels.to(device)

			outputs = net(inputs)
			loss = loss_func(outputs, labels)
			_, pred = torch.max(outputs.data, dim=1)
			correct = pred.eq(labels.data).cpu().sum()

			sum_loss += loss.item()
			sum_correct += correct.item()
			sum_total += labels.size(0)

	# average loss per batch and overall accuracy over the whole test set
	test_loss = sum_loss / len(test_loader)
	test_correct = 100.0 * sum_correct / sum_total

	writer.add_scalar("test loss", test_loss, global_step = epoch + 1)
	writer.add_scalar("test correct", test_correct, global_step = epoch + 1)

	print("epoch is", epoch + 1, "loss is:", test_loss,
			"test correct is:", test_correct)

writer.close()
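To close the loop with the inference step described at the top of the post, a minimal sketch of loading a trained checkpoint and predicting on one test batch (the filename models/200.pth is an example; use whichever epoch you saved):

net = VGGNet().to(device)
net.load_state_dict(torch.load("models/200.pth", map_location=device))
net.eval()

with torch.no_grad():
	inputs, labels = next(iter(test_loader))
	outputs = net(inputs.to(device))
	_, pred = torch.max(outputs, dim=1)
	print("predicted: ", pred[:10].tolist())
	print("ground truth:", labels[:10].tolist())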