1.使用自制数据集读取图片
对于分类网络,在其Dataset下包括N个文件夹,每个文件夹为一类。在没有使用自制数据集读取图片之前,我们使用 datasets.ImageFolder 进行读取。ImageFolder 的功能是将 root 下所有文件与其类别(文件对应的文件夹索引)对应起来,如 {'DOG': 0, 'CAT': 1, 'rabbit': 2}
主要步骤:
step1:构建数据集
step2:读取数据集
1)直接利用ImageFolder
ToTensor 实现两个功能:一是维度的转变(HWC->CHW),二是实现归一化 [0,255]->[0,1]
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.transforms import transforms

# ToTensor: HWC uint8 [0,255] -> CHW float [0,1]; Normalize then maps
# each channel to roughly [-1,1] with per-channel mean/std of 0.5.
data_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)  # BUG FIX: the closing ')' of Compose(...) was swallowed by the trailing comment
# NOTE(review): `config` must already be loaded (see the yaml snippet below) — confirm
train_dir = config["train"]["datapath"]
train_dataset = datasets.ImageFolder(root=train_dir, transform=data_transform)
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=8,
                          shuffle=True,
                          num_workers=0)
ImageFolder 的具体作用:构建文件名和文件夹(分类)的对应关系,其返回值是图片的位置与其对应分类。此部分的详细解释可参考 torchvision 官方文档。
import torchvision.datasets as dset

# No transform here: inspect the raw bookkeeping ImageFolder builds.
dataset = dset.ImageFolder('./data/dogcat_2')
# classes: subfolder names (sorted); class_to_idx: name -> 0,1,...;
# imgs: list of (image path, class index) pairs across all subfolders.
for info in (dataset.classes, dataset.class_to_idx, dataset.imgs):
    print(info)
返回
['cat', 'dog']
{'cat': 0, 'dog': 1}
[('./data/dogcat_2/cat/cat.12484.jpg', 0), ('./data/dogcat_2/cat/cat.12485.jpg', 0), ('./data/dogcat_2/cat/cat.12486.jpg', 0), ('./data/dogcat_2/cat/cat.12487.jpg', 0), ('./data/dogcat_2/dog/dog.12496.jpg', 1), ('./data/dogcat_2/dog/dog.12497.jpg', 1), ('./data/dogcat_2/dog/dog.12498.jpg', 1), ('./data/dogcat_2/dog/dog.12499.jpg', 1)]
2)自制dataset
from PIL import Image
import torch
from torch.utils.data import Dataset
import os
import glob
import json
from matplotlib import pyplot as plt
class Mydataset(Dataset):
    """Image-classification dataset over a folder of per-class subfolders.

    Expected layout: dataset_folder/<class_name>/*.png, one subfolder per
    class. Also writes `class_indices.json` (index -> class name) to the
    current working directory for later inference/debugging.
    """

    def __init__(self, dataset_folder, transform=None):
        self.dataset_folder = dataset_folder
        self.images_path = []  # path of every sample image
        self.label_list = []   # integer class index, aligned with images_path
        self.transform = transform
        # Class names are the subfolder names; sort so that the
        # name -> index assignment is reproducible across runs.
        _class = [cla for cla in os.listdir(dataset_folder)
                  if os.path.isdir(os.path.join(dataset_folder, cla))]
        _class.sort()
        # Map class name -> numeric index.
        class_indices = dict((k, v) for v, k in enumerate(_class))
        # Persist the inverse mapping (index -> name) as JSON.
        json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
        with open('class_indices.json', 'w') as json_file:
            json_file.write(json_str)
        # BUG FIX: iterate the class folders directly instead of os.walk —
        # os.walk recurses into nested subfolders, whose names would then
        # KeyError on class_indices.
        for cla in _class:
            files_list = glob.glob(os.path.join(dataset_folder, cla, '*png'))
            files_list.sort()  # glob order is OS-dependent; sort for determinism
            for file_path in files_list:
                self.images_path.append(file_path)
                self.label_list.append(class_indices[cla])

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        # convert('L') always yields a single-channel grayscale image, so the
        # old `if img.mode != 'L'` check (with its "not RGB" message) was
        # dead code and has been removed, along with a debug print.
        img = Image.open(self.images_path[item]).convert('L')
        label = self.label_list[item]
        if self.transform is not None:
            img = self.transform(img)  # e.g. PIL image -> (C, H, W) Tensor
        return img, label
#Custom batch-collation function for the DataLoader
def collate_fn(batch):
    """Collate a list of (image, label) pairs into batched tensors.

    zip(*batch) splits the pairs into an image tuple and a label tuple.
    torch.stack adds a new leading batch dimension, e.g. 8 samples of
    (1,512,512) -> (8,1,512,512); labels become a 1-D tensor.
    """
    images, labels = zip(*batch)
    batched_images = torch.stack(list(images), dim=0)
    batched_labels = torch.as_tensor(labels)
    return batched_images, batched_labels
此时可以直接调用Mydataset来处理
opencv读取图片时,读取后转为numpy数组的格式是(H,W,C).
torch将图片转为Tensor,读取格式为(C,H,W)
from my_dataset import Mydataset
import torch
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
import yaml
# Load the training configuration from YAML.
with open('config.yaml', 'r', encoding='utf-8') as f_config:
    # safe_load is sufficient for plain config files and will not construct
    # arbitrary Python objects (the old read() + yaml.load(FullLoader) was overkill).
    config = yaml.safe_load(f_config)
dataset = config["train"]["dataset_folder"]
data_transform = transforms.Compose([
    transforms.Resize(512),
    transforms.ToTensor(),              # HWC [0,255] -> CHW [0,1]
    transforms.Normalize([0.5], [0.5])  # single-channel mean/std
])
train_dataset = Mydataset(dataset_folder=dataset,
                          transform=data_transform)
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=8,
                          shuffle=True,
                          num_workers=0,
                          collate_fn=train_dataset.collate_fn)
for step, data in enumerate(train_loader):
    imgs, labels = data
    print(imgs.shape)
把两个tensor的shape打印出来,结果如下
2.网络的搭建
- 网络的主体应该分为两个部分一个是网络,一个是网络的传播路径forward
- 初始化网络,分为两类,一类是随机初始化,一类是加载预训练模型.
随机初始化
#自定义的随机初始化,# 1. 根据网络层的不同定义不同的初始化方式\
#一般需要初始化的层数包括卷积层、BN层、linear层
def _initialize_weights(self):
#m 为module里面的所有层数,一般需要初始化的
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0]*m.kernel_size[1]*m.outchannels
m.weight.data.normal_(0, math.sqrt(2./n))
if m.bias is not None:
m.bias,data.zero_()
elif isinstance(m, nn.BatchNormal2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.weight.data.normal_(0, 0.01)
m.bias.data.zero_()
#Defined inside the model: Kaiming initialization, PyTorch's common choice
# NOTE(review): fragment — assumes it runs inside an nn.Module method
# (`self` in scope); confirm its enclosing definition.
for m in self.modules():
    if isinstance(m, nn.Conv2d):
        # 'fan_in' preserves the magnitude of weight variance in the
        # forward pass; 'fan_out' preserves it in the backward pass
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
    # BN/GroupNorm layers: weight (scale) = 1, bias (shift) = 0
    elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
网络结构,意思一下
import torch
from torch import nn
class YourNet(nn.Module):
    """Skeleton network: one conv layer standing in for a real architecture."""

    def __init__(self):
        super(YourNet, self).__init__()
        # First conv: 3 -> 8 channels, 7x7 kernel, stride 1, padding 1.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=7, stride=1, padding=1)

    def forward(self, x):
        # Single-stage forward pass.
        return self.conv1(x)
以LeNet为例
模型结构图
import torch
from torch import nn
from torchsummary import summary
class MyLeNet(nn.Module):
    """LeNet-5 style CNN for 1x28x28 inputs (e.g. MNIST), 10 output classes."""

    def __init__(self):
        super(MyLeNet, self).__init__()
        # padding=2 keeps the 28x28 spatial size through the 5x5 conv.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        self.Sigmoid = nn.Sigmoid()
        self.avgpool = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Typo "faltten" fixed; nn.Flatten has no parameters, so the
        # state_dict is unaffected by the rename.
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(16 * 5 * 5, 84)
        self.output = nn.Linear(84, 10)

    def forward(self, x):        # input (1, 28, 28)
        x = self.conv1(x)        # (1,28,28)  -> (6,28,28)
        x = self.Sigmoid(x)
        x = self.avgpool(x)      # (6,28,28)  -> (6,14,14)
        x = self.conv2(x)        # (6,14,14)  -> (16,10,10)
        x = self.Sigmoid(x)
        x = self.avgpool(x)      # (16,10,10) -> (16,5,5)
        # BUG FIX: the shape note on this line was missing its '#',
        # which made the original a SyntaxError.
        x = self.flatten(x)      # (16,5,5)   -> (400,)
        x = self.linear1(x)      # 400 -> 84
        x = self.output(x)       # 84 -> 10
        return x
if __name__ == "__main__":
    # Prefer GPU when available; summary() prints per-layer output
    # shapes and parameter counts for a (1, 28, 28) input.
    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = MyLeNet().to(dev)
    summary(net, (1, 28, 28))
函数 summary(model, input_size)可以打印模型的层数参数量以及每一层的输出shape,返回的效果
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 6, 28, 28] 156
Sigmoid-2 [-1, 6, 28, 28] 0
AvgPool2d-3 [-1, 6, 14, 14] 0
Conv2d-4 [-1, 16, 10, 10] 2,416
Sigmoid-5 [-1, 16, 10, 10] 0
AvgPool2d-6 [-1, 16, 5, 5] 0
Flatten-7 [-1, 400] 0
Linear-8 [-1, 84] 33,684
Linear-9 [-1, 10] 850
================================================================
Total params: 37,106
Trainable params: 37,106
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.11
Params size (MB): 0.14
Estimated Total Size (MB): 0.26
----------------------------------------------------------------
根据图像大小(nh,nw),padding, kernel_size,以及stride 计算卷积后的大小公式