前言
记录一下自己学习的过程,内容主要来自于B站的一位up主,在此非常感谢他无私的奉献精神。看到他的视频请一键三连!
- up主的B站链接:https://space.bilibili.com/18161609/channel/series
- up主的csdn链接:https://blog.csdn.net/qq_37541097?type=blog
- up主的github链接:https://github.com/WZMIAOMIAO/deep-learning-for-image-processing
数据集下载
- 本次使用到的花分类数据集下载链接:http://download.tensorflow.org/example_images/flower_photos.tgz
划分数据集
对于准备划分的数据我们要求文件夹有以下文件格式。
|--your_dataset
|--class1
|--class2
|--class3
划分后的文件呈现以下文件夹格式。
|--data
|--train
|--class1
|--class2
|--class3
|--val
|--class1
|--class2
|--class3
|--test
|--class1
|--class2
|--class3
import os
from shutil import copy
import random
# Create the directory `file` if it does not already exist.
def mkfile(file):
    """Create *file* as a directory (including missing parents).

    Does nothing if the directory already exists. Using ``exist_ok=True``
    avoids the check-then-create race of ``os.path.exists`` + ``os.makedirs``.
    """
    os.makedirs(file, exist_ok=True)
# Collect every entry under the dataset root that is not a .txt file —
# each remaining entry is assumed to be one class directory.
file_path = r'your_dataset_path'  # your dataset path
pet_class = [cla for cla in os.listdir(file_path) if ".txt" not in cla]

# Create the data/{train,val,test}/<class> directory trees.
for subset in ('train', 'val', 'test'):
    mkfile('data/' + subset)
    for cla in pet_class:
        mkfile('data/' + subset + '/' + cla)

# Split ratio: train : val : test = 6 : 2 : 2.
# The test subset takes the remainder after the train/val slices, so every
# image is used exactly once (a separate test_rate variable is unnecessary).
train_rate = 0.6
val_rate = 0.2

for cla in pet_class:
    cla_path = file_path + '/' + cla + '/'
    images = os.listdir(cla_path)
    num = len(images)
    # Sizes of the train and val slices; test gets whatever is left.
    train_num = int(num * train_rate)
    val_num = int(num * val_rate)
    # random.sample(images, k=num) returns a shuffled copy of the names.
    all_images = random.sample(images, k=num)
    train_images = all_images[:train_num]
    val_images = all_images[train_num:train_num + val_num]
    test_images = all_images[train_num + val_num:]
    # Copy each image into its subset folder.
    for subset, subset_images in (('train', train_images),
                                  ('val', val_images),
                                  ('test', test_images)):
        for image in subset_images:
            copy(cla_path + image, 'data/' + subset + '/' + cla)
    print("\r[{}] processing done".format(cla))
print("\nData splitting done!")
1 AlexNet网络详解
AlexNet是2012年ILSVRC2012(ImageNet Large Scale Visual Recognition Challenge)竞赛的冠军网络,分类准确率由传统的 70%+提升到 80%+。它是由Hinton和他的学生Alex Krizhevsky设计的。也是在那年之后,深度学习开始迅速发展。
过拟合问题
Dropout方法在正向传播过程中随机失活部分神经元,降低神经元之间的共适应性(相当于每次训练一个不同的子网络),从而缓解过拟合问题。注意它并不减少网络的参数总量,只是每次迭代中部分神经元不参与计算。
由于当时实验设备资源受限,原始AlexNet网络是在两块gpu上进行训练的。AlexNet网络主要分为11层。分别是Conv1, Maxpool1, Conv2, Maxpool2, Conv3, Conv4, Conv5, Maxpool3, FC, FC, FC 每一层的具体数据如下:
1.1 总结
2 model.py
import torch
import torch.nn as nn
class AlexNet(nn.Module):
    """AlexNet classifier for 3 x 224 x 224 inputs.

    Args:
        num_classes: width of the final classification layer.
        init_weights: when True, apply Kaiming init to conv layers and a
            small-normal init to linear layers instead of PyTorch defaults.
    """

    def __init__(self, num_classes=1000, init_weights=False):
        super(AlexNet, self).__init__()
        # Convolutional backbone. The paddings follow this reference
        # implementation; the original paper used (1, 2) for conv1 and
        # (2, 2) for conv2.
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Classifier head over the flattened 256 * 6 * 6 feature map.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(6 * 6 * 256, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        # (N, 3, 224, 224) -> (N, 256, 6, 6) -> (N, num_classes)
        feature_maps = self.features(x)
        flattened = torch.flatten(feature_maps, start_dim=1)
        return self.classifier(flattened)

    # Custom initialisation; PyTorch layers already self-initialise, so this
    # is only invoked when init_weights=True.
    def _initialize_weights(self):
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out',
                                        nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)
3 train.py
3.1 代码
import os
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm
from model import AlexNet
def main():
    """Train AlexNet on the flower dataset, keeping the best checkpoint.

    Expects the dataset under <repo-root>/data_set/flower_data/{train,val};
    writes class_indices.json and ./AlexNet.pth as side effects.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        # Resize needs the (224, 224) tuple form: a bare int would only
        # resize the shorter side and keep the aspect ratio.
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # e.g. {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflower': 3, 'tulips': 4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # Persist the index -> class-name mapping for use at prediction time.
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    net = AlexNet(num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics (.item() detaches the scalar from the graph)
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss.item())

        # validate
        net.eval()
        acc = 0.0  # number of correctly classified validation images this epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Checkpoint only when validation accuracy improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()
3.2 问题
记录一下自己的一些小疑问。
经过torchvision.datasets.ImageFolder
操作之后的数据会有以下三个属性。
- classes (list): List of the class names sorted alphabetically.
- class_to_idx (dict): Dict with items (class_name, class_index).
- imgs (list): List of (image path, class_index) tuples.
下面我们通过一个小实验来验证上面三个属性。
input:
from torchvision import datasets
image = datasets.ImageFolder(root=r"E:\Dateset\flower_photos")
print('属性1{}'.format(image.classes))
print('属性2{}'.format(image.class_to_idx))
print('属性3{}'.format(image.imgs))
output(部分)
属性1 ['daisy', 'dandelion', 'flower_photos', 'roses', 'sunflowers', 'tulips']
属性2 {'daisy': 0, 'dandelion': 1, 'flower_photos': 2, 'roses': 3, 'sunflowers': 4, 'tulips': 5}
属性3 [('E:\\Dateset\\flower_photos\\daisy\\100080576_f52e8ee070_n.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\10140303196_b88d3d6cec.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\10172379554_b296050f82_n.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\10172567486_2748826a8b.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\10172636503_21bededa75_n.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\102841525_bd6628ae3c.jpg', 0)]
4 predict
4.1 代码
import os.path
import matplotlib.pyplot as plt
import torch
from torchvision import transforms
from PIL import Image
import json
from model import AlexNet
def main():
    """Classify a single image with trained AlexNet weights and display it.

    Requires img_path to be filled in, plus class_indices.json and
    ./AlexNet.pth produced by train.py.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Same normalisation statistics as ImageNet pre-processing.
    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

    # Load the image to predict.
    img_path = ''
    assert os.path.exists(img_path), "file: {} does not exist".format(img_path)
    img = Image.open(img_path)
    # Show the PIL image itself; the original code mistakenly passed the
    # path string (img_path) to plt.imshow, which raises a TypeError.
    plt.imshow(img)
    img = data_transform(img)
    # Prepend a batch dimension: (C, H, W) -> (1, C, H, W).
    img = torch.unsqueeze(img, dim=0)

    # Read the class-index -> class-name mapping written by train.py.
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    model = AlexNet(num_classes=5).to(device)
    # load model weights
    weights_path = "./AlexNet.pth"
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path))

    model.eval()
    with torch.no_grad():
        # predict class: squeeze drops the batch dimension from the logits
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
                                               predict[predict_cla].numpy())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10} prob: {:.3}".format(class_indict[str(i)],
                                                predict[i].numpy()))
    plt.show()


if __name__ == '__main__':
    main()
4.2 问题
torch.squeeze()
torch.squeeze(input, dim=None)
,该函数总共有两个参数,维度dim
可以传入int
整型获取tuple
元组类型。对于不指定dim
的输出将会删除所有输入大小为1的维度。
看一下官网的例子更加容易明白。
>>> x = torch.zeros(2, 1, 2, 1, 2)
>>> x.size()
torch.Size([2, 1, 2, 1, 2])
>>> y = torch.squeeze(x)
>>> y.size()
torch.Size([2, 2, 2])
>>> y = torch.squeeze(x, 0)
>>> y.size()
torch.Size([2, 1, 2, 1, 2])
>>> y = torch.squeeze(x, 1)
>>> y.size()
torch.Size([2, 2, 1, 2])
>>> y = torch.squeeze(x, (1, 2, 3))
>>> y.size()
torch.Size([2, 2, 2])
torch.unsqueeze
torch.unsqueeze起到升维的作用。
看一下具体的例子就可以明白。
a = torch.randn(2,3)
>>>a.shape
torch.Size([2, 3])
>>>b = torch.unsqueeze(a, dim=0)
torch.Size([1, 2, 3])
>>>c = torch.unsqueeze(a, dim=1)
torch.Size([2, 1, 3])
>>> d = torch.unsqueeze(a, dim=2)
torch.Size([2, 3, 1])