以AlexNet为例,添加一些PyTorch函数的整理。
torch.utils.data.DataLoader
DataLoader用于接收一个Dataset类的对象并返回一个DataLoader类的对象。调用该函数时,我们往往需要指定batch_size(每个batch中图片的个数),shuffle(是否在每个epoch开始时打乱数据),num_workers(加载数据所用的子进程个数,通常在linux系统下才设为大于0的值,0表示只在主进程中加载)。
这个函数往往要与dataloader.py脚本中的Dataset类结合使用。
DataLoader的函数原型如下:
torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=None,
sampler=None, batch_sampler=None, num_workers=0,
collate_fn=None, pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None, multiprocessing_context=None, generator=None, *, prefetch_factor=2, persistent_workers=False, pin_memory_device='')
torch.nn.Module.to()函数
我们主要用该函数来指定训练时候采用CPU还是GPU,往往要在前面增加对设备的判断,然后再指定设备。
# Pick the device with an explicit check: CUDA device 0 when available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Move the network to the chosen device
net.to(device)
这个函数的函数原型如下:
to(*args, **kwargs)
定义损失函数,指定优化器,查看网络参数
# --- Define the loss function
loss_function = nn.CrossEntropyLoss()
## Usage
# loss = loss_function(outputs, labels.to(device))
# loss.backward()
# --- Define the optimizer over all parameters of the network
optimizer = optim.Adam(params=net.parameters(), lr=2e-4)
## Usage
# optimizer.zero_grad()
# optimizer.step()
# Inspect the network parameters as a plain list
net_paras = [*net.parameters()]
epoch循环用于训练模型
net.train()将模型切换到训练模式,此时dropout正常工作(按概率随机丢弃神经元);net.eval()将模型切换到验证模式,此时dropout被关闭(失活),不再丢弃神经元。
# Where the best weights are stored, and the best validation accuracy seen so far.
save_path = project_path + "/AlexNet.pth"
best_acc = 0.0
# Train for 10 epochs; after every epoch evaluate on the validation set and
# save the weights whenever the validation accuracy improves.
for epoch in range(10):
    # train
    net.train()  # training mode (enables training-time behaviour such as dropout)
    running_loss = 0.0
    num_batches = len(train_loader)
    t1 = time.perf_counter()
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        # print statistics: accumulate the scalar loss for the epoch average
        loss_value = loss.item()
        running_loss += loss_value
        # print train process: "*" marks finished batches, "." the remaining
        # ones; the two parts always add up to 50 characters
        rate = (step + 1) / num_batches
        done = int(rate * 50)
        a = "*" * done
        b = "." * (50 - done)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss_value), end="")
    print()
    print(time.perf_counter() - t1)  # seconds spent training this epoch
    # validate
    net.eval()  # evaluation mode (disables dropout)
    acc = 0.0  # accumulate accurate number / epoch
    with torch.no_grad():  # no gradients are needed for evaluation
        for data_test in validate_loader:
            test_images, test_labels = data_test
            outputs = net(test_images.to(device))
            predict_y = outputs.argmax(dim=1)  # index of the highest score per sample
            acc += (predict_y == test_labels.to(device)).sum().item()
        accurate_test = acc / val_num
        if accurate_test > best_acc:
            best_acc = accurate_test
            torch.save(net.state_dict(), save_path)
        # Average the loss over the number of batches; `step` is the last
        # 0-based index and would be one short.
        print("[epoch %d] train_loss: %.3f test_accuracy: %.3f" %
              (epoch + 1, running_loss / num_batches, accurate_test))
进行前向传播(FP)与反向传播(BP)的代码:
# After unpacking `images` and `labels` from a DataLoader batch ...
optimizer.zero_grad()
# Move both the inputs and the targets to the training device
inputs = images.to(device)
targets = labels.to(device)
# Forward pass, loss computation, backward pass, parameter update
outputs = net(inputs)
loss = loss_function(outputs, targets)
loss.backward()
optimizer.step()
计时代码:
# Timing pattern: take a start timestamp, run the work, print the elapsed seconds.
start = time.perf_counter()
# model training code goes here
pass
elapsed = time.perf_counter() - start
print(elapsed)
可视化训练进度代码:
# print train process
# Fraction of the epoch's batches that are finished (step is 0-based).
rate = (step + 1) / len(train_loader)
# "*" marks finished batches and "." the remaining ones; deriving the second
# part from the first keeps the bar exactly 50 characters wide.
done = int(rate * 50)
a = "*" * done
b = "." * (50 - done)
# "\r" with end="" redraws the bar in place on the same terminal line.
print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss.item()), end="")
源码
# --- add path
import os, sys
from sklearn.utils import shuffle
# Absolute directory containing this script. abspath() is applied first because
# __file__ may be a bare relative name (e.g. "train.py"), in which case
# dirname() alone returns "" and every path built as project_path + "/..."
# would point at the filesystem root.
project_path = os.path.dirname(os.path.abspath(__file__))
# Make sibling modules (such as model.py) importable; avoid duplicate entries.
if project_path not in sys.path:
    sys.path.append(project_path)
# ---
import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import torch.optim as optim
from model import AlexNet
import os, json, time
import matplotlib.pyplot as plt
import numpy as np
# Train on CUDA device 0 when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)
def _tensor_and_normalize():
    """Return the final two steps shared by both pipelines as fresh instances."""
    return [
        transforms.ToTensor(),
        transforms.Normalize((0.5,) * 3, (0.5,) * 3),
    ]


# Preprocessing per split: the training pipeline applies a random resized crop
# to 224 and a random horizontal flip, the validation pipeline only resizes to
# 224x224; both finish with ToTensor and the same normalization.
data_transform = dict(
    train=transforms.Compose(
        [
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            *_tensor_and_normalize(),
        ]
    ),
    val=transforms.Compose(
        [
            transforms.Resize((224, 224)),
            *_tensor_and_normalize(),
        ]
    ),
)
# data_root = os.path.abspath(os.path.join(os.getcwd(), "../..")) # get data root path
# image_path = data_root + "/data_set/flower_data/" # flower data set path
# Training images are read from the flower_data/train folder next to this script.
train_root = project_path + "/flower_data/train"
train_dataset = datasets.ImageFolder(root=train_root, transform=data_transform["train"])
train_num = len(train_dataset)
# Mapping from class name to class index, as provided by the dataset.
flower_list = train_dataset.class_to_idx
# Inverted mapping: class index -> class name.
cla_dict = {index: name for name, index in flower_list.items()}
# write dict into json file
json_str = json.dumps(cla_dict, indent=4)
with open(project_path + "/class_indices.json", "w") as json_file:
    json_file.write(json_str)
batch_size = 32
# Options shared by the training and the validation loader.
_loader_options = {"batch_size": batch_size, "num_workers": 0}
# Training batches are reshuffled; validation batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **_loader_options)
# Validation images are read from the flower_data/val folder next to this script.
val_root = project_path + "/flower_data/val"
validate_dataset = datasets.ImageFolder(root=val_root, transform=data_transform["val"])
val_num = len(validate_dataset)
validate_loader = torch.utils.data.DataLoader(validate_dataset, shuffle=False, **_loader_options)
# # to see pics
# test_data_iter = iter(validate_loader)
# test_image, test_label = next(test_data_iter)
# def imshow(img):
# img = img/2 + 0.5 # unnormalize
# npimg = img.numpy()
# plt.imshow(np.transpose(npimg, (1, 2, 0)))
# plt.savefig(project_path+"/to_see_picture.jpg")
# plt.show()
# print(" ".join("%5s" % cla_dict[test_label[j].item()] for j in range(4)))
# imshow(utils.make_grid(test_image))
# train the net
# Build the 5-class AlexNet and move it to the selected device.
# NOTE(review): init_weights=True presumably triggers the model's own weight
# initialisation - confirm in model.py.
net = AlexNet(num_classes=5, init_weights=True)
net = net.to(device)
# Loss function and Adam optimizer over all network parameters.
loss_function = nn.CrossEntropyLoss()
# pata = list(net.parameters())
optimizer = optim.Adam(params=net.parameters(), lr=2e-4)
# Best validation accuracy seen so far, and where the matching weights are saved.
best_acc = 0.0
save_path = project_path + "/AlexNet.pth"
# Train for 10 epochs; after every epoch evaluate on the validation set and
# save the weights whenever the validation accuracy improves.
for epoch in range(10):
    # train
    net.train()  # training mode (enables training-time behaviour such as dropout)
    running_loss = 0.0
    num_batches = len(train_loader)
    t1 = time.perf_counter()
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        # print statistics: accumulate the scalar loss for the epoch average
        loss_value = loss.item()
        running_loss += loss_value
        # print train process: "*" marks finished batches, "." the remaining
        # ones; the two parts always add up to 50 characters
        rate = (step + 1) / num_batches
        done = int(rate * 50)
        a = "*" * done
        b = "." * (50 - done)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss_value), end="")
    print()
    print(time.perf_counter() - t1)  # seconds spent training this epoch
    # validate
    net.eval()  # evaluation mode (disables dropout)
    acc = 0.0  # accumulate accurate number / epoch
    with torch.no_grad():  # no gradients are needed for evaluation
        for data_test in validate_loader:
            test_images, test_labels = data_test
            outputs = net(test_images.to(device))
            predict_y = outputs.argmax(dim=1)  # index of the highest score per sample
            acc += (predict_y == test_labels.to(device)).sum().item()
        accurate_test = acc / val_num
        if accurate_test > best_acc:
            best_acc = accurate_test
            torch.save(net.state_dict(), save_path)
        # Average the loss over the number of batches; `step` is the last
        # 0-based index and would be one short.
        print("[epoch %d] train_loss: %.3f test_accuracy: %.3f" %
              (epoch + 1, running_loss / num_batches, accurate_test))
print("Finished Training")