# 加载库
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.utils
from torchvision import datasets, transforms
from torch.utils.data import DataLoader # 用于加载数据集
import matplotlib.pylab as plt
import numpy as np
"""定义超参数"""
batch_size = 16
device = torch.device("cuda" if torch.cuda.is_available()
else "cpu") # 是否用GPU训练
epochs = 10
# 构建pipeline, 对图像预处理
pipeline = transforms.Compose([transforms.ToTensor(), # 将数据转换成tensor
transforms.Normalize((0.1307,), (0.3081))]) # transforms.Normalize用于正则化,可以降低模型复杂度,避免过拟合。参数分别为标准差和均值,均由官网给出默认值
"""Load the MNIST dataset.

Dataset contents (from the MNIST distribution):
  t10k-images.idx3-ubyte  — 10,000 test images
  t10k-labels.idx1-ubyte  — test labels
  train-images.idx3-ubyte — 55,000 train + 5,000 validation images
  train-labels.idx1-ubyte — train labels
"""
# NOTE(review): the normalizing `pipeline` defined above is NOT used here — both
# datasets get a bare ToTensor(); confirm whether normalization was intended.
train_data = datasets.MNIST(root="F:/pythonProject/machine_learning/modules/MNIST_dataset",  # local dataset directory
                            train=True,
                            transform=transforms.ToTensor(),
                            download=False)  # set to True on the first run to download
# Fixed: root had a typo ("MNIST_datasette") that pointed the test set at a
# non-existent directory, different from the train set's.
test_data = datasets.MNIST(root="F:/pythonProject/machine_learning/modules/MNIST_dataset",
                           train=False,
                           transform=transforms.ToTensor(),
                           download=False)  # set to True on the first run to download
# Wrap the datasets in batched, shuffled loaders.
train_loader = DataLoader(dataset=train_data,
                          batch_size=64,
                          shuffle=True)
test_loader = DataLoader(dataset=test_data,
                         batch_size=64,
                         shuffle=True)
"""数据可视化"""
# for num, (image, label) in enumerate(train_data):
# image_batch = torchvision.utils.make_grid(image, padding=2) # 返回(C,H,W)数据(多张图拼凑成了一张图)
# plt.imshow(np.transpose(image_batch.numpy(), (1, 2, 0)), vmin=0, vmax=255) # transpose作用是将矩阵转置,其中0,1,2分别表示x, y, z轴的数据
# plt.show()
# print(label)
"""
make_grid参数解释:将多个图像组合成一个网格图。它接受一个图像列表作为输入,并返回一个组合好的网格图,可以使用PyTorch的transforms来进行变换。
tensor (Tensor or list) – 4D mini-batch Tensor of shape (B x C x H x W) or a list of images all of the same size.
nrow (int, optional) – 每一行显示的图像数. 最终图标尺寸为(B / nrow, nrow). 默认为8.
padding (int, optional) –填充. Default is 2.
normalize (bool, optional) – If True, 归一化图像到(0, 1)区间, by subtracting the minimum and dividing by the maximum pixel value.
range (tuple, optional) – tuple (min, max) where min and max are numbers, then these numbers are used to normalize the image. By default, min and max are computed from the tensor.
scale_each (bool, optional) – If True, scale each image in the batch of images separately rather than the (min, max) over all images.
pad_value (float, optional) – Value for the padded pixels.
"""
# with open('modules/MNIST_dataset/t10k-images.idx3-ubyte') as f:
# file = f.read() # 读取文件
# print(file)
# image1 = [int(str(item).encode('ascii'), 16) for item in file[16 : ]]
"""构建网络模型"""
class Digit(nn.Module):
    """Small CNN for 28x28 grayscale MNIST digits (10 classes).

    forward() returns per-class log-probabilities of shape (batch, 10).
    """

    def __init__(self):
        super().__init__()
        # 1 input channel (grayscale) -> 10 feature maps, 5x5 kernel
        self.conv1 = nn.Conv2d(1, 10, 5)
        # 10 -> 20 feature maps, 3x3 kernel
        self.conv2 = nn.Conv2d(10, 20, 3)
        # flattened 20 * 10 * 10 = 2000 features -> 500 hidden units
        self.fc1 = nn.Linear(20*10*10, 500)
        # 500 hidden units -> 10 class scores
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Map a (batch, 1, 28, 28) image batch to (batch, 10) log-probabilities."""
        batch = x.size(0)
        # conv1: (B,1,28,28) -> (B,10,24,24); 2x2 max-pool halves to (B,10,12,12)
        feats = F.max_pool2d(F.relu(self.conv1(x)), 2, 2)
        # conv2: (B,10,12,12) -> (B,20,10,10)
        feats = F.relu(self.conv2(feats))
        # flatten the feature maps: (B,20,10,10) -> (B,2000)
        flat = feats.view(batch, -1)
        hidden = F.relu(self.fc1(flat))   # (B,2000) -> (B,500)
        scores = self.fc2(hidden)         # (B,500) -> (B,10)
        # log_softmax turns scores into log-probabilities over the 10 digits
        return F.log_softmax(scores, dim=1)
"""定义优化器"""
# 创建一个模型实例
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Digit().to(device) # to(device)表示将模型部署在GPU或者CPU上 详细参考:http://t.csdn.cn/Lnh2I
optimizer = optim.Adam(model.parameters())
"""定义训练方法"""
def train_model(model, device, epoch, train_loader, optimizer):
# 模型训练
model.train()
for batch_index, (data, label) in enumerate(train_loader):
# 将遍历到的数据部署到device上
data , label = data.to(device), label.to(device)
# 梯度初始化为0
optimizer.zero_grad() # 每一个epoch训练时都需要初始化为0
# 输出训练后的结果
output = model(data) # 输出为一个概率值矩阵
# 计算损失
loss = F.cross_entropy(output, label) # 使用交叉熵损失函数计算预测值与真实值的差距
# 找到概率值最大的下标(索引)
prediction = output.max(1, keepdim=True) # 调用max方法,1表示横轴,返回每一行最大值的下标
# 或者使用prediction = output.argmax(dim=1)
# 反向传播
loss.backward()
# 参数优化 (更新所有参数)
optimizer.step()
if batch_index % 3000 ==0: # 每3000个epoch打印一次Loss,一共60000张数据, %表示取余运算
print("Train Epoch:{} \t Loss: {:.6f}".format(epoch, loss.item())) # \t :表示空4个字符,会打印出20个epoch的Loss值,保留6位有效数字
# 不知道item()用法的,参考:http://t.csdn.cn/UPk4E
"""定义测试方法"""
def test_model(model, device, test_loader):
# 模型验证
model.eval() # 直接调用库里面自带的验证方法
# 统计正确率和测试损失值
correct = 0.0 # 先初始化正确率
test_loss = 0.0 # 先初始化测试损失
with torch.no_grad(): # 注意,测试环节不用计算梯度,也不会反向传播和参数更新
for data, label in test_loader:
data, label = data.to(device), label.to(device) # 部署到设备上
output = model(data) # 将测试数据放进模型中,得到预测概率
# 计算测试损失
test_loss += F.cross_entropy(output, label).item()
# 找到概率值最大的下标(索引)
pred = output.max(1, keepdim=True)[1] # 返回的是:[值,索引],这里需要索引,因此后面是[1]
# 累计正确值
correct += pred.eq(label.view_as(pred)).sum().item() # 官方文档是这么写的
# 平均损失值
test_loss /= len(test_loader.dataset)
print("Test--Average Loss: {:.4f}, Accuracy: {:.3f}\n".format(test_loss, 100.0 * correct / len(test_loader.dataset))) # 这里的loss和accuracy都是平均后的值
"""调用方法进行训练"""
PATH = './modules'
for epoch in range(1, epochs+1): # range函数返回一个range类型的整数序列,一般用在循环结构中。
train_model(model, device, epoch, train_loader, optimizer)
test_model(model, device, test_loader) # 运行测试
# Source note: beginner computer-vision project — handwritten digit recognition
# on MNIST, with nearly every line commented. (Blog metadata: latest recommended
# article published 2024-01-26 18:06:10.)