手写数字识别
step1.数据获取
引入路径处理库pathlib来管理数据路径,并使用requests从MNIST镜像地址下载数据
#%matplotlib inline
from pathlib import Path
import requests

DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"  # dataset lives under data/mnist
PATH.mkdir(parents=True, exist_ok=True)  # create the directory tree if missing

# Bug fix: the original URL (http://yann.lecun.com/exdb/mnist/) hosts the raw
# ubyte files, not mnist.pkl.gz, and is frequently unreachable; the PyTorch
# tutorials mirror serves the pickled copy this script expects.
URL = "https://github.com/pytorch/tutorials/raw/main/_static/"
FILENAME = "mnist.pkl.gz"

# Bug fix: `requests` was imported but the file was never actually downloaded.
# Fetch it once; skip the network round-trip when a local copy already exists.
if not (PATH / FILENAME).exists():
    content = requests.get(URL + FILENAME).content
    (PATH / FILENAME).write_bytes(content)
导入gzip库将下载的数据解压
这里pickle.load(file) #反序列化对象。将文件中的数据解析为一个Python对象。
import pickle
import gzip

# Deserialize ((x_train, y_train), (x_valid, y_valid), test) from the gzip'd
# pickle. as_posix() yields a forward-slash path string; "latin-1" is a
# single-byte encoding needed to read this Python-2-era pickle.
with gzip.open((PATH / FILENAME).as_posix(), "rb") as mnist_file:
    (x_train, y_train), (x_valid, y_valid), _ = pickle.load(
        mnist_file, encoding="latin-1"
    )
我们打印数据大小和第一个训练数据看一下
# 每一幅图片都是 28 × 28 像素的大小,在压缩文件中,它被扁平化保存为一个长度为784的向量. 为了显示该图像,需要首先将之转换为2d数据.
import numpy as np
import matplotlib.pyplot as plt

# Each 28x28 image is stored flattened as a length-784 vector; reshape it
# back to 2-D so matplotlib can render it.
first_image = x_train[0].reshape((28, 28))
plt.imshow(first_image, cmap="gray")
plt.show()
print(x_train.shape)
step2 ,数据准备
使用map函数把训练集和验证集全部映射成tensor格式
import torch

# Convert all four NumPy arrays to torch tensors in one pass.
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape  # n samples, c = 784 flattened pixels

# Sanity prints: raw data, shapes, and the label range (expected 0..9).
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())
print(x_valid, y_valid)
print(x_valid.shape)
打印看一下
step3.构建网络模型
(使用torch.nn模块)
import torch.nn.functional as F
#构建网络模型
from torch import nn
class Mnist_NN(nn.Module):
    """Simple MLP classifier for MNIST: 784 -> 128 -> 256 -> 10 logits."""

    def __init__(self):
        super().__init__()
        # Two fully-connected hidden layers plus an output layer; the input
        # is a flattened 28x28 image, the output one score per digit class.
        self.hidden1 = nn.Linear(784, 128)
        self.hidden2 = nn.Linear(128, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, x):
        # ReLU after each hidden layer; return raw logits — cross_entropy
        # applies the softmax itself.
        h1 = F.relu(self.hidden1(x))
        h2 = F.relu(self.hidden2(h1))
        return self.out(h2)
# Instantiate the network; printing an nn.Module lists its layer structure.
net = Mnist_NN()
print(net) # show the model architecture
torch里的nn.module模块已经帮我们初始化好了每一层的参数
# nn.Module has already initialized every layer's weights and biases;
# named_parameters() (inherited from nn.Module) lists them by name.
for pname, param in net.named_parameters():
    print(pname, param, param.size())
对训练集和验证集分别做一个封装,训练集每次取64个,验证集每次取128个
#使用TensorDataset和DataLoader来简化
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
# Wrap the tensors so each index yields an (image, label) pair.
train_ds = TensorDataset(x_train,y_train)
valid_ds = TensorDataset(x_valid,y_valid)
def get_data(train_ds, valid_ds, bs):
    """Wrap both datasets in DataLoaders.

    The training loader shuffles each epoch and yields batches of `bs`;
    the validation loader needs no shuffling and, since no gradients are
    stored during evaluation, can afford double the batch size.
    """
    train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)
    valid_dl = DataLoader(valid_ds, batch_size=bs * 2)
    return train_dl, valid_dl
step4 训练方法
# Training utilities.
# NOTE(review): `weights`/`bias` (and the `model` function below) sketch a
# hand-rolled linear classifier, but the rest of the script trains Mnist_NN
# instead — only `loss_func` is actually reused later.
weights = torch.randn([784,10],dtype = torch.float,requires_grad = True)
bias = torch.zeros(10,requires_grad = True)
loss_func = F.cross_entropy # cross-entropy loss for multi-class classification
def model(Xb):
    """Hand-rolled linear layer: score a batch with the global `weights`/`bias`."""
    return Xb @ weights + bias
def loss_batch(model, loss_func, xb, yb, opt=None):
    """Compute the loss for one mini-batch; step the optimizer if given.

    Returns (loss_value, batch_size); loss.item() converts the scalar
    tensor to a plain Python float.
    """
    batch_loss = loss_func(model(xb), yb)
    if opt is not None:
        batch_loss.backward()   # accumulate gradients
        opt.step()              # update parameters
        opt.zero_grad()         # clear gradients for the next batch
    return batch_loss.item(), len(xb)
#fit方法做训练和测试
def fit(steps, model, loss_func, opt, train_dl, valid_dl):
    """Train for `steps` epochs, reporting the validation loss after each.

    `model.train()` / `model.eval()` toggle layers such as BatchNorm and
    Dropout between training and inference behavior.
    """
    for step in range(steps):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        # No gradients are needed for evaluation.
        with torch.no_grad():
            batch_results = [loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
        losses, nums = zip(*batch_results)
        # Size-weighted mean: the last batch may be smaller than the rest.
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)
        print('当前step:' + str(step), '验证集损失:' + str(val_loss))
step5.定义模型,分批传入数据训练网络并测试
bs = 64 # mini-batch size for the training DataLoader
from torch import optim
def get_model():
    """Build a fresh Mnist_NN and a plain SGD optimizer (lr=0.001) for it."""
    net = Mnist_NN()
    opt = optim.SGD(net.parameters(), lr=0.001)
    return net, opt
# Wire everything together: model + optimizer, batched loaders, then train.
model, opt = get_model()
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
fit(25, model, loss_func, opt, train_dl, valid_dl)  # 25 epochs
执行结果:
损失明显降低