图像分类数据集(FASHION-MNIST)
1.导入基础包
import torch
#主要用来构建计算机视觉模型
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from IPython import display
import time
import sys
# sys.path.append('..')#为了导入d2lzh_pytorch
# import d2lzh_pytorch as d2l
2.下载训练数据集
# Build the training and test splits with identical settings; only the
# `train` flag differs (True first, then False). Images are converted to
# tensors by the ToTensor transform.
mnist_train, mnist_test = (
    torchvision.datasets.FashionMNIST(
        root='~/Datasets/FashionMNIST',
        train=is_train,
        download=True,
        transform=transforms.ToTensor(),
    )
    for is_train in (True, False)
)
# Uncomment to dump the first sample's tensor and label:
# print(mnist_train[0][0], '\nlabel:', mnist_train[0][1])
# Inspect the dataset: its class, then the number of samples in each split.
print(type(mnist_train))
print(len(mnist_train), len(mnist_test))
运行结果:
<class 'torchvision.datasets.mnist.FashionMNIST'>
60000 10000
代码:
# Any sample can be accessed by index; each one unpacks into (feature, label).
feature,label=mnist_train[0]
print(feature.shape,label)
运行结果:
torch.Size([1, 28, 28]) 9
def use_svg_display():
    """Render inline matplotlib figures as SVG (vector graphics).

    The format passed here must be 'svg' to match this function's name;
    'retina' would select a high-resolution bitmap instead of a vector image.

    NOTE(review): newer IPython releases deprecate
    ``IPython.display.set_matplotlib_formats`` in favour of the equivalent
    function in ``matplotlib_inline.backend_inline`` -- confirm against the
    installed IPython version.
    """
    display.set_matplotlib_formats('svg')
def get_fashion_mnist_labels(labels):
    """Translate numeric Fashion-MNIST class indices into their text names.

    Args:
        labels: Iterable of indices into the 10-entry class-name table.

    Returns:
        A list holding the class name for each index, in input order.
    """
    class_names = [
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot',
    ]
    names = []
    for idx in labels:
        names.append(class_names[idx])
    return names
#在一行里面画出多张图像和对应标签的函数
def show_fashion_mnist(images, labels):
    """Draw several images side by side in one row, each titled with its label.

    Args:
        images: Sequence of image tensors; each is viewed as 28x28 before
            being plotted.
        labels: Sequence of title strings, paired with ``images`` by position.

    Returns:
        None. The figure is displayed via ``plt.show()``. An empty ``images``
        sequence draws nothing.
    """
    # len() rather than truthiness, so a batched tensor is also accepted.
    if len(images) == 0:
        # A subplot grid with zero columns cannot be created.
        return
    use_svg_display()
    # '_' holds the Figure, which is not needed here.
    # squeeze=False always yields a 2-D array of axes, so a single image does
    # not come back as one bare (non-iterable) Axes object.
    _, axes_grid = plt.subplots(1, len(images), figsize=(25, 25), squeeze=False)
    for f, img, lbl in zip(axes_grid[0], images, labels):
        f.imshow(img.view((28, 28)).numpy())
        # NOTE(review): white title text is presumably meant for a dark
        # notebook theme; it is invisible on a white background -- confirm.
        f.set_title(lbl, color='white')
        # Hide both axes: tick marks carry no information for image tiles.
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
    plt.show()
# Show the images and class names of the first 10 training samples.
# Each sample is indexed exactly once and split into its (image, label) parts,
# instead of indexing the dataset twice per sample.
x, y = map(list, zip(*(mnist_train[i] for i in range(10))))
show_fashion_mnist(x, get_fashion_mnist_labels(y))
运行结果:
3.读取小批量数据
# Mini-batch size shared by both loaders.
batch_size = 256
# Number of worker processes used to read data: 2 on Windows, 4 elsewhere.
# (A value of 0 would mean no extra processes are used for loading.)
# NOTE(review): on Windows, worker processes are started with "spawn"; when
# this runs as a plain script rather than in a notebook it presumably needs an
# `if __name__ == '__main__':` guard -- confirm the intended environment.
num_workers = 2 if sys.platform.startswith('win') else 4
# Training batches are reshuffled every pass; test batches keep dataset order.
train_iter = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
test_iter = torch.utils.data.DataLoader(
    mnist_test,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)
# Time one full pass over the training loader (reading batches only).
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock, which can be adjusted mid-run.
start = time.perf_counter()
# Note: this loop rebinds the module-level names x and y to the last batch.
for x, y in train_iter:
    continue
print('%.2f sec' % (time.perf_counter() - start))
运行结果:
4.77 sec