PyTorch实战精讲-CSDN博客

本文链接：https://blog.csdn.net/lantuxin/article/details/87709344

1.pytorch环境配置(docker)

docker环境--配置过程(配置成功--能使用nvidia-docker命令)
拉取pytorch镜像--参考网站(命令--docker pull nvcr.io/nvidia/pytorch:19.01-py3)
启动docker容器--参考网站(命令--sudo nvidia-docker run -it --rm -v local_dir:container_dir nvcr.io/nvidia/pytorch:<xx.xx>-py3)

2. pytorch中tensor的基本操作

参考网站：https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py

2.1 pytorch测试

# coding=utf-8
# Practice script: basic ways of creating tensors in PyTorch.
from __future__ import print_function
import torch
x = torch.rand(5,3)
print("x={}".format(x)) # 5 rows x 3 columns of random numbers
print(torch.empty(5,3))
print(torch.zeros(5,3,dtype=torch.long))
print(torch.tensor([5.5,3]))

x=x.new_ones(5,3,dtype=torch.double)
print("x={}".format(x))
x=torch.rand_like(x,dtype=torch.float) # same size as x, filled with random numbers drawn uniformly from [0, 1); torch.randn_like is the normal-distribution (mean 0, variance 1) variant
print("x={}".format(x))
print(x.dtype)
print(x.size())

2.2 tensor加法

# Tensor addition in PyTorch (uses x from the previous snippet)
y=(torch.rand(5,3))
print("y={}".format(y))
print("x+y={}".format(x+y)) # element-wise sum; torch.add(x,y) gives the same result
print("torch.add(x,y)={}".format(torch.add(x,y)))

result=torch.empty(5,3)
torch.add(x,y,out=result)
print("result={}".format(result)) # addition into a preallocated output; per the author's note, result must have the same type as x and y

2.3 in-place方法

# In-place methods: modify the tensor itself instead of allocating a new variable
y.add_(x)
print("y={}".format(y)) # y=y+x
# Any PyTorch op with a trailing underscore works in place, e.g. x.copy_(y), x.t_()
print("x={}".format(x))
x.copy_(y)
print("x={}".format(x))
x.t_()
print("x={}".format(x)) # x is replaced by its own transpose

# tensor resize/reshape(torch.view)
x = torch.rand(4,4)
print("x={}".format(x))
y = x.view(16)
print("y=x.view(16)={}".format(y))
print("x.view(-1,8)={}".format(x.view(-1,8))) # negative indices usually count from the right (last element is -1), but in view() -1 means: the size -1 is inferred from other dimensions
print("x.view(-1,4)={}".format(x.view(-1,4))) # further check that -1 is inferred from the other dimension: with 8 columns it becomes 2, with 4 columns it becomes 4
#print("x.view(-1,5)={}".format(x.view(-1,5)))#RuntimeError: shape '[-1, 5]' is invalid for input of size 16
print("x.view(2,8)={}".format(x.view(2,8)))
print("x.view(8,2)={}".format(x.view(8,2)))

2.4 tensor resize/reshape

# tensor resize/reshape(torch.view): reinterpret the 16 elements of a 4x4 tensor in other shapes
x = torch.rand(4,4)
print("x={}".format(x))
y = x.view(16)
print("y=x.view(16)={}".format(y))
print("x.view(-1,8)={}".format(x.view(-1,8))) # negative indices usually count from the right (last element is -1), but in view() -1 means: the size -1 is inferred from other dimensions
print("x.view(-1,4)={}".format(x.view(-1,4))) # further check that -1 is inferred from the other dimension: with 8 columns it becomes 2, with 4 columns it becomes 4
#print("x.view(-1,5)={}".format(x.view(-1,5)))#RuntimeError: shape '[-1, 5]' is invalid for input of size 16
print("x.view(2,8)={}".format(x.view(2,8)))
print("x.view(8,2)={}".format(x.view(8,2)))

2.5 获取tensor某一element的值

# Read the value of a single element (x is the 4x4 tensor from the previous snippet)
print("x[1][1]={}".format(x[1][1]))
print("x[1][1].item()={}".format(x[1][1].item()))

# Same on a one-element tensor: both x.item() and x[0].item() are printed
x=torch.randn(1)
print("x={}".format(x))
print("x.item()={}".format(x.item()))
print("x[0].item()={}".format(x[0].item()))

2.6 转换Torch tensor到numpy

# Convert a Torch tensor to a numpy array
a = torch.ones(6)
print("a={},type is {}".format(a,type(a)))
b = a.numpy()
print("b=a.numpy()={},type is {}".format(b,type(b)))

# Addition on the numpy side: to change the numpy array, change the corresponding tensor in place
#b.add_(2)#AttributeError: 'numpy.ndarray' object has no attribute 'add_'
print("b+2={}".format(b+2))
a.add_(3)
print("a={},type is {}".format(a,type(a)))
# b is printed again (without being reassigned) to show that it follows the in-place change to a
print("b=a.numpy()={},type is {}".format(b,type(b)))

2.7 转换numpy到Torch tensor

# Convert a numpy array to a Torch tensor
import numpy as np
a = np.ones(2)
print("a={},type is {}".format(a,type(a)))
b = torch.from_numpy(a)
print("b=torch.from_numpy(a)={},type is {}".format(b,type(b)))

# Does changing the numpy array change the tensor? Verified by the prints below: yes
np.add(a,1,out=a) # out --- A location into which the result is stored
print("a={},type is {}".format(a,type(a)))
print("b={},type is {}".format(b,type(b)))

2.8 tensor传入GPU

# pytorch cuda tensors (runs only when a CUDA device is available)
if torch.cuda.is_available():
    device = torch.device("cuda")
    y = torch.ones_like(x,device=device) # create a tensor of ones with the same size as x directly on the GPU
    print("\nx={},dtype is {}".format(x,x.dtype))
    print("y=torch.ones_like(x,device=device)={},dtype is {}".format(y,y.dtype))
    #z = x+y #RuntimeError: expected type torch.FloatTensor but got torch.cuda.FloatTensor -- one operand is on the GPU and the other on the CPU, so they cannot be combined
    x = x.to(device) # move x to the GPU
    print("x=x.to(device)={},dtype is {}".format(x,x.dtype))
    z = x + y
    print("z=x+y={},dtype is {}".format(z,z.dtype))

3.pytorch反向传播

3.1 requires_grad和grad_fn

# coding=utf-8
# Shows the grad_fn attached to tensors computed from a tensor that requires grad.
import torch
x = torch.ones(2,2,requires_grad=True)
print("x={}".format(x))
y = x + 3
print("y={},y.grad_fn={}".format(y,y.grad_fn)) #y.grad_fn=<AddBackward0 object at 0x7fd67a0f9be0>
z = x * y * 4
print("z={},z.grad_fn={}".format(z,z.grad_fn)) #z.grad_fn=<MulBackward0 object at 0x7fd67a0f9be0>

3.2 反向传播backward(标量对向量求导)

要实现反向传播，求偏导的自变量(tensor) 必须满足条件:a.requires_grad=True(以下述snippet为例)

# coding=utf-8
# Counter-example: backward() cannot run when the input tensor does not require grad.
import torch

a = torch.randn(2,2)
a = (a*3/(a-1))
print("a.requires_grad is {}".format(a.requires_grad))  # The input flag defaults to False if not given: tensors do not track gradients by default, i.e. a.requires_grad=False

#a.requires_grad_(True) # enable to verify: the tensor we differentiate with respect to needs requires_grad=True, otherwise back-propagation is impossible

b = a.sum()
print("b=a.sum()={},b.grad_fn is {}".format(b,b.grad_fn))
# Back-propagation with backward()
#b.backward()  # out.backward() is equivalent to out.backward(torch.tensor(1.)); with a.requires_grad=False it fails -- RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

正确例子如下:

$c = \frac{1}{4}\sum a_{i}$ ，因此， $\frac{\partial c}{\partial a}=\begin{bmatrix} \frac{\partial c}{\partial a_{0}}& \frac{\partial c}{\partial a_{1}}\\ \frac{\partial c}{\partial a_{2}}& \frac{\partial c}{\partial a_{3}} \end{bmatrix}=\begin{bmatrix} 0.25& 0.25\\ 0.25& 0.25 \end{bmatrix}$

# coding=utf-8
# Working example: gradient of a scalar with respect to a tensor, and how
# repeated backward() calls accumulate into .grad.
import torch

a = torch.randn(2,2)
a = (a*3/(a-1))
print("a.requires_grad is {}".format(a.requires_grad))  # tensors do not track gradients by default, so this prints False

a.requires_grad_(True) # the tensor we differentiate with respect to must have requires_grad=True, otherwise backward() cannot run
print("a.requires_grad is {}".format(a.requires_grad)) # now True
print("\na={}".format(a))

# Derivative of a scalar with respect to a tensor
c = a.mean()
print("c=a.mean()={}".format(c))
c.backward()  # back-propagates through mean(): each of the 4 entries receives 1/4
print("a.grad={}".format(a.grad))  # a.grad=tensor([[0.2500, 0.2500],[0.2500, 0.2500]])

b = a.sum()
b.backward()  # adds d(sum)/da = 1 on top of the gradient already stored in a.grad
print("a.grad={}".format(a.grad))  # a.grad=tensor([[1.2500, 1.2500],[1.2500, 1.2500]]) -- accumulated!

如果多次使用backward()，则a.grad会被累加运算！

3.3 范数求解

# Computing a norm: keep doubling y until its norm reaches 10
x = torch.randn(3,requires_grad=True)
y = x * 2
while y.data.norm()<10: # norm() defaults to the 2-norm here; torch.norm(y,2) computes the same 2-norm
    y = y * 2
print("\ny={},y.data={},y.data.norm()={},torch.norm(y,2)={}".format(y,y.data,y.data.norm(),torch.norm(y,2)))

3.4 反向传播backward(向量对向量求导)

理论公式推导可参考矩阵求导

$x=(x_{1},x_{2},x_{3}),y=(y_{1},y_{2},y_{3}),\frac{\partial y^{T}}{\partial x}=J^{T}=\bigl(\begin{smallmatrix} \frac{\partial y_{1}}{\partial x_{1}}& \frac{\partial y_{2}}{\partial x_{1}}& \frac{\partial y_{3}}{\partial x_{1}}\\ \frac{\partial y_{1}}{\partial x_{2}}& \frac{\partial y_{2}}{\partial x_{2}}&\frac{\partial y_{3}}{\partial x_{2}} \\ \frac{\partial y_{1}}{\partial x_{3}}& \frac{\partial y_{2}}{\partial x_{3}}& \frac{\partial y_{3}}{\partial x_{3}} \end{smallmatrix}\bigr)$

'''
# 向量(vector)对向量(vector)求偏导，backward(),数学上1*3的vector对1*3的vector求导会得到一个3*3的矩阵(vector-Jacobian product)，但这里需要加一个向量v
# 可参考https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html
'''
# y (from the previous snippet) is a 3-element tensor, not a scalar, so backward()
# needs an explicit vector v of the same length.
v = torch.tensor([1.2,0.4,0.003],dtype=torch.float)
#y.backward() # the scalar-derivative form fails here -- RuntimeError: grad can be implicitly created only for scalar outputs
y.backward(v) # passing the length-3 tensor v computes the vector-Jacobian product J^T * v
print("x.grad={}".format(x.grad))

上述程序得到y=x*8，求导所得 $\frac{\partial y^{T}}{\partial x}=J^{T}=\bigl(\begin{smallmatrix} 8& 0& 0\\ 0& 8&0\\ 0& 0& 8 \end{smallmatrix}\bigr)$

x=tensor([ 1.3471, -0.0893, -0.7166], requires_grad=True)

y=tensor([10.7771, -0.7143, -5.7330], grad_fn=<MulBackward0>)

x.grad=tensor([9.6000, 3.2000, 0.0240])

3.5 停止自动求导

# 停止自动求导运算
print("(x**2).requires_grad={}".format((x ** 2).requires_grad)) # True
with torch.no_grad():
    print("(x**2).requires_grad={}".format((x ** 2).requires_grad)) # False
print("(x**2).requires_grad={}".format((x ** 2).requires_grad)) # True

4.神经网络

公式推导可以参考神经网络

4.1 基本要求

A typical training procedure for a neural network is as follows:

Define the neural network that has some learnable parameters (or weights) 定义神经网络(拥有一些可学习的参数)
Iterate over a dataset of inputs 在输入数据集上进行迭代
Process input through the network 通过网络处理输入
Compute the loss (how far is the output from being correct) 计算损失
Propagate gradients back into the network’s parameters 反向传播梯度给网络参数
Update the weights of the network, typically using a simple update rule: weight = weight -learning_rate * gradient 更新网络权重

4.2 实现步骤

定义神经网络：

# coding=utf-8
'''
卷积、全连接在torch.nn，池化在torch.nn.functional
'''
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small LeNet-style CNN: two conv+ReLU+max-pool stages and three linear layers.

    The first linear layer is sized for 1x32x32 inputs; the network outputs
    3 values per sample.
    """

    def __init__(self):
        super(Net,self).__init__()
        # in_channels=1, out_channels=3, kernel_size=2x2; stride=1, padding=0,
        # dilation=1, groups=1 and bias=True are left at their defaults.
        self.conv1 = nn.Conv2d(1,3,2)
        self.conv2 = nn.Conv2d(3,3,2)
        # Fully connected layers. For a 1x32x32 input the spatial size goes
        # 32 -> 31 (conv1) -> 15 (pool) -> 14 (conv2) -> 7 (pool), with 3 channels,
        # so the flattened feature vector has 3*7*7 entries.
        self.fc1 = nn.Linear(3*7*7,5)
        self.fc2 = nn.Linear(5,4)
        self.fc3 = nn.Linear(4,3)
        # Layer sizes used by the official tutorial, kept for reference:
        #self.conv1 = nn.Conv2d(1,6,5) # in_channels=1, out_channels=6, kernel_size=5x5
        #self.conv2 = nn.Conv2d(6,16,5)
        #self.fc1 = nn.Linear(16*5*5,120) # 16 channels of 5x5 feature maps
        #self.fc2 = nn.Linear(120,84)
        #self.fc3 = nn.Linear(84,3)

    def forward(self,x):
        """Run a batch of shape (N, 1, 32, 32) through the network and return the (N, 3) output."""
        #import pdb;pdb.set_trace()
        x = F.max_pool2d(F.relu(self.conv1(x)),(2,2)) # 1. convolution; 2. ReLU; 3. 2x2 max pooling
        x = F.max_pool2d(F.relu(self.conv2(x)),2) # a single number means a square window, i.e. (2,2)
        x = x.view(-1,self.num_flat_features(x)) # flatten everything except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        #import pdb;pdb.set_trace()
        x = self.fc3(x)
        return x

    def num_flat_features(self,x):
        """Return the number of elements per sample, i.e. the product of all dimensions except the first."""
        size = x.size()[1:] # every dimension except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

查看网络：

net = Net() # instantiate the network
print("net:{}".format(net)) # show the layers of the network
param = list(net.parameters())
print("length:{}\nparam[0].size()={}\nparam[1].size()={}\nparam[2].size()={}\nparam[3].size()={}\nparam[4].size()={}\nparam[5].size()={}\n".format(len(param),param[0].size(),param[1].size(),param[2].size(),param[3].size(),param[4].size(),param[5].size())) # length is 10 because each of the five layers contributes a weight tensor and a bias tensor

数据输入网络：

'''
# input--The entire torch.nn package only supports inputs that are a mini-batch of samples, and not a single sample
# 就是需要实现对齐，举个例子:a single sample 可能就是3*32*32(nChannels*Height*Width)，a mini-batch of samples就是1*3*32*32(nSamples*nChannels*Height*Width))
# 如果输入是a single sample，则需要转换为a mini-batch of samples(虚构一个nSamples的维度)
'''
# The input is given as a mini-batch (nSamples, nChannels, Height, Width); a single
# sample would first need a leading nSamples dimension of size 1.
input = torch.randn(1,1,32,32)
print("input=torch.randn(1,1,32,32)={}".format(input))
out = net(input) # calling the module runs its forward() method
print("out=net(input)={},size={}".format(out,out.size()))

计算损失：

net.zero_grad() # Zero the gradient buffers of all parameters: sets gradients of all model parameters to zero.

#out.backward(torch.randn(1,3),retain_graph=True)

target = torch.tensor([0.8,0.1,0.1],dtype=torch.float)
print("target变换前:{},size={}".format(target,target.size()))
target = target.view(1,-1) # reshape target so it has the same dimensions as out
print("target变换后={},size={}".format(target,target.size()))

# Loss function (mean-squared error) via the nn.MSELoss class
mse_loss = nn.MSELoss()    # 1/3*((y0-t0)^2+(y1-t1)^2+(y2-t2)^2), where y is the fc3 output and t is the target
#loss = mse_loss(target,out) #loss.grad_fn:<MeanBackward0 object at 0x7fa3aab3ee10> -- the argument order must be (out, target)
loss = mse_loss(out,target) #loss.grad_fn:<MseLossBackward object at 0x7f89413e0780> -- the argument order must be (out, target)
print("loss={}\nloss.grad_fn:{}\nloss.grad_fn.next_functions[0][0]={}\nloss.grad_fn.next_functions[0][0].next_functions[0][0]={}".format(loss,loss.grad_fn,loss.grad_fn.next_functions[0][0],loss.grad_fn.next_functions[0][0].next_functions[0][0]))

反向传播求梯度：

# backprop: print conv1's bias gradient before zeroing, after zeroing, and after backward()
print("\nbefore zero_grad---net.conv1.bias.grad={}".format(net.conv1.bias.grad))

net.zero_grad()
print("\nbefore backprop---net.conv1.bias.grad={}".format(net.conv1.bias.grad))
'''
# pytorch构建的一个graph中，只能进行一次backward,如果上述过程已经使用过一次，则会报错:RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.
'''
loss.backward() # if backward() was already called on this graph, that earlier call must pass retain_graph=True
print("\nafter backprop---net.conv1.bias.grad={}".format(net.conv1.bias.grad))

两种迭代方式(更新weight和bias)：

# Method 1: hand-written gradient descent, repeated until the loss is small enough.
# Relies on net, input, target, mse_loss and an initial loss from the snippets above.
# NOTE(review): the loop has no iteration cap, so it runs forever if the loss
# never drops below the threshold.
learning_rate = 0.01
iter_count = 0
while loss>0.00001:
    net.zero_grad()  # clear the gradients left over from the previous iteration
    output = net(input)
    loss = mse_loss(output,target)
    loss.backward()
    # weight = weight - learning_rate * gradient, done under no_grad so that
    # autograd does not record the update itself.
    with torch.no_grad():
        for f in net.parameters():
            f.sub_(f.grad * learning_rate)
    iter_count = iter_count + 1
    if iter_count%30 == 0:
        print("第{}次迭代,loss:{}".format(iter_count,loss))

# Method 2 (disabled alternative): the same loop using torch.optim.
# The optimizer is created once, before the loop, instead of on every iteration.
#iter_count = 0
#import torch.optim as optim
#opt = optim.SGD(net.parameters(),lr=0.01)
#while loss>0.000000001:
#    opt.zero_grad() # gradients must be zeroed every iteration, otherwise they accumulate
#    output = net(input)
#    loss = mse_loss(output,target)
#    loss.backward()
#    opt.step()
#    iter_count = iter_count + 1
#    if iter_count%30 == 0:
#        print("第{}次迭代,loss:{}".format(iter_count,loss))

5.分类网络(CIFAR10)

5.1 基本步骤

Load and normalizing the CIFAR10 training and test datasets using torchvision 下载并载入cifar10的数据
Define a Convolutional Neural Network 定义分类网络
Define a loss function 定义损失函数
Train the network on the training data 训练数据
Test the network on the test data 测试

5.2 实现方法

下载cifar10数据：

# coding=utf-8
import torch
import torchvision
import torchvision.transforms as transforms

# Preprocessing pipeline: ToTensor() followed by per-channel Normalize with mean 0.5 and std 0.5.
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])
'''
#Compose组合tensor到一起，transforms.ToTensor()--转换一个PIL图像到tensor，Convert a PIL Image or numpy.ndarray to tensor;
#transforms.Normalize(mean,std)--规范化一个tensor图像,input[channel] = (input[channel] - mean[channel]) / std[channel]
'''
traindatasets = torchvision.datasets.CIFAR10(root="./data",train=True,download=False,transform=transform)
# Reads from the root directory (download=True would download first); train=True selects the training split, otherwise the test split; samples are returned after applying transform
trainloader = torch.utils.data.DataLoader(traindatasets,batch_size=4,shuffle=True,num_workers=2) # num_workers -- number of subprocesses used for data loading

testdatasets = torchvision.datasets.CIFAR10(root="./data",train=False,download=False,transform=transform)
testLoader = torch.utils.data.DataLoader(testdatasets,batch_size=4,shuffle=True,num_workers=2)

# Class names, indexed by the integer label
classes = ("plane","car","bird","cat","deer","dog","frog","horse","ship","truck")

import matplotlib.pyplot as plt
import numpy as np
def imshow(img):
    """Undo the (0.5, 0.5) normalization of a CxHxW image tensor and save it as 1.jpg.

    Loading applied input = (input - mean) / std with mean = std = 0.5, so the
    inverse is input * std + mean, i.e. img / 2 + 0.5.
    """
    restored = img / 2 + 0.5
    # matplotlib expects height x width x channels, the tensor is channels-first
    hwc_array = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(hwc_array)
    plt.savefig("1.jpg")
#dataiter = iter(trainloader)
#images,labels = dataiter.next()
#
##imshow(torchvision.utils.make_grid(images)) # make a grid of images图像网格，images是一个tensor，所以imshow函数里面需要转换为numpy格式的
#print(' '.join("%5s"% classes[labels[i]] for i in range(4)))

定义分类网络：

# Reload CIFAR10, this time with batch_size=1 for the training loader.
traindatasets = torchvision.datasets.CIFAR10(root="./data",train=True,download=False,transform=transform)
# Reads from the root directory (download=True would download first); train=True selects the training split, otherwise the test split; samples are returned after applying transform
trainloader = torch.utils.data.DataLoader(traindatasets,batch_size=1,shuffle=True,num_workers=2) # num_workers -- number of subprocesses used for data loading

testdatasets = torchvision.datasets.CIFAR10(root="./data",train=False,download=False,transform=transform)
testloader = torch.utils.data.DataLoader(testdatasets,batch_size=4,shuffle=True,num_workers=2)

# Class names, indexed by the integer label
classes = ("plane","car","bird","cat","deer","dog","frog","horse","ship","truck")
import torch.nn as nn
import torch.nn.functional as F

class ClassifyNet(nn.Module):
    """CIFAR10 classifier: two 3x3 conv+ReLU layers, one 2x2 max pool, three linear layers.

    For 3x32x32 inputs the feature map is 30x30 after conv1, 28x28 after conv2
    and 14x14 after pooling, so each sample flattens to 10*14*14 = 1960 values.
    """

    def __init__(self):
        super(ClassifyNet, self).__init__()
        # 3 input channels -> 6 output channels, 3x3 kernel
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=10, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # 1960 = channels * height * width = 10 * 14 * 14 features per sample
        self.fc1 = nn.Linear(in_features=1960, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.fc3 = nn.Linear(in_features=60, out_features=10)

    def forward(self, x):
        """Map a (N, 3, 32, 32) batch to (N, 10) class scores."""
        features = self.pool(F.relu(self.conv2(F.relu(self.conv1(x)))))
        #import pdb; pdb.set_trace()
        flattened = features.view(-1, 1960)
        # the three linear layers are applied back to back, with no activation in between
        return self.fc3(self.fc2(self.fc1(flattened)))

①这里重新载入数据集，采用batch_size=1，即训练过程保持单张图训练，速度慢。但学习阶段，需要慢慢搞懂每一步！

②fc1的定义需要计算图像计算到这一层的[batchsize,channels,height,width]，然后设置当前层的in_features，即输入神经元个数。

③前向传播forward计算到fc1时，需要平铺卷积高维torch tensors。

定义损失函数：

net = ClassifyNet() # instantiate the network
print(net)

# define loss: cross entropy, optimized with plain SGD
loss_cross = nn.CrossEntropyLoss()
import torch.optim as optim
opt = optim.SGD(net.parameters(),lr=0.001)
running_loss = 0 # accumulator for the loss values reported by the training loop

训练分类网络(利用cifar10)：

# train step: one pass over every batch in trainloader.
# enumerate(trainloader, 0) pairs each batch with an index that starts at 0.
running_loss = 0
for i,data in enumerate(trainloader,0):
    inputs,labels = data
    opt.zero_grad()  # clear gradients from the previous batch
    outputs = net(inputs)
    #import pdb;pdb.set_trace()
    loss = loss_cross(outputs,labels)
    loss.backward()
    opt.step()
    running_loss += loss.item()
    if i%2000 == 1999:
        # report the mean loss over the last 2000 batches, then start a new window
        print("第{}次迭代,loss:{}".format(i+1,running_loss/2000))
        running_loss = 0

print("Finish Training")

训练图片50000张，迭代50000次：

……
第46000次迭代,loss:0.000776898443698883
第48000次迭代,loss:0.001786381721496582
第50000次迭代,loss:0.0005364646911621094

测试分类网络:

# test step: classify a single batch from the test loader and compare with the ground truth
testdataiter = iter(testloader)
images,labels = next(testdataiter)  # built-in next() instead of the Python-2 style .next() method
imshow(torchvision.utils.make_grid(images))
print("GT:",' '.join("%5s"% classes[labels[i]] for i in range(len(labels))))

outputs = net(images) # one 10-dimensional score vector per image in the batch
_, predicts = torch.max(outputs,1) # maximum along dimension 1, i.e. the index of the best-scoring class for each image
print(predicts)
print("Predicts:",' '.join("%5s"% classes[predicts[i]] for i in range(len(predicts))))

这里仅仅测试了4张图，预测都是正确的。

GT: car bird frog dog

Predicts: car bird frog dog

在测试集上测试：(正确率0.4979)

# test on testdatasets: accuracy over the whole test loader
correct = 0
total = 0
with torch.no_grad(): # gradients are not needed for evaluation
    for i,data in enumerate(testloader,0):
        images,labels = data
        outputs = net(images)
        _, predicts = torch.max(outputs,1)
        total += labels.size(0)
        correct = correct + (predicts == labels).sum().item() # predicts == labels is 1 where the prediction is right; the sum is the number of correct predictions
print("The accuracy of classifyNet on {} test images:{}".format(total,correct/total))

在GPU上训练：

# train step on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device:{}".format(device))
net.to(device) # (1) move the network to the device once, before the loop
running_loss = 0
# enumerate(trainloader, 0) pairs each batch with an index that starts at 0; one pass over all training data
for i,data in enumerate(trainloader,0):
    inputs,labels = data
    inputs,labels = inputs.to(device),labels.to(device) # (2) move each batch to the same device
    opt.zero_grad()
    outputs = net(inputs)
    #import pdb;pdb.set_trace()
    loss = loss_cross(outputs,labels)
    loss.backward()
    opt.step()
    running_loss += loss.item()
    if i%2000 == 1999:
        # report the mean loss over the last 2000 batches, then start a new window
        print("第{}次迭代,loss:{}".format(i+1,running_loss/2000))
        running_loss = 0

print("Finish Training")

①将网络net放到GPU上；

②将需要训练的数据放到GPU上。

在GPU上训练所需时间：----real 4m8.954s----user 5m22.688s----sys 0m49.932s----

在CPU上训练所需时间：----real 2m15.048s----user 9m12.332s----sys 15m32.658s----

CPU更快！！！奇怪不奇怪！！！官网解释：Why dont I notice MASSIVE speedup compared to CPU? Because your network is realllly small.

6.分类网络(自定义数据)

6.1 数据构成

数据来源：ImageNet

数据类别：dog 和 cat

数据放置：train文件夹下放2个文件夹（cat 和 dog），每个文件夹分别放各自的图片。val文件夹做同样的操作。但是train和val中放置的图片一般不能有重复的图片。

6.2 数据读取

参考pytorch官网(github)给出的一个例子，数据会被很规范的读入，类似CIFAR10一样，train和val下面的文件夹名字自然会被分为0和1两类：

# Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

	CIFAR10	myDatasets
function	traindatasets=torchvision.datasets.CIFAR10()	mytraindatasets = torchvision.datasets.ImageFolder()
pytorch structure	Dataset CIFAR10 Number of datapoints: 50000 Split: train Root Location: ./data Transforms (if any): Compose( ToTensor() Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) ) Target Transforms (if any): None	Dataset ImageFolder Number of datapoints: 1876 Root Location: ./data/mydatasets/datasets/train Transforms (if any): Compose( RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR) RandomHorizontalFlip(p=0.5) ToTensor() Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ) Target Transforms (if any): None
DataLoader	torch.utils.data.DataLoader(mytraindatasets, batch_size=1, shuffle=True, num_workers=0) 官网说法：Combines a dataset and a sampler, and provides single- or multi-process iterators over the dataset.
获取其中一个数据traindatasets.__getitem__(0) 矩阵是图像，6是类别标签	(tensor([[[-0.5373, -0.6627, -0.6078, ..., 0.2392, 0.1922, 0.1608], [-0.8745, -1.0000, -0.8588, ..., -0.0353, -0.0667, -0.0431], [-0.8039, -0.8745, -0.6157, ..., -0.0745, -0.0588, -0.1451], ..., [-0.2471, -0.7333, -0.7961, ..., -0.4510, -0.9451, -0.8431], [-0.2471, -0.6706, -0.7647, ..., -0.2627, -0.7333, -0.7333], [-0.0902, -0.2627, -0.3176, ..., 0.0980, -0.3412, -0.4353]]]), 6)

7. pytorch网络可视化（docker下的tensorboard）

安装自然很简单：

pip install -i https://pypi.tuna.tsinghua.edu.cn/simple tensorflow tensorboard tensorboardX

TensorBoard运行在docker容器下：

因为TensorBoard默认运行端口在6006，如果在docker下直接运行，则使用浏览器访问TensorBoard时，无法访问docker容器下的TensorBoard服务器，只能访问主机的TensorBoard。因此需要把docker容器的6006端口映射到主机，进而访问主机的TensorBoard服务器时，间接访问docker容器的6006端口服务资源。(-p 6006:6006)

 sudo nvidia-docker run --rm -it -v /media/lab/873821cf-d234-44cf-bd63-4372eac823a1/pytorch/:/home/pytorch -p 6006:6006 pytorch:v0 bash

TensorBoard网络可视化构建(代码)：

# coding=utf-8
import torch
import torchvision
import torchvision.transforms as transforms
from visualization import visualize

import torch.nn as nn
import torch.nn.functional as F

class ClassifyNet(nn.Module):
    """Two-class classifier for 3x224x224 images: one 3x3 conv, one 2x2 max pool, two linear layers.

    A 224x224 input is 222x222 after the convolution and 111x111 after pooling,
    so each sample flattens to 4*111*111 = 49284 values.
    """

    def __init__(self):
        super(ClassifyNet, self).__init__()
        # 3 input channels -> 4 output channels, 3x3 kernel
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # 49284 = channels * height * width = 4 * 111 * 111 features per sample
        self.fc1 = nn.Linear(in_features=49284, out_features=60)
        self.fc2 = nn.Linear(in_features=60, out_features=2)

    def forward(self, x):
        """Map a (N, 3, 224, 224) batch to (N, 2) class scores."""
        pooled = self.pool(F.relu(self.conv1(x)))
        #import pdb;pdb.set_trace()
        flattened = pooled.view(-1, 49284)
        return self.fc2(self.fc1(flattened))

net = ClassifyNet()
print(net)

'''
visualization
method:tensorbordX
'''

# Visualization with tensorboardX: record the network graph by tracing one
# random 1x3x224x224 input through net.
from tensorboardX import SummaryWriter
with SummaryWriter(comment="Net") as w:
    w.add_graph(net,(torch.rand(1,3,224,224),))

程序运行之后，当前程序所在目录下会生成一个runs目录

运行TensorBoard服务器资源：

tensorboard --logdir=runs/

局域网下的浏览器访问TensorBoard(主机地址+端口)：

host_addr:6006

8. pytorch使用AlexNet训练MNIST

8.1 代码

#coding:utf-8
'''#直接使用AlexNet是不行的，minst数据集图像大小是28*28，如果使用AlexNet的卷积核设置，最终会导致：Given input size: (192x2x2). Calculated output size: (192x0x0). Output size is too small
#原始AlexNet位置：https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
Start Training!
[1, 60000] loss:0.4914
[2, 60000] loss:0.0042
[3, 60000] loss:0.0063
[4, 60000] loss:0.0038
[5, 60000] loss:0.0172
Finished Traning
Accuracy of the network on the 10000 test images:98%'''
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.functional as F
import time,pdb,random,os
try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    # PyTorch releases without torch.hub.load_state_dict_from_url provide the
    # same helper as torch.utils.model_zoo.load_url.
    from torch.utils.model_zoo import load_url as load_state_dict_from_url


# Public names exported by this module.
__all__ = ['AlexNet', 'alexnet']


# Download URL of the pretrained checkpoint, keyed by model name; read by alexnet().
model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}


class AlexNet(nn.Module):
    """AlexNet-like CNN with a single input channel and 3x3 kernels throughout.

    Five conv+ReLU layers with three 2x2 max pools, an adaptive average pool to
    6x6, then a three-layer classifier producing ``num_classes`` scores.
    """

    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        # Convolutional feature extractor; layer order defines the Sequential indices.
        conv_layers = [
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_layers)
        # Forces a 6x6 spatial size regardless of the feature-map size.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        # Classifier head on the flattened 256*6*6 features.
        head_layers = [
            nn.Dropout(),
            nn.Linear(256*6*6, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(1024, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the (N, num_classes) scores for a batch of single-channel images."""
        pooled = self.avgpool(self.features(x))
        #x = torch.flatten(x, 1)
        flattened = pooled.view(-1, 256*6*6)
        return self.classifier(flattened)


def alexnet(pretrained=False, progress=True, **kwargs):
    r"""Build the AlexNet model defined in this file.

    Architecture after the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.

    Args:
        pretrained (bool): If True, download the checkpoint listed under
            ``model_urls['alexnet']`` and load it into the model
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: forwarded unchanged to the ``AlexNet`` constructor
    """
    model = AlexNet(**kwargs)
    if not pretrained:
        return model
    # NOTE(review): the checkpoint URL is the stock torchvision AlexNet; confirm its
    # tensor shapes match the layer sizes of the AlexNet class in this file.
    weights = load_state_dict_from_url(model_urls['alexnet'], progress=progress)
    model.load_state_dict(weights)
    return model

# Cut a 3-pixel strip off one randomly chosen edge and re-attach it on the opposite edge
def crop_pad_image(src1):
    """Return a copy of the CxHxW tensor ``src1`` shifted cyclically by 3 pixels.

    One direction is drawn with ``random.randint(0, 3)``:
    0 = up, 1 = down, 2 = left, 3 = right. The strip that leaves the image on
    one side is concatenated back on the other side, so the shape is unchanged.
    """
    shift = 3
    direction = random.randint(0, 3)
    _, rows, cols = src1.shape

    if direction == 0:
        # up: the top strip moves below the remainder
        kept = src1[:, shift:rows, 0:cols]
        strip = src1[:, 0:shift, 0:cols]
        return torch.cat((kept, strip), 1)
    if direction == 1:
        # down: the bottom strip moves above the remainder
        kept = src1[:, 0:rows - shift, 0:cols]
        strip = src1[:, rows - shift:rows, 0:cols]
        return torch.cat((strip, kept), 1)
    if direction == 2:
        # left: the left strip moves to the right of the remainder
        kept = src1[:, 0:rows, shift:cols]
        strip = src1[:, 0:rows, 0:shift]
        return torch.cat((kept, strip), 2)
    # direction == 3, right: the right strip moves to the left of the remainder
    kept = src1[:, 0:rows, 0:cols - shift]
    strip = src1[:, 0:rows, cols - shift:cols]
    #pdb.set_trace()
    return torch.cat((strip, kept), 2)

from torchvision.utils import save_image
def save_img(img, save_path,name_index):
    """Save a normalized 28x28 image tensor as ``<save_path>/<name_index>.jpg``.

    Args:
        img: tensor with values in [-1, 1] whose elements can be viewed as
            (-1, 1, 28, 28).
        save_path: output directory; created if it does not exist.
        name_index: value used (via ``str``) as the file name without extension.
    """
    # exist_ok avoids the separate exists() check and the race between check and creation
    os.makedirs(save_path, exist_ok=True)
    img = 0.5 * (img + 1)  # map [-1, 1] back to [0, 1]
    img = img.clamp(0, 1)
    img = img.view(-1, 1, 28, 28)
    save_image(img, os.path.join(save_path,str(name_index)+".jpg"))

if __name__=="__main__":
    # Preprocessing: ToTensor() followed by Normalize with mean 0.5 and std 0.5
    # on the single MNIST channel.
    # NOTE: this assignment rebinds `transforms`, shadowing the
    # torchvision.transforms module imported above for the rest of the script.
    transforms= transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))])

    #trainset = torchvision.datasets.MNIST(root='./data',train=True,download=True,transform=transforms)
    #trainloader = torch.utils.data.DataLoader(trainset, batch_size=10,shuffle=True,num_workers=0)
    testset = torchvision.datasets.MNIST(root='./data',train=False,download=True,transform=transforms)
    testloader = torch.utils.data.DataLoader(testset,batch_size=10,shuffle=False,num_workers=0)
    net = AlexNet()
    # Loss function: cross entropy
    criterion = nn.CrossEntropyLoss()
    # Optimizer: SGD with momentum
    optimizer = optim.SGD(net.parameters(),lr=1e-3, momentum=0.9)
    # device : GPU or CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    # Training (disabled): enable this section once to produce MNIST.pkl.
    #print("Start Training!")
    #num_epochs = 5 # number of epochs
    #for epoch in range(num_epochs):
    #    running_loss = 0
    #    batch_size = 10
    #    for i, data in enumerate(trainloader):
    #        inputs, labels = data
    #        inputs, labels = inputs.to(device), labels.to(device)
    #
    #        outputs = net(inputs)
    #        #pdb.set_trace()
    #        loss = criterion(outputs, labels)
    #        optimizer.zero_grad()
    #        loss.backward()
    #        optimizer.step()
    #
    #    print('[%d, %5d] loss:%.4f'%(epoch+1, (i+1)*10, loss.item()))
    #
    #print("Finished Traning")
    #
    ## save the trained model
    #torch.save(net, 'MNIST.pkl')

    # Load the whole pickled model onto the selected device.
    # SECURITY: torch.load unpickles arbitrary objects; only load a MNIST.pkl
    # that you produced yourself.
    # NOTE(review): newer PyTorch releases may require weights_only=False to load
    # a whole-model pickle -- confirm against the installed version.
    net = torch.load('MNIST.pkl', map_location=device)
    net.to(device)
    # Switch Dropout to inference behaviour; without this the evaluation is randomized.
    net.eval()

    num_classes = 10
    # Evaluation
    with torch.no_grad():
        # no gradients are tracked inside this block
        correct = 0
        total = 0
        tplist = [0.] * num_classes     # samples of class i predicted as class i (TP)
        fnlist = [0.] * num_classes     # samples of class i predicted as another class (FN)
        tp_fplist = [0.] * num_classes  # samples predicted as class i (TP + FP)
        tp_fnlist = [0.] * num_classes  # samples whose true label is class i (TP + FN)
        totallist = [0.] * num_classes  # samples seen per true class

        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            # Optional experiment: shift every MNIST digit by 3 pixels in a random
            # direction (up/down/left/right) to probe the CNN's translation invariance.
            #testbatchsize=images.size()[0]
            #for i in range(testbatchsize):
            #    # save the original image for visual inspection
            #    #save_img(images[i,:,:,:],"original_images",labels[i].item())
            #    temp = torch.empty(1,28,28)
            #    temp.copy_(images[i])
            #    images[i].copy_(crop_pad_image(images[i]))
            #    # save the shifted image for visual inspection; verified that it really moved
            #    #save_img(images[i,:,:,:],"move_images",labels[i].item())
            #    #pdb.set_trace()
            out = net(images)
            _, predicted = torch.max(out.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            res = predicted == labels
            for label_id in range(len(labels)):
                label_single = labels[label_id].item()         # true class of this sample
                predicted_single = predicted[label_id].item()  # predicted class of this sample
                is_hit = bool(res[label_id].item())
                tplist[label_single] += is_hit
                fnlist[label_single] += (not is_hit)
                # Count by class index directly, so the tallies do not depend on
                # the batch size happening to equal the number of classes.
                tp_fplist[predicted_single] += 1
                tp_fnlist[label_single] += 1
                totallist[label_single] += 1
        #pdb.set_trace()
        print('Accuracy of the network on the 10000 test images:{}%'.format(100 * float(correct) / total)) # overall accuracy
        mRecall = 0
        mPrecision = 0
        for i in range(num_classes):
            # A class that never occurs / is never predicted reports 0 instead of dividing by zero.
            recall = tplist[i]/tp_fnlist[i] if tp_fnlist[i] else 0.0
            precision = tplist[i]/tp_fplist[i] if tp_fplist[i] else 0.0
            print("数字{}的召回率(查全率)recall:{:.3f},精确率(查准率)precision:{:.3f}".format(i,recall,precision))
            mRecall += recall
            mPrecision += precision
        print("mRecall:{:.3f}".format(mRecall/num_classes))
        print("mPrecision:{:.3f}".format(mPrecision/num_classes))

所有测试集数据随机上、下、左、右移动3个像素

数据预处理Normalize

参考网站：https://blog.csdn.net/xys430381_1/article/details/85724668?utm_source=distribute.pc_relevant.none-task

(单通道)transforms= transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=(0.5,),std=(0.5,))])

(三通道)transforms= transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=(0.5,0.5,0.5),std=(0.5,0.5,0.5))])

表示数据集图片预处理过程中，先通过“torchvision.transforms.ToTensor”将0-255的像素值转换到0-1之间，然后“transforms.Normalize(mean=(0.5,),std=(0.5,))”按照均值为0.5、标准差为0.5的方式，将0-1的数据归一化到-1至1。计算方法：(x-0.5)/0.5，即x=0时得到-1，x=1时得到1。

tensor数据保存为图像

参考网站：https://pytorch.org/docs/master/_modules/torchvision/utils.html#save_image

网站中提供了将tensor数据直接保存为可视图像的方法。最重要的一点是使用ndarr = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()将数据从0至1转换为0-255（像素值为-1到1的数据需要先还原到0至1，例如8.1中save_img里的0.5 * (img + 1)，否则负值会被clamp截断为0），并调整了维度顺序。通常tensor的各维度信息为[batchsize,channels,height,width]，而opencv或者pillow图像各维度为[height,width,channels]，所以使用opencv或pillow保存图像需要将tensor转换过来。

缩小图像

tensor.resize_()不能实现图像缩放，这个函数只会截取原图像数据矩阵中的一部分。因此目前我能找到的方法是：先将tensor转换为opencv可用的数据格式（包括将-1至1的像素值调整为0-255，以及调整维度顺序），然后使用opencv中带插值的resize方法。以MNIST图像缩小一半为例：

def resize_image(src,transforms=None):
    """Shrink a normalized 1xHxW image to half size and paste it centred on a blank canvas.

    Requires OpenCV to be imported as ``cv2`` in the enclosing module.

    Args:
        src: image tensor of shape (1, H, W) with values in [-1, 1]
            (i.e. after Normalize(0.5, 0.5)).
        transforms: callable applied to the resized HxWx3 uint8 array to turn it
            back into a normalized tensor (the ToTensor + Normalize pipeline).
            Must be provided; the None default is kept only for signature
            compatibility.

    Returns:
        A (1, H, W) tensor filled with -1 (background) with the half-size image
        in the centre.
    """
    channel,height,width = src.shape
    new_h, new_w = height // 2, width // 2
    temp = torch.empty(1,height,width)
    temp.copy_(src)
    # Undo Normalize(0.5, 0.5) first: [-1, 1] -> [0, 1]. Scaling the normalized
    # data directly by 255 would clamp every pixel below mid-grey to 0.
    temp.mul_(0.5).add_(0.5)
    temp = temp.mul(255).add_(0.5).clamp_(0,255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
    tempCV = cv2.cvtColor(temp, cv2.COLOR_GRAY2BGR)
    # cv2.resize takes the target size as (width, height)
    tempCV = cv2.resize(tempCV, (new_w,new_h), interpolation = cv2.INTER_AREA)
    #cv2.imwrite("temp_np.jpg",tempCV)
    resizeTemp = transforms(tempCV)
    outTemp = -torch.ones(1,height,width)
    top = (height - new_h) // 2
    left = (width - new_w) // 2
    # only channel 0 of the 3-channel result is needed for the grey image
    outTemp[0,top:top+new_h,left:left+new_w]=resizeTemp[0,:,:]
    #save_img(outTemp,[1,28,28],"resize_images","resize")
    #pdb.set_trace()
    return outTemp

在应用时，将8.1中的代码179行修改为images[i].copy_(resize_image(images[i],transforms))即可实现minst测试集图像缩小功能。压缩后测试结果如下：

Accuracy of the network on the 10000 test images:57.2%
数字0的召回率(查全率)recall:0.004,精确率(查准率)precision:1.000
数字1的召回率(查全率)recall:0.959,精确率(查准率)precision:0.639
数字2的召回率(查全率)recall:0.479,精确率(查准率)precision:0.653
数字3的召回率(查全率)recall:0.685,精确率(查准率)precision:0.504
数字4的召回率(查全率)recall:0.648,精确率(查准率)precision:0.747
数字5的召回率(查全率)recall:0.952,精确率(查准率)precision:0.357
数字6的召回率(查全率)recall:0.507,精确率(查准率)precision:0.452
数字7的召回率(查全率)recall:0.545,精确率(查准率)precision:0.930
数字8的召回率(查全率)recall:0.331,精确率(查准率)precision:0.782
数字9的召回率(查全率)recall:0.583,精确率(查准率)precision:0.696
mRecall:0.569
mPrecision:0.676

随机缩小为原图的0.5-0.7倍：

def resize_image(src,transforms=None):
    """Shrink a normalized 1xHxW image by a random factor and paste it centred on a blank canvas.

    The zoom factor is 0.1 * random.randint(5, 7), i.e. roughly 0.5, 0.6 or 0.7.
    Requires OpenCV to be imported as ``cv2`` in the enclosing module.

    Args:
        src: image tensor of shape (1, H, W) with values in [-1, 1]
            (i.e. after Normalize(0.5, 0.5)).
        transforms: callable applied to the resized HxWx3 uint8 array to turn it
            back into a normalized tensor (the ToTensor + Normalize pipeline).
            Must be provided; the None default is kept only for signature
            compatibility.

    Returns:
        A (1, H, W) tensor filled with -1 (background) with the shrunken image
        in the centre.
    """
    zoom = 0.1*random.randint(5,7)
    channel,height,width = src.shape
    new_h, new_w = int(height*zoom), int(width*zoom)
    temp = torch.empty(1,height,width)
    temp.copy_(src)
    # Undo Normalize(0.5, 0.5) first: [-1, 1] -> [0, 1]. Scaling the normalized
    # data directly by 255 would clamp every pixel below mid-grey to 0.
    temp.mul_(0.5).add_(0.5)
    temp = temp.mul(255).add_(0.5).clamp_(0,255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
    tempCV = cv2.cvtColor(temp, cv2.COLOR_GRAY2BGR)
    # cv2.resize takes the target size as (width, height)
    tempCV = cv2.resize(tempCV, (new_w,new_h), interpolation = cv2.INTER_AREA)
    #cv2.imwrite("temp_np.jpg",tempCV)
    resizeTemp = transforms(tempCV)
    outTemp = -torch.ones(1,height,width)
    top = int(height*(1-zoom)*0.5)
    left = int(width*(1-zoom)*0.5)
    # only channel 0 of the 3-channel result is needed for the grey image
    outTemp[0,top:top+new_h,left:left+new_w]=resizeTemp[0,:,:]
    #save_img(outTemp,[1,28,28],"resize_images","resize")
    #pdb.set_trace()
    return outTemp