GETTING STARTED
Tensors
In the PyTorch deep learning framework, Tensors behave much like NumPy arrays, but they can additionally be accelerated on a GPU.
import torch
# Construct a 5x3 matrix, uninitialized
# At first "uninitialized" was confusing, since a value clearly gets assigned. After
# several runs, the printed values turn out to differ every time, some huge and some tiny:
# torch.empty allocates memory without setting it, so the tensor contains whatever
# garbage values happened to be in that memory
x = torch.empty(5, 3)
print(x)
# Output
tensor([[8.3665e+22, 4.5580e-41, 1.6025e-03],
        [3.0763e-41, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 3.4438e-41],
        [0.0000e+00, 4.8901e-36, 2.8026e-45],
        [6.6121e+31, 0.0000e+00, 9.1084e-44]])
-------------------------------------------------------------------------------------
# Random initialization
# Samples from the uniform distribution on [0, 1)
x = torch.rand(5, 3)
print(x)
# Output
tensor([[0.1607, 0.0298, 0.7555],
        [0.8887, 0.1625, 0.6643],
        [0.7328, 0.5419, 0.6686],
        [0.0793, 0.1133, 0.5956],
        [0.3149, 0.9995, 0.6372]])
-------------------------------------------------------------------------------------
# Create a tensor of all zeros and specify its dtype
x = torch.zeros(5, 3, dtype=torch.long)
print(x)
# Output
tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])
-------------------------------------------------------------------------------------
# Construct a tensor directly from data
x = torch.tensor([5.5, 3])
print(x)
# Output
tensor([5.5000, 3.0000])
-------------------------------------------------------------------------------------
# Create a tensor based on an existing tensor (the values need not match, but the size does).
# These methods reuse properties of the input tensor, such as dtype, unless you override them
x = x.new_ones(5, 3, dtype=torch.double) # new_* methods take in sizes
print(x)
x = torch.randn_like(x, dtype=torch.float) # override dtype! the new dtype replaces the old one
print(x) # result has the same size
# Output
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[-0.2217, -0.9135, -0.6010],
        [-0.3193, -0.3675,  0.1951],
        [ 0.0646, -0.4947,  1.0374],
        [-0.4154, -1.0247, -1.2872],
        [ 0.5228,  0.3420,  0.0219]])
-------------------------------------------------------------------------------------
# Get the size
print(x.size())
s = x.size()
print(s)
print(s[0]) # torch.Size is in fact a tuple, so it supports all tuple operations
# Output
torch.Size([5, 3])
5
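Since torch.Size is a subclass of Python's tuple, everything a tuple supports works on it; a quick sketch:
rows, cols = x.size()      # tuple-style unpacking
print(rows, cols)          # 5 3
print(len(x.size()))       # 2, the number of dimensions
print(x.size() == (5, 3))  # True: compares equal to a plain tuple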
Operations
This part covers operations on tensors.
# Addition, as an example
x = torch.ones(5, 3)
y = torch.rand(5, 3)
print(x + y)
print(torch.add(x, y))
result = torch.empty(5, 3)
torch.add(x, y, out=result) # write the result into the given output tensor
print(result)
# In-place version: adds x to y
# Plain y.add(x) would also compute the sum, but it returns a new tensor;
# the trailing "_" means the operation mutates y itself
y.add_(x)
print(y)
# All of the prints above produce the same output
tensor([[ 0.2349, -0.0427, -0.5053],
        [ 0.6455,  0.1199,  0.4239],
        [ 0.1279,  0.1105,  1.4637],
        [ 0.4259, -0.0763, -0.9671],
        [ 0.6856,  0.5047,  0.4250]])
-------------------------------------------------------------------------------
# torch.view resizes/reshapes a tensor
x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8) # the size -1 is inferred from other dimensions
print(x.size(), y.size(), z.size())
# Output
torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])
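One caveat worth noting: view never copies data, so it requires the tensor's memory layout to be contiguous, and it can fail after operations such as transpose. A small sketch of the workaround (reshape copies only when it has to):
x = torch.randn(4, 4)
t = x.t()                    # transpose: same storage, non-contiguous layout
# t.view(16)                 # this would raise a RuntimeError
y = t.contiguous().view(16)  # make the memory contiguous first
z = t.reshape(16)            # reshape handles both cases automatically
print(y.size(), z.size())    # torch.Size([16]) torch.Size([16])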
# If a tensor holds a single element, .item() extracts it as a Python number
x = torch.randn(1)
print(x)
print(x.item())
# Output
tensor([1.9218])
1.9218417406082153
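For tensors with more than one element, .item() raises an error; .tolist() converts the whole tensor to nested Python lists instead. A quick sketch:
x = torch.randn(2)
# x.item()         # error: only one-element tensors convert to Python scalars
print(x.tolist())  # a plain Python list of two floats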
NUMPY BRIDGE
A Torch Tensor and its corresponding NumPy array share their underlying memory locations, so changing one changes the other. For example:
import torch
import numpy as np
a = torch.ones(5)
print(a)
b = a.numpy() # tensor ----> numpy array
print(b)
# Output
tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
----------------------------------------------------------------------------------
# Modify the tensor
a.add_(1)
print(a)
print(b) # the array changes as well
# Output
tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]
----------------------------------------------------------------------------------
a = np.ones(5)
b = torch.from_numpy(a) # numpy ----> tensor
np.add(a, 1, out=a)
print(a)
print(b) # the tensor changes as well
# Output
[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
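If a copy is wanted rather than shared memory, torch.tensor() always copies its input; a minimal sketch:
a = np.ones(5)
b = torch.from_numpy(a)  # shares memory with a
c = torch.tensor(a)      # copies the data
np.add(a, 1, out=a)
print(b)  # tensor([2., 2., 2., 2., 2.], dtype=torch.float64): follows a
print(c)  # tensor([1., 1., 1., 1., 1.], dtype=torch.float64): unaffected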
CUDA TENSORS
Tensors can be moved onto any device, CPU or GPU, with the .to method.
x = torch.rand(5)
if torch.cuda.is_available():
    device = torch.device("cuda")          # a CUDA device object
    y = torch.ones_like(x, device=device)  # directly create a tensor on the GPU
    x = x.to(device)                       # or just use strings: x.to("cuda")
    z = x + y
    print(z)
    # move the result back to the CPU; .to can change the dtype at the same time
    print(z.to("cpu", torch.double))
# Output
tensor([1.4118, 1.2739, 1.6951, 1.7109, 1.4069], device='cuda:0')
tensor([1.4118, 1.2739, 1.6951, 1.7109, 1.4069], dtype=torch.float64)
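As shorthand, .cuda() and .cpu() perform the same moves as .to, and a tensor's device attribute reports where it currently lives; a quick sketch, assuming a CUDA device is available:
if torch.cuda.is_available():
    x = torch.rand(5).cuda()  # same as x.to("cuda")
    print(x.device)           # cuda:0
    x = x.cpu()               # same as x.to("cpu")
    print(x.device)           # cpu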
NEURAL NETWORKS
Define a network. Example:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
net = Net()
print(net)
params = list(net.parameters())
print(len(params))  # weight, bias, weight, bias, ... for the five layers, hence 10
print(params[0].size())  # conv1's .weight
print(params[1].size())  # conv1's .bias
# Forward pass on dummy data; the dimensions are batch size, image channels, height, width
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)
# Output
# printing the net directly lists every layer
Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
10
torch.Size([6, 1, 5, 5])
torch.Size([6])
tensor([[ 0.1246, -0.0511,  0.0235,  0.1766, -0.0359, -0.0334,  0.1161,  0.0534,
          0.0282, -0.0202]], grad_fn=<ThAddmmBackward>)
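Because out has ten elements rather than being a scalar, calling backward on it directly requires an explicit gradient tensor of the same shape; for example, with random gradients:
net.zero_grad()                   # zero the gradient buffers of all parameters
out.backward(torch.randn(1, 10))  # backprop with a random gradient per output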
LOSS FUNCTION
output = net(input)
target = torch.randn(10) # a dummy target, for example
target = target.view(1, -1) # make it the same shape as output
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)
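Following loss backwards through its grad_fn attributes shows the chain of operations that produced it, e.g.:
print(loss.grad_fn)                                            # MSELoss
print(loss.grad_fn.next_functions[0][0])                       # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU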
BACKPROP
Call loss.backward() to backpropagate the error, but zero the gradients first: gradients accumulate in the .grad buffers on every backward pass, so without clearing them the next update would subtract the accumulated gradients rather than the current ones, which is wrong. Example:
net.zero_grad()  # zero the gradient buffers
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)
# Output
conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([ 0.0181, -0.0048, -0.0229, -0.0138, -0.0088, -0.0107])
UPDATE THE WEIGHTS
The update rule is: weight = weight - learning_rate * gradient
This is the simplest form of update (plain SGD), and it can be implemented in a few lines:
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)
This hand-rolled version is low-level, though, and implementing rules such as Adam or RMSProp this way gets complicated. Fortunately PyTorch provides ready-made optimizers in torch.optim:
import torch.optim as optim
# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)
# in your training loop:
optimizer.zero_grad() # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step() # Does the update
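Swapping in a different update rule is then a one-line change; for instance, Adam with a typical learning rate (shown only as an illustration):
optimizer = optim.Adam(net.parameters(), lr=1e-3)  # rest of the loop is unchanged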
TRAINING A CLASSIFIER
Using the CIFAR10 dataset as an example, this section walks through loading, displaying, training, and testing.
1. Loading and normalizing CIFAR10
import torch
import torchvision
import torchvision.transforms as transforms
# Normalize the images
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='E:/database/cifar-10', train=True,
                                        download=False, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)
# the test set and loader are needed by the evaluation code further down
testset = torchvision.datasets.CIFAR10(root='E:/database/cifar-10', train=False,
                                       download=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
import matplotlib.pyplot as plt
import numpy as np

def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

if __name__ == '__main__':
    # get some random training images
    dataiter = iter(trainloader)
    images, labels = next(dataiter)
    # show images
    imshow(torchvision.utils.make_grid(images))
    # print labels
    print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# Define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
# train
for epoch in range(1):  # one epoch
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')
# Output
[1, 2000] loss: 2.191
[1, 4000] loss: 1.868
[1, 6000] loss: 1.684
[1, 8000] loss: 1.588
[1, 10000] loss: 1.524
[1, 12000] loss: 1.487
Finished Training
# test
correct = 0
total = 0
# no gradient computation is needed for evaluation
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        # torch.max returns the max values and their indices; only the
        # indices (predicted) are needed here
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
# Output
Accuracy of the network on the 10000 test images: 47 %
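At this point it is worth saving the trained weights so evaluation can be rerun without retraining; a minimal sketch (the file name cifar_net.pth is just an example):
PATH = './cifar_net.pth'            # example path, choose your own
torch.save(net.state_dict(), PATH)  # persist only the learned parameters
# later: rebuild the architecture and load the weights back
net = Net()
net.load_state_dict(torch.load(PATH))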
# Per-class accuracy
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        # squeeze() drops dimensions of size 1; predicted and labels are both
        # 1-D here, so it is a no-op in this case (which is why adding or
        # removing it makes no visible difference), but it guards against a
        # stray singleton dimension
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))
# Output
Accuracy of plane : 43 %
Accuracy of car : 48 %
Accuracy of bird : 27 %
Accuracy of cat : 5 %
Accuracy of deer : 24 %
Accuracy of dog : 44 %
Accuracy of frog : 73 %
Accuracy of horse : 67 %
Accuracy of ship : 78 %
Accuracy of truck : 57 %
TRAINING ON GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# move the network onto the GPU
net.to(device)
# the inputs and labels must be sent to the GPU as well
inputs, labels = inputs.to(device), labels.to(device)
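Concretely, the .to(device) calls for the data belong inside the training loop, since every new batch arrives on the CPU; a sketch of the adapted loop under that assumption:
net.to(device)
for epoch in range(1):
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)  # per-batch transfer
        optimizer.zero_grad()
        loss = criterion(net(inputs), labels)
        loss.backward()
        optimizer.step()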
I ran all of the code above. The tutorial says you won't notice a GPU speedup because the network is so small, which is fair enough; in my runs, though, it was not merely that the speedup was small, the GPU version was actually much slower than the CPU one. Not surprising, really: constantly shipping data into GPU memory and copying results back to the CPU for output carries real overhead.
This was a first look at PyTorch, and it really is unlike Keras and TensorFlow: writing PyTorch feels like writing ordinary Python code, whereas with TF and Keras you feel the presence and constraints of the framework at every turn.
These notes are only an introduction; for more functions and usage, consult the tutorials and documentation.
References
https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html
https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#numpy-bridge
https://pytorch.org/docs/stable/torch.html?highlight=torch%20rand#torch.rand