pytorch code learn
1pytorch 学习
在学习深度学习框架之前,个人认为了解各个框架的输入才是一切的开始。因此,今天讨论下pytorch的数据输入。各种数据之间的互换、常用代码块
1.1 PIL
好多pytorch的测试代码都是用PIL读取图片而本人则比较喜欢opencv读取,那么先讨论PIL,cv2和torchvision.transforms的区别。
from PIL import Image
import numpy as np
# Load an image with PIL, inspect it, and compare how matplotlib displays
# the PIL object versus a float32 NumPy copy of it.
# NOTE: `plt` is expected to be matplotlib.pyplot
# (`import matplotlib.pyplot as plt`); this snippet does not import it.
image = Image.open('test.jpg')
print(type(image))  # a PIL image object
print(image.size)
print(image.mode)  # out: 'RGB'
print(image.getpixel((0, 0)))  # out: (143, 198, 201)

# The PIL image itself can be handed to imshow and displays correctly.
# (Fix: the original passed an undefined name `img` here.)
plt.figure(1)
plt.imshow(image)
plt.show()

# resize takes (width, height)
image = image.resize((200, 100), Image.NEAREST)
print(image.size)  # out: (200,100)

image = np.array(image, dtype=np.float32)  # now a numpy array
print(image.shape)

# This one does NOT display correctly: the float32 array still holds the
# original 0-255 pixel values, while imshow expects floating-point RGB data
# in the range [0, 1] (see the matplotlib imshow documentation).
plt.figure(1)
plt.imshow(image)
plt.show()
1.2 opencv
import cv2
import numpy as np
# Load an image with OpenCV, inspect the resulting array, resize it and
# display it.
# NOTE: `plt` is expected to be matplotlib.pyplot
# (`import matplotlib.pyplot as plt`); this snippet does not import it.
image = cv2.imread('test.jpg')
print(type(image))  # a numpy array
print(image.dtype)  # images are read in as uint8
print(image.shape)  #
print(image)  # BGR
'''
array([
[ [143, 198, 201 (dim=3)],[143, 198, 201],... (w=200)],
[ [143, 198, 201],[143, 198, 201],... ],
...(h=100)
], dtype=uint8)
'''
# cv2.resize takes (width, height)
image = cv2.resize(image, (100, 200), interpolation=cv2.INTER_LINEAR)
print(image.dtype)  # out: dtype('uint8')
print(image.shape)  # out: (200, 100, 3)
plt.figure(1)
# Fix: the original passed an undefined name `img`. The array is also
# converted from OpenCV's BGR order to RGB so the colours are not swapped
# when matplotlib draws it.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.show()
1.3 torchvision.transforms.ToTensor()
关键看一下其中的toTensor函数
def to_tensor(pic):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

    The input is either a PIL image or a numpy image; the output is the
    converted tensor. numpy input is transposed from (H, W, C) to (C, H, W);
    PIL input is viewed as (H, W, C) and then permuted to (C, H, W).
    Whenever the intermediate result is a ``torch.ByteTensor`` it is
    converted to float and divided by 255.

    Raises:
        TypeError: if ``pic`` is neither a PIL image nor a numpy image.
    """
    if not _is_pil_image(pic) and not _is_numpy_image(pic):
        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

    # numpy input: only a transpose (and byte -> float scaling) is needed
    if isinstance(pic, np.ndarray):
        tensor = torch.from_numpy(pic.transpose((2, 0, 1)))
        return tensor.float().div(255) if isinstance(tensor, torch.ByteTensor) else tensor

    # accimage input: copy straight into a float32 (C, H, W) buffer
    if accimage is not None and isinstance(pic, accimage.Image):
        buffer = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
        pic.copyto(buffer)
        return torch.from_numpy(buffer)

    # handle PIL Image
    mode = pic.mode
    if mode == 'I':
        tensor = torch.from_numpy(np.array(pic, np.int32, copy=False))
    elif mode == 'I;16':
        tensor = torch.from_numpy(np.array(pic, np.int16, copy=False))
    elif mode == 'F':
        tensor = torch.from_numpy(np.array(pic, np.float32, copy=False))
    elif mode == '1':
        tensor = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False))
    else:
        tensor = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))

    # PIL image mode: L, P, I, F, RGB, YCbCr, RGBA, CMYK
    # The channel count is the length of the mode string, except for the two
    # modes whose names do not match their channel count.
    nchannel = 3 if mode == 'YCbCr' else 1 if mode == 'I;16' else len(mode)

    width, height = pic.size[0], pic.size[1]
    tensor = tensor.view(height, width, nchannel)
    # put it from HWC to CHW format
    # (the original source notes this step takes 80% of the loading time/CPU)
    tensor = tensor.permute(2, 0, 1).contiguous()
    if isinstance(tensor, torch.ByteTensor):
        return tensor.float().div(255)
    return tensor
总结
PIL图片转换成numpy后,格式为(h,w,c),像素顺序为RGB;
opencv在读取后就是numpy,格式为(h,w,c),像素顺序为BGR;
torchvision.transforms 中的 ToTensor 会自己判断图片是 PIL 格式还是 numpy 格式,并转换成 torch 所需的 (c,h,w) 格式。
PIL和cv2的互相转换代码如下:
import cv2
from PIL import Image
import numpy as np
def pil_cv2(img_path):
    """Read an image with PIL and return it as an OpenCV-style BGR array.

    Args:
        img_path: path of the image file to open.

    Returns:
        A numpy array with the channels in BGR order.
    """
    # The context manager closes the file once the pixel data has been read.
    with Image.open(img_path) as image:
        # Force three RGB channels first, so grayscale ('L') or palette
        # images do not reach cvtColor as 2-D arrays (the same conversion
        # totorch applies to 'L' images).
        rgb = np.asarray(image.convert('RGB'))
    img = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    return img
import cv2
from PIL import Image
def cv2_pil(img_path):
    """Read an image with OpenCV and return it as a PIL image.

    The array read by OpenCV is converted from BGR to RGB channel order
    before being wrapped in a PIL image.
    """
    bgr_pixels = cv2.imread(img_path)
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb_pixels)
做测试时的代码如下,可以看出 torch 的像素顺序为 RGB,这大概就是 torch 的示例代码多用 PIL 读图的原因。
def totorch(img_path,mode):
    """Load an image with PIL or OpenCV, preprocess it and feed it to ``net``.

    Args:
        img_path: path of the image file.
        mode: ``"PIL"`` reads the file with PIL; any other value reads it
            with ``cv2.imread``.

    NOTE(review): the visible code ends at ``y = net(x)`` and returns
    nothing; ``to_chw_bgr``, ``cfg``, ``use_cuda``, ``net``, ``Variable`` and
    ``time`` are not defined in this snippet and must come from elsewhere.
    """
    if mode=="PIL":
        img = Image.open(img_path)
        if img.mode == 'L':
            # single-channel image: convert so the array has three channels
            img = img.convert('RGB')
        img = np.array(img)
    else:
        img=cv2.imread(img_path)
    height, width, _ = img.shape
    # Scale factor chosen so the resized image covers about 1700 * 1200 pixels.
    max_im_shrink = np.sqrt(
        1700 * 1200 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img, None, None, fx=max_im_shrink,
                       fy=max_im_shrink, interpolation=cv2.INTER_LINEAR)
    # image = cv2.resize(img, (640, 640))
    # presumably converts HWC to CHW and reorders channels - TODO confirm
    # against the definition of to_chw_bgr
    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    if mode=="PIL":
        # reverse the order along the first axis for PIL input only
        # (presumably the channel axis - verify against to_chw_bgr)
        x = x[[2, 1, 0], :, :]
    # add a leading batch dimension before handing the array to the network
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    t1 = time.time()
    y = net(x)
2torch保存模型
首先了解下什么是状态字典(state_dict)
在Pytorch中,torch.nn.Module 模型的可学习参数(即权重和偏差)包含在模型的 parameters 中(使用 model.parameters() 可以进行访问)。state_dict 仅仅是 python 字典对象,它将每一层映射到其参数张量。注意,只有具有可学习参数的层(如卷积层、线性层等)才会在模型的 state_dict 中有对应的条目。优化器对象(torch.optim)也有 state_dict,它包含有关优化器的状态信息,以及所使用的超参数。
2.1保存/加载 state_dict (推荐使用)
保存:
# Write only the model's state_dict to PATH.
torch.save(model.state_dict(), PATH)
加载:
# Re-create the model from its class, then fill it with the saved state_dict.
model = TheModelClass(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
# switch the model to evaluation mode
model.eval()
2.2保存/加载完整模型
保存:
# Write the whole model object (not just its state_dict) to PATH.
torch.save(model, PATH)
加载
# Model class must be defined somewhere
# Read the whole model object back from PATH.
model = torch.load(PATH)
# switch the model to evaluation mode
model.eval()
2.3保存 和 加载 Checkpoint 用于推理/继续训练
保存:
# Bundle the epoch, both state_dicts and the loss into one dict and save it.
# The `...` line is a placeholder for any further entries.
torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
    ...
}, PATH)
加载:
# Re-create the model and optimizer, then restore each from the checkpoint
# dict using the keys it was saved under.
model = TheModelClass(*args, **kwargs)
optimizer = TheOptimizerClass(*args, **kwargs)
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']
# pick ONE of the two modes below: eval() or train()
model.eval()
# - or -
model.train()
2.4 保存多个模型
保存:
# Save the state_dicts of two models and their optimizers in a single dict.
# The `...` line is a placeholder for any further entries.
torch.save({
    'modelA_state_dict': modelA.state_dict(),
    'modelB_state_dict': modelB.state_dict(),
    'optimizerA_state_dict': optimizerA.state_dict(),
    'optimizerB_state_dict': optimizerB.state_dict(),
    ...
}, PATH)
加载:
# Re-create both models and both optimizers, then restore each one from the
# single checkpoint dict using the key it was saved under.
modelA = TheModelAClass(*args, **kwargs)
modelB = TheModelBClass(*args, **kwargs)
optimizerA = TheOptimizerAClass(*args, **kwargs)
optimizerB = TheOptimizerBClass(*args, **kwargs)
checkpoint = torch.load(PATH)
modelA.load_state_dict(checkpoint['modelA_state_dict'])
modelB.load_state_dict(checkpoint['modelB_state_dict'])
optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
optimizerB.load_state_dict(checkpoint['optimizerB_state_dict'])
# pick ONE of the two modes below: eval() or train()
modelA.eval()
modelB.eval()
# - or -
modelA.train()
modelB.train()
2.5保存多gpu模型
保存:
# Save the state_dict of the wrapped network reached through `model.module`
# (presumably `model` is a multi-GPU wrapper such as nn.DataParallel - confirm).
torch.save(model.module.state_dict(), PATH)
加载可以用任何方式加载,但要注意使用关键字 map_location
3 网络模型的构造
3.1 nn.Sequential()
对于cnn前馈神经网络如果前馈一次写一个forward函数会有些麻烦,在此就有两种简化方式,ModuleList和Sequential。建立nn.Sequential()对象,必须小心确保一个块的输出大小与下一个块的输入大小匹配。
第一种写法:
# Start from an empty container and register each layer under an explicit name.
net1 = nn.Sequential()
for layer_name, layer in (
        ('conv', nn.Conv2d(3, 3, 3)),
        ('batchnorm', nn.BatchNorm2d(3)),
        ('activation_layer', nn.ReLU()),
):
    net1.add_module(layer_name, layer)
第二种写法:
# Pass the layers positionally; they are then addressed by index.
net2_layers = [nn.Conv2d(3, 3, 3), nn.BatchNorm2d(3), nn.ReLU()]
net2 = nn.Sequential(*net2_layers)
第三种写法:
from collections import OrderedDict
# Collect the named layers in an ordered mapping, then hand it to Sequential.
net3_layers = OrderedDict()
net3_layers["conv1"] = nn.Conv2d(3, 3, 3)
net3_layers["batch"] = nn.BatchNorm2d(3)
net3_layers["activat"] = nn.ReLU()
net3 = nn.Sequential(net3_layers)
打印出三种写法的网络输出:
# Print each container; the string literal after every call records the
# output the author observed.
# net1: children are listed under the names passed to add_module.
print(net1)
'''Sequential(
(conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
(batchnorm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation_layer): ReLU()
)'''
# net2: children are listed under their positional index.
print(net2)
'''Sequential(
(0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
(1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU())'''
# net3: children are listed under the OrderedDict keys.
print(net3)
'''Sequential(
(conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
(batch): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activat): ReLU())'''
so,可根据名字或序号取出子module
# Run a random batch through net1, then fetch the first layer of each
# container: by attribute name for net1/net3, by index for net2.
x= torch.randn(3,3,320,320)
y=net1(x)
net1.conv, net2[0], net3.conv1
(Conv2d (3, 3, kernel_size=(3, 3), stride=(1, 1)),
Conv2d (3, 3, kernel_size=(3, 3), stride=(1, 1)),
Conv2d (3, 3, kernel_size=(3, 3), stride=(1, 1)))
3.2nn.ModuleList
# Layer spec consumed by vgg(): an int is the output channel count of a
# 3x3 conv, 'M' is a 2x2 max-pool, 'C' is a 2x2 max-pool with ceil_mode=True.
vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
           512, 512, 512, 'M']


def vgg(cfg, i, batch_norm=False):
    """Build the list of layers described by ``cfg``.

    Args:
        cfg: sequence of ints and the markers ``'M'`` / ``'C'`` (see
            ``vgg_cfg``).
        i: number of input channels of the first convolution.
        batch_norm: when True, a ``BatchNorm2d`` is inserted between every
            configured convolution and its ReLU.

    Returns:
        A plain Python list of ``nn.Module`` layers: the configured layers
        followed by a dilated 3x3 conv (``conv6``), ReLU, a 1x1 conv
        (``conv7``) and ReLU.
    """
    layers = []
    in_channels = i
    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        elif v == 'C':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))
        else:
            layers.append(nn.Conv2d(in_channels, v, kernel_size=3, padding=1))
            if batch_norm:
                layers.append(nn.BatchNorm2d(v))
            layers.append(nn.ReLU(inplace=True))
            in_channels = v
    # conv6 takes whatever the last configured conv produced. The original
    # hard-coded 512 here, which only matched configs ending in a 512-channel
    # conv; for vgg_cfg the result is unchanged.
    conv6 = nn.Conv2d(in_channels, 1024, kernel_size=3, padding=6, dilation=6)
    conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
    layers += [conv6, nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
    return layers
# Build the layer list and wrap it in a ModuleList.
# NOTE: this rebinds the name `vgg` from the builder function to the
# resulting ModuleList.
vgg = nn.ModuleList(vgg(vgg_cfg, 3))
x = torch.randn(3, 3, 320, 320)
# Apply the layers one after another, feeding each output to the next layer.
for i, layer in enumerate(vgg):
    x = layer(x)
3.3 torch.nn的某些函数
# Print the ModuleList; its layers are listed by index.
print(vgg)
ModuleList(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
获取网络的层名和每层的结构
# Walk the direct children of the ModuleList, printing each child's name
# followed by its description.
for name, module in vgg.named_children():
    header = "name is {}".format(name)
    print(header)
    print(module)
输出
name is 0
Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
name is 1
ReLU(inplace=True)
name is 2
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
named_children() 和 named_modules() 的区别在于:named_children() 只返回直接子模块的迭代器,而 named_modules() 会递归地返回所有模块(包括模型自身以及各级子模块)的迭代器。
import torch
import torch.nn as nn
class TestModule(nn.Module):
    """Small demo module with two named ``nn.Sequential`` children.

    ``layer1`` is a 3x3 convolution (16 -> 32 channels, stride 1) followed by
    a ReLU; ``layer2`` is a single ``Linear(32, 10)``.
    """

    def __init__(self):
        super(TestModule, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(16, 32, 3, 1),
            nn.ReLU(inplace=True)
        )
        self.layer2 = nn.Sequential(
            nn.Linear(32, 10)
        )

    def forward(self, x):
        """Apply ``layer1`` then ``layer2`` and return the result.

        ``layer2`` is a Linear layer applied to the last dimension of the
        convolution output, so that dimension must have size 32.
        """
        x = self.layer1(x)
        x = self.layer2(x)
        # Fix: the original had no return statement, so calling the module
        # always produced None.
        return x
# Compare the two iterators on one TestModule instance: first the names
# yielded by named_children(), then the names yielded by named_modules().
model = TestModule()
for child_name, _child in model.named_children():
    print('children module:', child_name)
for module_name, _module in model.named_modules():
    print('modules:', module_name)
输出
>>out:
children module: layer1
children module: layer2
modules:
modules: layer1
modules: layer1.0
modules: layer1.1
modules: layer2
modules: layer2.0
获取参数名和其值
# Print every parameter of the ModuleList: its name, then its value.
for name, param in vgg.named_parameters():
    header = "name is {}".format(name)
    print(header)
    print(param)
输出
name is 0.weight
Parameter containing:
tensor([[[[-3.6296e-02, 1.0836e-01, 1.3510e-01],
[-1.3361e-01, 5.9745e-02, 2.4438e-02],
[-1.7095e-01, -9.9576e-02, 1.1297e-01]],
[[-8.1718e-02, 6.0913e-02, -3.0109e-02],
[-1.5120e-01, 8.5647e-02, 1.4474e-01],
[-6.8853e-02, -8.2803e-02, -1.8270e-01]]])
name is 0.bias
Parameter containing:
tensor([ 1.8735e-01, 3.1941e-03, -1.8747e-01, -3.1531e-02, 3.9880e-04,
-8.7427e-02, 1.9193e-01, -7.6810e-03, -5.3820e-02, -5.4126e-02,
1.5074e-01, -6.0450e-02, -7.7125e-02, 8.1087e-02, 1.8392e-01,
7.3845e-02, 1.4453e-01, -1.0507e-01, -5.9577e-02, 2.3714e-02],
requires_grad=True)
name is 2.weight
Parameter containing:
tensor([[[[-2.3275e-02, -3.5920e-02, -3.5464e-02],
[-1.2356e-02, -1.4983e-02, -2.8448e-02],
[ 7.7514e-03, 3.7017e-02, -3.8343e-02]],
[[-2.7261e-02, 2.7608e-03, 3.8855e-02],
[ 3.4545e-02, -2.2671e-02, -1.8810e-02]])
3.4总结
两种方法的不同在于:nn.ModuleList 传入的参数是一个列表,并且 ModuleList 只是一个存放模块的容器,没有实现 forward(),不能像 nn.Sequential 那样直接调用来完成前向传播,需要自己遍历其中的各层(如 3.2 中的 for 循环)。
4数据集的定义
4.1 使用pytorch定义的Dataset类
# Preprocessing applied to both splits: resize, centre-crop to 224, convert
# to a tensor, then normalise each of the three channels with mean 0.5 and
# std 0.5.
data_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training split: one ImageFolder dataset plus a shuffling loader, batch size 4.
train_dataset = datasets.ImageFolder(
    root="/disk3/dataset/classfy/finaldataset/train",
    transform=data_transform,
)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=4, shuffle=True)

# Validation split, configured the same way.
val_dataset = datasets.ImageFolder(
    root="/disk3/dataset/classfy/finaldataset/val",
    transform=data_transform,
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=4, shuffle=True)
只需要数据按下图摆放即可
-
train
- 类别一
- 类别二
-
val
- 类别一
- 类别二
4.2编写自己的dataset类
编写自己的dataset类,需要继承Dataset类,并进行override,最重要的复写类中的几个函数如下:
(1) __init__ : 读各种格式的数据集、路径等,控制传入参数
(2) __getitem__ : 使 dataset[i] 能够获得第 i 个样本数据,即导入具体数据
(3) __len__ : 使 len(dataset) 返回数据集的大小
class mydataset(Dataset):
    """Image dataset described by a text file of ``<image path> <label>`` lines.

    Each sample is ``(image, label)`` where ``label`` is the integer value of
    the second field of the corresponding line.
    """

    def __init__(self, txt_path, transform=data_transform):
        """Read the sample list.

        Args:
            txt_path: path of the list file.
            transform: callable applied to every loaded PIL image, or None to
                return the PIL image unchanged.
        """
        self.image_path, self.label_name = self.get_image_path(txt_path)
        # Fix: the original always stored the global data_transform here and
        # silently ignored the argument.
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for sample ``index``."""
        img_path = self.image_path[index]
        label = int(self.label_name[index])
        # Force three channels so grayscale or RGBA files do not break the
        # three-channel Normalize step of data_transform.
        img = Image.open(img_path).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        """Return the number of samples listed in the file."""
        return len(self.label_name)

    def get_image_path(self, path):
        """Parse the list file into parallel lists of image paths and labels.

        Fields are separated by any run of whitespace and blank lines are
        skipped, so trailing newlines, tabs or repeated spaces do not produce
        empty fields. Labels are returned as strings.
        """
        img_list = []
        label_list = []
        with open(path, "r") as fr:
            for line in fr:
                fields = line.split()
                if not fields:
                    continue
                img_list.append(fields[0])
                label_list.append(fields[1])
        return img_list, label_list
# Training list -> dataset -> shuffling loader with batch size 4.
train1 = mydataset(
    "/disk3/dataset/classfy/finaldataset/train.txt", data_transform)
train1_loader = torch.utils.data.DataLoader(
    train1, batch_size=4, shuffle=True)

# Test list, wrapped the same way; its loader is named val1_loader.
test1 = mydataset(
    "/disk3/dataset/classfy/finaldataset/test.txt", data_transform)
val1_loader = torch.utils.data.DataLoader(
    test1, batch_size=4, shuffle=True)
其中 __getitem__() 函数主要关注 transform 的输出和 __getitem__() 自身的输出,细节以后再探究。目前已确认可行的是:PIL 读取的图片可以直接送入 transform();__getitem__() 输出的 label 不可以是字符串(string)。
5训练时指标计算方法
def train():
    """Train ``alexnet`` for ``epochs`` epochs, evaluating after each one.

    Relies on names defined elsewhere: ``epochs``, ``train1_loader``,
    ``val1_loader``, ``alexnet``, ``optimizer`` and ``criterion``. Inputs and
    labels are moved to the GPU with ``.cuda()``.

    The printed "loss" is the sum of the per-batch loss values divided by the
    number of samples seen so far; "acc" is the percentage of correct
    predictions.
    """
    for i in range(epochs):
        # Fix: go back to training mode every epoch. The evaluation phase
        # below calls alexnet.eval(), and the original never undid it, so
        # every epoch after the first was trained in eval mode.
        alexnet.train()
        runing_loss = 0
        train_correct = 0
        train_total = 0
        for j, (images, labels) in enumerate(train1_loader):
            images = images.cuda()
            labels = labels.cuda()
            optimizer.zero_grad()
            output = alexnet(images)
            _, pred = torch.max(output.data, 1)
            # .item() keeps the counters plain Python numbers, so the
            # percentage below is ordinary float arithmetic.
            train_correct += (pred == labels).sum().item()
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            runing_loss += loss.item()
            train_total += labels.size(0)
            if j % 100 == 0:
                print("train epoch is{} ,loss is{},acc is{}".format(
                    i + 1, runing_loss / train_total,
                    100.0 * train_correct / train_total))
        # max(..., 1) avoids a division by zero when a loader is empty
        print("train epoch is{} ,loss is{},acc is{}".format(
            i + 1, runing_loss / max(train_total, 1),
            100.0 * train_correct / max(train_total, 1)))

        correct = 0
        test_loss = 0
        test_total = 0
        alexnet.eval()
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for test_images, test_labels in val1_loader:
                test_images = test_images.cuda()
                test_labels = test_labels.cuda()
                test_output = alexnet(test_images)
                _, pred = torch.max(test_output.data, 1)
                loss = criterion(test_output, test_labels)
                test_loss += loss.item()
                test_total += test_labels.size(0)
                correct += (pred == test_labels).sum().item()
        print("test epoch is{},loss is{},acc is{}".format(
            i + 1, test_loss / max(test_total, 1),
            100.0 * correct / max(test_total, 1)))
最后上传一个完整代码
6常用函数
torch.cat和torch.stack
import torch
# torch.cat joins tensors along an EXISTING dimension.
a = torch.randn(1, 3, 4, 4)  # a 4-D tensor, read by the author as [N, c, w, h]
b = torch.cat([a, a], dim=0)  # dim defaults to 0 when omitted
# (2, 3, 4, 4)
c = torch.cat([a, a], 1)
# (1, 6, 4, 4)
torch.cat除了要cat的维度其他维度必须一致
import torch
# torch.stack joins tensors along a NEW dimension inserted at the given index.
a = torch.randn(1, 3, 4, 4)  # [N, c, w, h]
b = torch.stack([a, a], dim=0)  # dim defaults to 0 when omitted
# (2, 1, 3, 4, 4)
c = torch.stack([a, a], dim=1)
# (1, 2, 3, 4, 4)
d = torch.stack([a, a], dim=2)
# (1, 3, 2, 4, 4)
torch.stack 会先在要 stack 的维度上给每个张量扩充出一个新维度,再进行拼接。比如 d:先将 a 变成 (1,3,1,4,4),再 cat,最终的结果就是 (1,3,2,4,4)。
torch.scatter()和scatter_()函数
参考scatter
scatter(dim, index, src)
dim: 索引的维度。按照i, j, k, …的哪个方向进行索引
index: 索引。可以是一个tensor,存储需要改的元素的位置的tensor
src: 用src中的值来修改。可以是tensor;可以是一个数字,用同样的数字写入tensor
scatter() 和 scatter_() 函数功能相同:只不过带下划线的函数,通常是直接修改原来的tensor
原理
self[index[i][j][k]][j][k] = src[i][j][k] # if dim == 0
self[i][index[i][j][k]][k] = src[i][j][k] # if dim == 1
self[i][j][index[i][j][k]] = src[i][j][k] # if dim == 2
在简单RNN中的应用
def one_hot(x, n_class, dtype=torch.float32):
    """Return the one-hot encoding of a batch of class indices.

    Args:
        x: tensor of class indices, shape (batch).
        n_class: width of each one-hot row.
        dtype: dtype of the returned tensor.

    Returns:
        Tensor of shape (batch, n_class) on the same device as ``x``, zero
        everywhere except for a 1 at column ``x[i]`` of row ``i``.
    """
    indices = x.long()
    encoded = torch.zeros((indices.shape[0], n_class), dtype=dtype, device=indices.device)
    # scatter_ writes in place and returns the tensor it modified
    return encoded.scatter_(1, indices.view(-1, 1), 1)
# Encode the two indices 0 and 2; `vocab_size` is not defined in this
# snippet and must be provided by the surrounding program.
x = torch.tensor([0, 2])
one_hot(x, vocab_size)
结果
tensor([[1., 0., 0., ..., 0., 0., 0.],
[0., 0., 1., ..., 0., 0., 0.]])
在本例中,该函数的任务是将输入的文本使用one_hot编码。其中,x是一个vector,代表一个二字词语,其中的0和2代表汉字(在程序上文定义的字典中)所对应的数字。vocab_size是字典大小,即在该程序中所考虑的汉字总个数。n_class是one_hot编码中所考虑的类别数,在本例中等于vocab_size
简单的理解就是:把词语中每个字在词表中的索引位置置为 1,其他位置置为 0。
在CRNN中应用
def oneHot(v, v_length, nc):
    """One-hot encode a batch of variable-length label sequences.

    Args:
        v: 1-D tensor holding the labels of all sequences concatenated one
            after another.
        v_length: 1-D tensor with the length of each sequence.
        nc: number of classes (size of the last dimension of the result).

    Returns:
        Float tensor of shape (batch, max length, nc). Row ``i`` holds the
        one-hot vectors of sequence ``i``; positions past that sequence's
        length stay all-zero. An empty ``v_length`` yields a (0, 0, nc) tensor.
    """
    batchSize = v_length.size(0)
    if batchSize == 0:
        # max() of an empty tensor would raise; return an empty result instead
        return torch.zeros(0, 0, nc, dtype=torch.float32)
    # Plain Python ints are used for the sizes and slice bounds below instead
    # of 0-dim tensors.
    lengths = [int(n) for n in v_length.tolist()]
    maxLength = max(lengths)
    # torch.zeros replaces the legacy FloatTensor(...).fill_(0) construction
    v_onehot = torch.zeros(batchSize, maxLength, nc, dtype=torch.float32)
    acc = 0  # offset of the current sequence inside v
    for i, length in enumerate(lengths):
        label = v[acc:acc + length].view(-1, 1).long()
        # the slice is a view, so scatter_ writes straight into v_onehot
        v_onehot[i, :length].scatter_(1, label, 1.0)
        acc += length
    return v_onehot
Embedding
Embedding相关介绍
简单的理解就是:把 label 做成 one-hot 的稀疏矩阵不能反映单词之间的联系,因此通过全连接层挖掘 label 间 N 维向量的关系,形成一个密集矩阵。这个密集矩阵是可以反向传播的,因此在训练的最后阶段,可以学习到一个相对稳定的关系。
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
# Give each word an integer id, then look up the vector of 'hello' in an
# embedding table holding 2 entries of 5 values each.
word_to_ix = {'hello': 0, 'world': 1}
embeds = nn.Embedding(num_embeddings=2, embedding_dim=5)
hello_idx = Variable(torch.LongTensor([word_to_ix['hello']]))
hello_embed = embeds(hello_idx)
print(hello_embed)
nn.Embedding() 的第一个参数代表词表大小(即词的总个数),第二个参数代表希望用多少维(N 维)的特征向量来描述每个词。
结果
Variable containing:
0.4606 0.6847 -1.9592 0.9434 0.2316
[torch.FloatTensor of size 1x5]