PyTorch笔记:风格迁移
训练模型:风格迁移网络+VGG16网络
生成网络:风格迁移网络
代码如下(根据陈云《深度学习框架PyTorch入门与实践》中的代码改动)
main.py
import torch as t
import cv2 as cv
import torchvision as tv
from torch.utils import data
from transformer_net import TransformerNet
import utils
from PackedVGG import Vgg16
from torch.nn import functional as F
# Per-channel mean / standard deviation (three values, one per channel).
# The same values are defined in utils.py as IMAGENET_MEAN / IMAGENET_STD;
# nothing in the visible main.py code reads these two names.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
class Config(object):
    """Basic parameter configuration for training and stylization.

    Every setting is a plain class attribute, so ``Config()`` simply exposes
    the defaults below. Windows paths are written as raw strings so that a
    backslash is never interpreted as the start of an escape sequence.
    """

    # General Args
    use_gpu = True
    model_path = None  # pretrained model path (for resuming training or test)

    # Train Args
    image_size = 256  # image crop size for training
    batch_size = 3
    data_root = r'F:\dl\chapter7\data2/'  # dataset root: $data_root/coco/a.jpg
    num_workers = 4  # dataloader number of workers
    lr = 1e-3
    epoches = 2  # total epochs to train
    content_weight = 1e5  # weight of content_loss
    style_weight = 1e10  # weight of style_loss
    style_path = r'F:\dl\chapter7\style.png'  # style image path
    debug_file = '/tmp/debugnn'  # touch $debug_file to interrupt and enter

    # Test Args
    content_path = r'F:\dl\chapter7\input.jpg'  # input file to do style transfer [for test]
    result_path = r'F:\dl\output.png'  # style transfer result [for test]
def train():
    """Train the style-transfer network against a fixed style image.

    All settings come from ``Config``. For every batch the transformer output
    and the original images are passed through the frozen VGG16 feature
    extractor; the loss is the weighted sum of

    * a content loss: MSE between the ``relu2_2`` features of the generated
      and the original images, and
    * a style loss: MSE between the Gram matrices of the generated images and
      those of the style image, summed over all returned VGG feature maps.

    A checkpoint of the transformer weights is written every 500 iterations
    and once more at the end of each epoch.
    """
    opt = Config()
    # Use the GPU only when it is both requested and actually present, so a
    # CPU-only machine trains (slowly) instead of failing on `.to(device)`.
    use_cuda = opt.use_gpu and t.cuda.is_available()
    device = t.device('cuda' if use_cuda else 'cpu')

    # Data loading (the author used a dataset of 80k+ everyday scene images).
    # ToTensor output is multiplied by 255 because normalize_batch documents
    # its input range as 0~255.
    preprocess = tv.transforms.Compose([
        tv.transforms.Resize(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x * 255)
    ])
    dataset = tv.datasets.ImageFolder(opt.data_root, preprocess)
    dataloader = data.DataLoader(dataset, opt.batch_size)

    # Style transformer network
    transformer = TransformerNet()
    if opt.model_path:
        # Resume from a partially trained model; map storages as-is.
        transformer.load_state_dict(t.load(opt.model_path, map_location=lambda _s, _: _s))
    transformer.to(device)

    # Vgg16 for perceptual loss: evaluation mode, weights frozen.
    vgg = Vgg16().eval()
    vgg.to(device)
    for param in vgg.parameters():
        param.requires_grad = False

    # Only the transformer's parameters are optimized.
    optimizer = t.optim.Adam(transformer.parameters(), opt.lr)

    # Load the target style image.
    style = utils.get_style_data(opt.style_path)
    style = style.to(device)

    # Gram matrices of the style image are constant; compute them once.
    with t.no_grad():
        features_style = vgg(style)
        gram_style = [utils.gram_matrix(y) for y in features_style]

    for epoch in range(opt.epoches):
        for ii, (x, _) in enumerate(dataloader):
            optimizer.zero_grad()
            x = x.to(device)
            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)
            features_y = vgg(y)
            features_x = vgg(x)

            # Content loss: generated image should match the original content.
            content_loss = opt.content_weight * F.mse_loss(features_y.relu2_2, features_x.relu2_2)

            # Style loss: accumulate over every feature map returned by vgg
            # ('relu1_2', 'relu2_2', 'relu3_3', 'relu4_3').
            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gram_y = utils.gram_matrix(ft_y)
                # The style Gram matrix has batch size 1; expand to the batch.
                style_loss += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight

            total_loss = content_loss + style_loss
            print(ii, "loss is %f" % total_loss.item())
            total_loss.backward()
            optimizer.step()

            if ii % 500 == 0:
                t.save(transformer.state_dict(), r'F:\dl\chapter7\check/style_%s.pth' % ii)

        # save checkpoint at the end of every epoch
        t.save(transformer.state_dict(), r'F:\dl\chapter7\check/%s_style.pth' % epoch)
def stylize():
    """Apply a trained style-transfer checkpoint to one image and show it.

    Loads the hard-coded content image, runs it through ``TransformerNet``
    with the hard-coded checkpoint, saves the result as a PNG and then opens
    it in an OpenCV window until a key is pressed. The input, checkpoint and
    output paths are fixed inside this function rather than read from
    ``Config``.
    """
    # Inference is pinned to the CPU (the author disabled the GPU selection).
    device = t.device('cpu')

    # Input image preprocessing: tensor scaled to the 0~255 range.
    content_image = tv.datasets.folder.default_loader(r'F:\dl\mmexport1598515552726.jpg')
    content_transform = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x.mul(255))
    ])
    content_image = content_transform(content_image)
    content_image = content_image.unsqueeze(0).to(device).detach()

    # Model setup: load the trained weights in evaluation mode.
    style_model = TransformerNet().eval()
    style_model.load_state_dict(t.load(r'F:\dl\chapter7\check\style_25000.pth', map_location=lambda _s, _: _s))
    style_model.to(device)

    # Style transfer; no gradients are needed, so skip building the graph.
    with t.no_grad():
        output = style_model(content_image)
    output_data = output.cpu()[0]

    # Scale back to 0~1 and clamp before writing the image file.
    tv.utils.save_image((output_data / 255).clamp(min=0, max=1), r'F:\dl\outputmym.png')
    print("输出并保存完毕")

    # Re-read the saved file and display it until a key is pressed.
    scr = cv.imread(r'F:\dl\outputmym.png')
    cv.imshow("scr", scr)
    cv.waitKey(0)
# Run only when executed as a script, so importing this module does not
# start a training run as a side effect.
if __name__ == '__main__':
    # stylize()  # output a stylized image
    train()  # run training
utils.py
import torch as t
import torchvision as tv
import numpy as np
# Per-channel mean and standard deviation (three values each) used by
# get_style_data and normalize_batch below to normalize image tensors.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
def gram_matrix(y):
    """Compute the batched Gram matrix of a feature map.

    Input shape: b,c,h,w
    Output shape: b,c,c

    Each spatial map is flattened to a vector of length h*w; entry (i, j) of
    the result is the dot product of channels i and j divided by c*h*w.
    """
    (b, ch, h, w) = y.size()
    # reshape (rather than view) also accepts non-contiguous inputs such as
    # transposed or sliced feature maps; for contiguous ones it is a view.
    features = y.reshape(b, ch, w * h)
    features_t = features.transpose(1, 2)
    gram = features.bmm(features_t) / (ch * h * w)
    return gram
def get_style_data(path):
    """Load the style image found at ``path``.

    The image is converted to a tensor and normalized with IMAGENET_MEAN /
    IMAGENET_STD, then given a leading batch dimension.

    Return: tensor shape 1*c*h*w, normalized
    """
    image = tv.datasets.folder.default_loader(path)
    to_normalized_tensor = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ])
    # unsqueeze(0) adds the batch axis in front.
    return to_normalized_tensor(image).unsqueeze(0)
def normalize_batch(batch):
    """Normalize a batch of images with IMAGENET_MEAN / IMAGENET_STD.

    Input: b,ch,h,w 0~255
    Output: b,ch,h,w -2~2
    """
    raw = batch.data
    # Build the statistics with the same tensor type as the batch, shaped
    # 1 x ch x 1 x 1 and then expanded to the full batch shape.
    channel_mean = raw.new(IMAGENET_MEAN).view(1, -1, 1, 1).expand_as(raw)
    channel_std = raw.new(IMAGENET_STD).view(1, -1, 1, 1).expand_as(raw)
    # Bring the pixels to 0~1 first, then standardize per channel.
    unit_range = batch / 255.0
    return (unit_range - channel_mean) / channel_std
transformer_net.py
"""
code refer to https://github.com/abhiskk/fast-neural-style/blob/master/neural_style/transformer_net.py
"""
import torch as t
import torch.nn as nn
import numpy as np
class TransformerNet(nn.Module):
    """Style-transfer network: downsample, residual blocks, upsample.

    Three convolution stages (3 -> 32 -> 64 -> 128 channels, the last two
    with stride 2) are followed by five 128-channel residual blocks and two
    x2 upsampling stages back to 32 channels; a final 9x9 convolution maps
    to 3 output channels.
    """

    def __init__(self):
        super().__init__()

        # Down sample layers
        self.initial_layers = nn.Sequential(
            ConvLayer(3, 32, kernel_size=9, stride=1),
            nn.InstanceNorm2d(32, affine=True),
            nn.ReLU(inplace=True),
            ConvLayer(32, 64, kernel_size=3, stride=2),
            nn.InstanceNorm2d(64, affine=True),
            nn.ReLU(inplace=True),
            ConvLayer(64, 128, kernel_size=3, stride=2),
            nn.InstanceNorm2d(128, affine=True),
            nn.ReLU(inplace=True),
        )

        # Residual layers: five identical 128-channel blocks
        self.res_layers = nn.Sequential(*[ResidualBlock(128) for _ in range(5)])

        # Upsampling layers
        self.upsample_layers = nn.Sequential(
            UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2),
            nn.InstanceNorm2d(64, affine=True),
            nn.ReLU(inplace=True),
            UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2),
            nn.InstanceNorm2d(32, affine=True),
            nn.ReLU(inplace=True),
            ConvLayer(32, 3, kernel_size=9, stride=1),
        )

    def forward(self, x):
        """Run ``x`` through the three stages in order and return the result."""
        return self.upsample_layers(self.res_layers(self.initial_layers(x)))
class ConvLayer(nn.Module):
    """Conv2d preceded by reflection padding.

    The input is reflection-padded by half the kernel size (rounded down)
    on every side before the convolution is applied.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride):
        super().__init__()
        half_kernel = kernel_size // 2
        self.reflection_pad = nn.ReflectionPad2d(half_kernel)
        self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride)

    def forward(self, x):
        """Pad ``x`` by reflection, then convolve."""
        return self.conv2d(self.reflection_pad(x))
class UpsampleConvLayer(nn.Module):
    """Optional upsampling followed by a reflection-padded convolution.

    Instead of ConvTranspose2d, the input is first resized with
    ``interpolate`` (when ``upsample`` is given) and then convolved.
    ref: http://distill.pub/2016/deconv-checkerboard/
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None):
        super().__init__()
        self.upsample = upsample
        half_kernel = kernel_size // 2
        self.reflection_pad = nn.ReflectionPad2d(half_kernel)
        self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride)

    def forward(self, x):
        """Resize ``x`` by ``self.upsample`` if set, then pad and convolve."""
        resized = (
            t.nn.functional.interpolate(x, scale_factor=self.upsample)
            if self.upsample
            else x
        )
        return self.conv2d(self.reflection_pad(resized))
class ResidualBlock(nn.Module):
    """Two conv + instance-norm stages with an identity skip connection.

    introduced in: https://arxiv.org/abs/1512.03385
    recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html
    """

    def __init__(self, channels):
        super().__init__()
        self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
        self.in1 = nn.InstanceNorm2d(channels, affine=True)
        self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
        self.in2 = nn.InstanceNorm2d(channels, affine=True)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the residual branch output added to the input ``x``."""
        # First stage: conv -> instance norm -> ReLU
        branch = self.conv1(x)
        branch = self.in1(branch)
        branch = self.relu(branch)
        # Second stage: conv -> instance norm (no activation before the sum)
        branch = self.conv2(branch)
        branch = self.in2(branch)
        return branch + x
PackedVGG.py
import torch
import torch.nn as nn
from torchvision.models import vgg16
from collections import namedtuple
class Vgg16(torch.nn.Module):
    """Pretrained VGG16 feature extractor returning four intermediate outputs.

    Keeps the first 23 layers of ``vgg16(pretrained=True).features`` and, on
    each forward pass, collects the activations after layers 3, 8, 15 and 22
    (relu1_2, relu2_2, relu3_3, relu4_3).
    """

    # Indices in self.features whose outputs are returned.
    _TAPPED_LAYERS = frozenset({3, 8, 15, 22})
    # Result type, built once instead of on every forward call.
    _VggOutputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3'])

    def __init__(self):
        super(Vgg16, self).__init__()
        features = list(vgg16(pretrained=True).features)[:23]
        # the 3rd, 8th, 15th and 22nd layer of
        # self.features are: relu1_2, relu2_2, relu3_3, relu4_3
        self.features = nn.ModuleList(features).eval()

    def forward(self, x):
        """Run ``x`` through the layers and return the tapped activations.

        Returns a ``VggOutputs`` namedtuple with fields
        relu1_2, relu2_2, relu3_3, relu4_3 (in that order).
        """
        results = []
        for ii, layer in enumerate(self.features):
            x = layer(x)
            if ii in self._TAPPED_LAYERS:
                results.append(x)
        return self._VggOutputs(*results)
运行结果
H:\ProgramData\Anaconda3\python.exe D:/PycharmProjects/untitled/风格迁移/main.py
0 loss is 24709134.000000
1 loss is 24594472.000000
2 loss is 24192556.000000
3 loss is 23792200.000000
Process finished with exit code -1
所想要的风格
style.png
训练6万张图片后输出:(损失从2400万降到140万)
参考文章链接
深度学习框架PyTorch入门与实践:第八章 AI艺术家:神经网络风格迁移
CNN系列学习之VGG16
训练时间10小时,完成6万张图片的输入训练。模型名字中的数字表示读取多少张图片后生成的模型,文件夹内的图片为该模型对应的风格。该方法一个模型只能实现一种风格。生成模型链接:https://download.csdn.net/download/qq_42017767/12821255