Pytorch笔记：风格迁移

最新推荐文章于 2022-09-30 10:05:57 发布

xiaolaoshuXD

最新推荐文章于 2022-09-30 10:05:57 发布

阅读量505

点赞数 3

分类专栏： pytorch 文章标签：神经网络深度学习 python 人工智能 pytorch

本文链接：https://blog.csdn.net/qq_42017767/article/details/108456945

版权

pytorch 专栏收录该内容

8 篇文章 0 订阅

订阅专栏

Pytorch笔记：风格迁移
训练模型：风格迁移网络+VGG16网络
生成网络：风格迁移网络
代码如下（根据陈云《深度学习框架：Pytorch入门与实践》的代码改动）
main.py

import torch as t
import cv2 as cv
import torchvision as tv
from torch.utils import data
from transformer_net import TransformerNet
import utils
from PackedVGG import Vgg16
from torch.nn import functional as F


# Per-channel mean and standard deviation. The same values are defined as
# IMAGENET_MEAN / IMAGENET_STD in utils.py, which is what the training code
# actually uses; nothing in the main.py code shown here reads these two names.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]


class Config(object):
    """Default settings and file locations for training and stylizing.

    Every option is a plain class attribute; ``Config()`` takes no arguments.
    Windows paths are written as raw strings so that each backslash is a
    literal character and never the start of an escape sequence.
    """

    # General Args
    use_gpu = True
    model_path = None  # pretrain model path (for resume training or test)

    # Train Args
    image_size = 256  # image crop_size for training
    batch_size = 3
    data_root = r'F:\dl\chapter7\data2/'  # dataset root: $data_root/coco/a.jpg
    num_workers = 4  # dataloader num of workers

    lr = 1e-3
    epoches = 2  # total epoch to train
    content_weight = 1e5  # weight of content_loss
    style_weight = 1e10  # weight of style_loss

    style_path = r'F:\dl\chapter7\style.png'  # style image path

    # Debug trigger file ("touch $debug_file to interrupt"); not read by the
    # code shown alongside this class.
    debug_file = '/tmp/debugnn'

    # Test Args
    content_path = r'F:\dl\chapter7\input.jpg'  # input file to do style transfer [for test]
    result_path = r'F:\dl\output.png'  # style transfer result [for test]


def train():
    """Train the style-transfer network for the style image in ``Config``.

    For every batch the transformer output and the original images are both
    normalized and passed through a frozen VGG16. The loss is the sum of:

    * content loss: MSE between the ``relu2_2`` features of output and input,
      scaled by ``content_weight``;
    * style loss: MSE between the Gram matrices of the output features and of
      the style image features, summed over all four VGG outputs and scaled
      by ``style_weight``.

    Checkpoints are written every 500 iterations and at the end of each epoch.
    """
    opt = Config()
    # Use CUDA only when it was requested AND is actually available, so the
    # script still runs on CPU-only machines instead of failing on .to(device).
    use_cuda = opt.use_gpu and t.cuda.is_available()
    device = t.device('cuda' if use_cuda else 'cpu')

    # Data loading: resize + center-crop to image_size, convert to a tensor and
    # multiply by 255 (normalize_batch documents a 0~255 input).
    # The author's dataset is 80k+ images of everyday scenes.
    transfroms = tv.transforms.Compose([
        tv.transforms.Resize(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x * 255)
    ])
    dataset = tv.datasets.ImageFolder(opt.data_root, transfroms)
    dataloader = data.DataLoader(dataset, opt.batch_size)

    # Style transformer network
    transformer = TransformerNet()
    if opt.model_path:
        # Resume from a partially trained model; the map_location callback
        # returns the storage unchanged.
        transformer.load_state_dict(t.load(opt.model_path, map_location=lambda _s, _: _s))
    transformer.to(device)

    # Vgg16 for Perceptual Loss. eval() matters for layers such as Dropout and
    # Batch Normalization that behave differently in training and evaluation.
    vgg = Vgg16().eval()
    vgg.to(device)
    for param in vgg.parameters():
        param.requires_grad = False  # VGG is a fixed feature extractor

    # Optimizer: only the transformer's parameters are updated
    optimizer = t.optim.Adam(transformer.parameters(), opt.lr)

    # Get style image
    style = utils.get_style_data(opt.style_path)
    style = style.to(device)

    # Gram matrices of the style image, computed once up front
    with t.no_grad():
        features_style = vgg(style)
        gram_style = [utils.gram_matrix(y) for y in features_style]

    for epoch in range(opt.epoches):
        for ii, (x, _) in enumerate(dataloader):
            # Train
            optimizer.zero_grad()
            x = x.to(device)
            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)
            features_y = vgg(y)
            features_x = vgg(x)

            # Content loss: the generated image should keep the input's content
            content_loss = opt.content_weight * F.mse_loss(features_y.relu2_2, features_x.relu2_2)

            # Style loss: summed over 'relu1_2', 'relu2_2', 'relu3_3', 'relu4_3'
            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gram_y = utils.gram_matrix(ft_y)
                # gm_s has batch size 1; expand it to the current batch size
                style_loss += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight

            total_loss = content_loss + style_loss
            print(ii, "loss is %f" % total_loss.item())
            total_loss.backward()
            optimizer.step()

            if ii % 500 == 0:
                # NOTE: the name contains only the iteration index, so a later
                # epoch overwrites the file written at the same index earlier.
                t.save(transformer.state_dict(), r'F:\dl\chapter7\check/style_%s.pth' % ii)
        # save checkpoint at the end of every epoch
        t.save(transformer.state_dict(), r'F:\dl\chapter7\check/%s_style.pth' % epoch)


def stylize():
    """
    perform style transfer

    Loads one content image, runs it through a trained TransformerNet on the
    CPU, saves the result as a PNG and shows it in an OpenCV window until a
    key is pressed. The image, checkpoint and output paths are hard-coded in
    this function rather than read from ``Config``.
    """
    # Inference is pinned to the CPU here.
    device = t.device('cpu')

    # Input image preprocess: tensor multiplied by 255, with a batch dimension
    content_image = tv.datasets.folder.default_loader(r'F:\dl\mmexport1598515552726.jpg')
    content_transform = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x.mul(255))
    ])
    content_image = content_transform(content_image)
    content_image = content_image.unsqueeze(0).to(device)

    # Model setup: load trained weights; map_location returns storages unchanged
    style_model = TransformerNet().eval()
    style_model.load_state_dict(
        t.load(r'F:\dl\chapter7\check\style_25000.pth', map_location=lambda _s, _: _s))
    style_model.to(device)

    # Style transfer and save output. no_grad avoids building an autograd
    # graph that inference never uses.
    with t.no_grad():
        output = style_model(content_image)
    output_data = output.cpu()[0]
    # Divide by 255 and clamp to [0, 1] before saving
    tv.utils.save_image((output_data / 255).clamp(min=0, max=1), r'F:\dl\outputmym.png')
    print("输出并保存完毕")
    scr = cv.imread(r'F:\dl\outputmym.png')
    cv.imshow("scr", scr)
    cv.waitKey(0)

# stylize()  # uncomment to produce a stylized image from a trained model
train()  # run training

utils.py

import torch as t
import torchvision as tv
import numpy as np

# Per-channel mean and standard deviation (three channels) used by
# get_style_data and normalize_batch below to normalize images.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

def gram_matrix(y):
    """
    Compute the Gram matrix of every sample in a batch of feature maps.

    Input shape: b,c,h,w
    Output shape: b,c,c

    Each sample is flattened to a (c, h*w) matrix F and the result is
    F @ F^T divided by c*h*w.
    """
    (b, ch, h, w) = y.size()
    # reshape instead of view: identical for contiguous input, and it also
    # accepts non-contiguous tensors (e.g. transposed ones) that view rejects.
    features = y.reshape(b, ch, h * w)
    features_t = features.transpose(1, 2)
    gram = features.bmm(features_t) / (ch * h * w)
    return gram

def get_style_data(path):
    """
    Load the style image stored at ``path``.

    The image is converted to a tensor, normalized with IMAGENET_MEAN /
    IMAGENET_STD and given a leading batch dimension.
    Return: tensor shape 1*c*h*w, normalized
    """
    image = tv.datasets.folder.default_loader(path)
    to_normalized_tensor = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ])
    return to_normalized_tensor(image).unsqueeze(0)

def normalize_batch(batch):
    """
    Rescale a batch by 1/255 and normalize it per channel.

    Input: b,ch,h,w  0~255
    Output: b,ch,h,w  roughly -2~2
    """
    per_channel = (1, -1, 1, 1)
    # Statistics are created with the batch's own tensor type and expanded to
    # the batch's shape before use.
    channel_mean = batch.data.new(IMAGENET_MEAN).view(*per_channel)
    channel_std = batch.data.new(IMAGENET_STD).view(*per_channel)
    channel_mean = channel_mean.expand_as(batch.data)
    channel_std = channel_std.expand_as(batch.data)
    unit_range = batch / 255.0
    return (unit_range - channel_mean) / channel_std

transformer_net.py

"""
code refer to https://github.com/abhiskk/fast-neural-style/blob/master/neural_style/transformer_net.py
"""
import torch as t
import torch.nn as nn
import numpy as np

class TransformerNet(nn.Module):
    """Style-transfer network: image in, stylized image out.

    Three stages are applied in order: ``initial_layers`` (a 9x9 convolution
    followed by two stride-2 convolutions, 3 -> 32 -> 64 -> 128 channels),
    ``res_layers`` (five 128-channel residual blocks) and ``upsample_layers``
    (two 2x upsampling convolutions and a final 9x9 convolution back to 3
    channels). No activation follows the last convolution.
    """

    def __init__(self):
        super().__init__()

        # Down sample layers
        down_stack = [
            ConvLayer(3, 32, kernel_size=9, stride=1),
            nn.InstanceNorm2d(32, affine=True),
            nn.ReLU(inplace=True),
            ConvLayer(32, 64, kernel_size=3, stride=2),
            nn.InstanceNorm2d(64, affine=True),
            nn.ReLU(inplace=True),
            ConvLayer(64, 128, kernel_size=3, stride=2),
            nn.InstanceNorm2d(128, affine=True),
            nn.ReLU(inplace=True),
        ]
        self.initial_layers = nn.Sequential(*down_stack)

        # Residual layers
        residual_stack = [ResidualBlock(128) for _ in range(5)]
        self.res_layers = nn.Sequential(*residual_stack)

        # Upsampling Layers
        up_stack = [
            UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2),
            nn.InstanceNorm2d(64, affine=True),
            nn.ReLU(inplace=True),
            UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2),
            nn.InstanceNorm2d(32, affine=True),
            nn.ReLU(inplace=True),
            ConvLayer(32, 3, kernel_size=9, stride=1),
        ]
        self.upsample_layers = nn.Sequential(*up_stack)

    def forward(self, x):
        """Run ``x`` through the three stages and return the result."""
        encoded = self.initial_layers(x)
        refined = self.res_layers(encoded)
        return self.upsample_layers(refined)


class ConvLayer(nn.Module):
    """
    Conv2d preceded by reflection padding of kernel_size // 2 on every side.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride):
        super().__init__()
        half_kernel = int(kernel_size // 2)
        self.reflection_pad = nn.ReflectionPad2d(half_kernel)
        self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride)

    def forward(self, x):
        """Pad ``x`` by reflection, then convolve."""
        return self.conv2d(self.reflection_pad(x))


class UpsampleConvLayer(nn.Module):
    """UpsampleConvLayer
    Upsample (optional) + reflection pad + Conv2d, used instead of
    ConvTranspose2d; see ref for why.
    ref: http://distill.pub/2016/deconv-checkerboard/
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None):
        super().__init__()
        self.upsample = upsample
        half_kernel = int(kernel_size // 2)
        self.reflection_pad = nn.ReflectionPad2d(half_kernel)
        self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride)

    def forward(self, x):
        """Scale ``x`` by ``upsample`` when it is set, then pad and convolve."""
        if self.upsample:
            # interpolate is called with its default mode
            x = t.nn.functional.interpolate(x, scale_factor=self.upsample)
        return self.conv2d(self.reflection_pad(x))


class ResidualBlock(nn.Module):
    """ResidualBlock
    Two 3x3 ConvLayer + InstanceNorm stages with a ReLU after the first one;
    the block input is added to the result, with no activation after the sum.
    introduced in: https://arxiv.org/abs/1512.03385
    recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html
    """

    def __init__(self, channels):
        super().__init__()
        self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
        self.in1 = nn.InstanceNorm2d(channels, affine=True)
        self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
        self.in2 = nn.InstanceNorm2d(channels, affine=True)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``x`` plus the output of the two conv/norm stages."""
        branch = self.conv1(x)
        branch = self.in1(branch)
        branch = self.relu(branch)
        branch = self.conv2(branch)
        branch = self.in2(branch)
        return branch + x

PackedVGG.py

import torch
import torch.nn as nn
from torchvision.models import vgg16
from collections import namedtuple

# Created once at import time so that every forward pass returns the same
# tuple type instead of building a new class on each call.
VggOutputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3'])


class Vgg16(torch.nn.Module):
    """Feature extractor built from the first 23 layers of a pretrained VGG16.

    ``forward`` runs the input through those layers one by one and returns the
    intermediate outputs at four of them as a ``VggOutputs`` namedtuple.
    """

    # Indices into self.features whose outputs are collected: the 3rd, 8th,
    # 15th and 22nd layers are relu1_2, relu2_2, relu3_3 and relu4_3.
    _TAP_INDICES = frozenset({3, 8, 15, 22})

    def __init__(self):
        super(Vgg16, self).__init__()
        # Keep only the leading feature layers, up to and including index 22.
        features = list(vgg16(pretrained=True).features)[:23]
        self.features = nn.ModuleList(features).eval()

    def forward(self, x):
        """Return the four tapped feature maps for ``x`` as ``VggOutputs``."""
        results = []
        for ii, model in enumerate(self.features):
            x = model(x)
            if ii in self._TAP_INDICES:
                results.append(x)
        return VggOutputs(*results)

运行结果

H:\ProgramData\Anaconda3\python.exe D:/PycharmProjects/untitled/风格迁移/main.py
0 loss is 24709134.000000
1 loss is 24594472.000000
2 loss is 24192556.000000
3 loss is 23792200.000000

Process finished with exit code -1

所想要的风格
style.png

训练6万张图片后输出：（损失从2400万降到140万）
在这里插入图片描述

参考文章链接
深度学习框架PyTorch入门与实践：第八章 AI艺术家：神经网络风格迁移
 CNN系列学习之VGG16
训练时间约 10 小时，共输入约 6 万张图片完成训练。按上文代码，模型文件名中的数字是保存该模型时的迭代序号（即 ii，每次迭代读入 batch_size 张图片）；文件夹内的图片为该模型对应的风格。该方法一个模型只能实现一种风格。生成模型链接：https://download.csdn.net/download/qq_42017767/12821255