Python基于VGG19实现图像风格迁移

目录

1、原理

2、代码实现


1、原理

图像风格迁移是一种将一张图片的内容与另一张图片的风格进行合成的技术。

风格(style)是指图像中不同空间尺度的纹理、颜色和视觉图案,内容(content)是指图像的高级宏观结构。

实现风格迁移背后的关键概念与所有深度学习算法的核心思想是一样的:定义一个损失函数来指定想要实现的目标,然后将这个损失最小化。这里想要实现的目标很明确:保留原始图像的内容,同时采用参考图像的风格。

在Python中,我们可以使用基于深度学习的模型来实现这一技术。神经风格迁移可以用任何预训练卷积神经网络来实现,这里将使用 Gatys 等人所使用的 VGG19 网络。

2、代码实现

​以下是一个基于VGG19模型的简单图像风格迁移的实现过程:

(1)创建一个网络,它能够同时计算风格参考图像、目标图像(即内容图像)和生成图像(对应下文代码中的 `target`)的 VGG19 层激活。

(2)使用这三张图像上计算的层激活来定义之前所述的损失函数,为了实现风格迁移,需要将这个损失函数最小化。

(3)设置梯度下降过程来将这个损失函数最小化

from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
​
import torch
import torch.optim as optim
from torchvision import transforms, models
​
# Only the convolutional part (`.features`) of the ImageNet-pretrained VGG19 is
# used. torchvision >= 0.13 deprecates `pretrained=True` in favour of a
# `weights=` enum; use the enum when it exists and fall back to the old flag
# on older releases so the same weights are loaded either way.
_vgg19_weights = getattr(models, "VGG19_Weights", None)
if _vgg19_weights is not None:
    vgg = models.vgg19(weights=_vgg19_weights.IMAGENET1K_V1).features
else:
    vgg = models.vgg19(pretrained=True).features

# The network weights are never trained in this script, so exclude them from
# gradient computation.
for param in vgg.parameters():
    param.requires_grad_(False)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

vgg.to(device)
def load_image(img_path, max_size=400, shape=None):
    """Load an image file as a normalized ``(1, 3, H, W)`` float tensor.

    Args:
        img_path: Path (or file object) accepted by ``PIL.Image.open``.
        max_size: Upper bound for the size handed to ``transforms.Resize``
            when ``shape`` is not given. The size used is
            ``min(max(image.size), max_size)``.
        shape: Optional explicit size forwarded to ``transforms.Resize``
            instead of the ``max_size`` rule, e.g. ``content.shape[-2:]`` to
            make a style image match the content image exactly.

    Returns:
        The image as a tensor with a leading batch dimension of 1, normalized
        per channel with the mean/std constants below.
    """
    # Convert to RGB so the 3-channel Normalize below also works for RGBA,
    # palette and greyscale files; the context manager closes the file handle.
    with Image.open(img_path) as img:
        image = img.convert('RGB')

    if shape is not None:
        size = shape
    else:
        size = min(max(image.size), max_size)

    image_transform = transforms.Compose([
        transforms.Resize(size),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225)),
    ])

    # unsqueeze(0) adds the batch dimension expected by the network.
    return image_transform(image).unsqueeze(0)
# Load the two input images and move them to the compute device.
content = load_image('dogs_and_cats.jpg').to(device)
style = load_image('picasso.jpg').to(device)

# Both inputs must have the same tensor size. This is an explicit check rather
# than an `assert`, because asserts are stripped when Python runs with -O.
if style.size() != content.size():
    raise ValueError("输入的风格图片和内容图片大小需要一致")
plt.ion()  # interactive mode: figures are drawn without blocking the script


def imshow(tensor, title=None):
    """Display a normalized image tensor in the current matplotlib figure.

    Args:
        tensor: Image tensor whose singleton dimensions can be squeezed away to
            leave a channel-first ``(3, H, W)`` array. It may live on any
            device and may require grad.
        title: Optional title drawn above the image.
    """
    # Detach from autograd and move to host memory before converting to numpy.
    image = tensor.detach().cpu().numpy().squeeze()
    # Channel-first -> channel-last, the layout plt.imshow expects.
    image = image.transpose(1, 2, 0)
    # Undo the per-channel normalization (same std/mean constants that
    # load_image applies).
    image = image * np.array((0.229, 0.224, 0.225)) + np.array((0.485, 0.456, 0.406))
    # An optimized image can leave the displayable range; clip to [0, 1] so
    # matplotlib does not have to clip (and warn) itself.
    image = np.clip(image, 0, 1)
    plt.imshow(image)
    if title is not None:
        plt.title(title)
    # Short pause so the GUI event loop gets a chance to render the figure.
    plt.pause(0.1)
# Show the style image and the content image, each in its own figure.
for _image, _title in ((style, 'Style Image'), (content, 'Content Image')):
    plt.figure()
    imshow(_image, title=_title)
def get_features(image, model, layers=None):
    """Feed ``image`` through ``model``'s child modules and collect activations.

    Args:
        image: Input handed to the first child module.
        model: Module whose registered children are applied one after another.
        layers: Mapping from child-module name to the key under which that
            child's output is stored. When omitted, six children are selected
            by index and labelled ``conv1_1`` ... ``conv5_1`` (presumably the
            matching convolutions of torchvision's VGG19 ``features`` --
            verify if a different model is passed).

    Returns:
        Dict mapping each label in ``layers`` to the output of that child.
    """
    selected = layers
    if selected is None:
        selected = {
            '0': 'conv1_1',
            '5': 'conv2_1',
            '10': 'conv3_1',
            '19': 'conv4_1',
            '21': 'conv4_2',
            '28': 'conv5_1',
        }

    collected = {}
    activation = image
    for child_name, child in model._modules.items():
        activation = child(activation)
        if child_name not in selected:
            continue
        # NOTE(review): the tensor is stored by reference; if the following
        # child works in place (e.g. an in-place ReLU) the stored activation
        # will reflect that modification -- confirm this is intended.
        collected[selected[child_name]] = activation

    return collected
​
# Activations of the two fixed input images; computed once, before the
# optimization starts.
content_features, style_features = get_features(content, vgg), get_features(style, vgg)
​
def gram_matrix(tensor):
    """Return the (unnormalized) Gram matrix of a 4-D feature map.

    Args:
        tensor: Feature map of shape ``(batch, depth, height, width)``.

    Returns:
        Matrix of shape ``(batch * depth, batch * depth)`` holding the inner
        products between the flattened feature channels. For the usual batch
        size of 1 this is the ``(depth, depth)`` Gram matrix.
    """
    b, d, h, w = tensor.size()
    # reshape (not view) so non-contiguous inputs work too; the batch dimension
    # is folded into the rows, so batch > 1 no longer raises a shape error.
    flat = tensor.reshape(b * d, h * w)
    return torch.mm(flat, flat.t())
# Gram matrix of every extracted style-image activation, keyed by layer label.
# The style image never changes, so these are computed once up front.
style_grams = {
    name: gram_matrix(activation)
    for name, activation in style_features.items()
}
​
import torch.nn.functional as F
​
def ContentLoss(target_features, content_features):
    """Return the mean squared error between the two ``'conv4_2'`` activations.

    Args:
        target_features: Mapping of layer label to activation for the image
            being optimized; must contain ``'conv4_2'``.
        content_features: Same mapping for the content image.

    Returns:
        Scalar tensor with the MSE between the two ``'conv4_2'`` entries.
    """
    generated = target_features['conv4_2']
    reference = content_features['conv4_2']
    return F.mse_loss(generated, reference)
​
def StyleLoss(target_features, style_grams, style_weights):
    """Return the weighted style loss summed over the layers in ``style_weights``.

    Args:
        target_features: Mapping of layer label to the 4-D activation of the
            image being optimized.
        style_grams: Mapping of layer label to the precomputed Gram matrix of
            the style image for that layer.
        style_weights: Mapping of layer label to that layer's weight; only the
            layers listed here contribute.

    Returns:
        The accumulated loss: a scalar tensor, or the int ``0`` when
        ``style_weights`` is empty.
    """
    style_loss = 0
    for layer in style_weights:
        target_feature = target_features[layer]
        target_gram = gram_matrix(target_feature)
        _, d, h, w = target_feature.shape
        style_gram = style_grams[layer]
        layer_style_loss = style_weights[layer] * F.mse_loss(target_gram, style_gram)
        # Scale by the number of activation values in this layer so layers of
        # different sizes contribute on a comparable scale.
        style_loss += layer_style_loss / (d * h * w)

    return style_loss
# Weight of each layer's contribution to the style loss; the earliest layer
# gets the largest weight.
style_weights = {
    'conv1_1': 1.,
    'conv2_1': 0.75,
    'conv3_1': 0.2,
    'conv4_1': 0.2,
    'conv5_1': 0.2,
}

alpha = 1  # multiplier of the content loss in the total loss
beta = 1e6  # multiplier of the style loss in the total loss

show_every = 100  # report the loss and show the image every this many steps
steps = 2000  # total number of optimization steps
# The generated image starts as a copy of the content image and is the only
# tensor handed to the optimizer. Move it to the device *before* enabling
# gradients: calling .to() on a tensor that already requires grad returns a
# non-leaf copy whenever data is really moved, and optimizers reject non-leaf
# tensors.
target = content.clone().detach().to(device).requires_grad_(True)
optimizer = optim.Adam([target], lr=0.003)
# Optimization loop: repeatedly adjust the pixels of `target` so that the
# weighted sum of content loss and style loss decreases.
for ii in range(1, steps + 1):
    # Activations of the current generated image.
    target_features = get_features(target, vgg)

    content_loss = ContentLoss(target_features, content_features)
    style_loss = StyleLoss(target_features, style_grams, style_weights)
    total_loss = alpha * content_loss + beta * style_loss

    # Standard gradient step on the image.
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    # Periodic progress report: print the loss and show the intermediate image.
    if ii % show_every == 0:
        print('Total loss: ', total_loss.item())
        plt.figure()
        imshow(target)

# Show the final result, then leave interactive mode so plt.show() blocks and
# keeps the windows open.
plt.figure()
imshow(target, "Target Image")
plt.ioff()
plt.show()

图像风格迁移是指将一张图片的内容与另一张图片的风格合并到一起,生成一张新的图片。在基于pytorch的VGG19模型实现图像风格迁移时,主要分为以下几步:

1. 加载VGG19模型及其预训练参数

```python
import torch
import torchvision.models as models

# 加载VGG19模型
vgg = models.vgg19(pretrained=True).features

# 将VGG19模型的参数设置为不需要更新
for param in vgg.parameters():
    param.requires_grad_(False)

# 将模型移到GPU上
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg.to(device)
```

2. 定义图像风格损失函数

```python
def gram_matrix(input):
    batch_size, depth, height, width = input.size()
    features = input.view(batch_size * depth, height * width)
    gram = torch.mm(features, features.t())
    return gram

class StyleLoss(nn.Module):
    def __init__(self, target_features):
        super(StyleLoss, self).__init__()
        self.target = gram_matrix(target_features).detach()

    def forward(self, input):
        G = gram_matrix(input)
        self.loss = F.mse_loss(G, self.target)
        return input
```

其中,`gram_matrix`函数用于计算输入张量的Gram矩阵,`StyleLoss`类用于计算图像风格损失。

3. 加载内容图片和风格图片

```python
def load_image(img_path, max_size=400, shape=None):
    image = Image.open(img_path).convert('RGB')
    if max(image.size) > max_size:
        size = max_size
    else:
        size = max(image.size)
    if shape is not None:
        size = shape
    in_transform = transforms.Compose([
        transforms.Resize(size),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    image = in_transform(image)[:3,:,:].unsqueeze(0)
    return image

content = load_image("content.jpg").to(device)
style = load_image("style.jpg", shape=content.shape[-2:]).to(device)
```

其中,`load_image`函数用于加载图片,并进行缩放、转换为张量和归一化处理。

4. 定义模型和优化器

```python
# 定义目标图像
target = content.clone().requires_grad_(True).to(device)

# 定义损失函数
content_losses = []
style_losses = []
model = nn.Sequential()
for layer_num, layer in vgg._modules.items():
    model.add_module(layer_num, layer)
    if isinstance(layer, nn.Conv2d):
        name = "conv{}_{}".format(layer_num[:1], layer_num[2:])
        content_loss = ContentLoss(target)
        model.add_module("content_loss_{}".format(name), content_loss)
        content_losses.append(content_loss)
    if isinstance(layer, nn.ReLU):
        name = "relu{}_{}".format(layer_num[:1], layer_num[2:])
        model.add_module(name, layer)
        style_loss = StyleLoss(style)
        model.add_module("style_loss_{}".format(name), style_loss)
        style_losses.append(style_loss)

optimizer = torch.optim.Adam([target], lr=0.01)

# 定义训练函数
def train(model, target, content_losses, style_losses, optimizer, steps=2000, style_weight=1000000, content_weight=1):
    for step in range(steps):
        target = model(target)
        content_loss = 0
        style_loss = 0
        for cl in content_losses:
            content_loss += cl.loss
        for sl in style_losses:
            style_loss += sl.loss
        loss = style_weight * style_loss + content_weight * content_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if step % 100 == 0:
            print("Step {}:".format(step))
            print("Style Loss: {:.4f} Content Loss: {:.4f}".format(style_loss.item(), content_loss.item()))
    return target
```

其中,`train`函数用于训练模型,并计算损失函数。

5. 进行图像风格迁移

```python
output = train(model, target, content_losses, style_losses, optimizer, steps=2000, style_weight=1000000, content_weight=1)
output_image = tensor_to_image(output)
output_image.show()
```

其中,`tensor_to_image`函数用于将张量转换为图像(文中未给出其实现,需自行定义),`show`函数用于显示图像。

以上就是基于pytorch的VGG19模型实现图像风格迁移的主要步骤。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值