Face Cartoonization from a Single Style Image: Playing with JoJoGAN

  Face cartoonization is fun, but good training samples are hard to come by: the approach in the linked post (Toonify yourself | Justin Pinkney) needs roughly 2,000 samples to produce a reasonably entertaining result. This has motivated a number of few-shot and one-shot methods, and JoJoGAN is a recently popular one. The appeal is that with just a single style sample you can build your own cartoonization model.

    I won't go into the theory in depth here. Briefly, JoJoGAN inverts the style reference into StyleGAN2's latent space with an e4e encoder, builds paired training data via style mixing, and fine-tunes the generator with an LPIPS perceptual loss. I extracted the code from the Colab notebook at https://github.com/mchong6/JoJoGAN and ran some simple experiments; the full script is at the end. First, I reproduced the Arcane result:

    The input image:

    After 200 iterations the result looks acceptable:

  Next I tried another style. The style reference:

  

 The best result came after about 50 iterations (by 200 iterations the face had turned into a goblin), but it is still not quite good enough.

  

  I also experimented a bit with the learning rate and with augmenting the w latent vectors, but neither seemed to help much; this needs further study.
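For reference, the sort of tweak I tried looks roughly like this. This is a hypothetical sketch, not part of the original notebook: `augment_latents` and the scheduler are my own names, and it assumes the `latents` tensor and `g_optim` optimizer defined in the script below.

# Hypothetical sketch of the tweaks mentioned above -- not from the JoJoGAN repo.
import torch

def augment_latents(latents, noise_std=0.02):
    # jitter the w+ style codes with small Gaussian noise so each iteration
    # sees a slightly perturbed target latent
    return latents + noise_std * torch.randn_like(latents)

# step-decay the learning rate, halving every 100 iterations;
# call scheduler.step() once per iteration, after g_optim.step()
scheduler = torch.optim.lr_scheduler.StepLR(g_optim, step_size=100, gamma=0.5)

In my runs, neither the latent jitter nor the slower learning rate changed the results much.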

import torch
torch.backends.cudnn.benchmark = True
from torchvision import transforms, utils
from util import *
from PIL import Image
import math
import random
import os
import sys
import glob

import numpy as np
from torch import nn, autograd, optim
from torch.nn import functional as F
from tqdm import tqdm
import lpips
import wandb
from model import *
from e4e_projection import projection as e4e_projection

from copy import deepcopy

os.makedirs('inversion_codes', exist_ok=True)
os.makedirs('style_images', exist_ok=True)
os.makedirs('style_images_aligned', exist_ok=True)
os.makedirs('models', exist_ok=True)
os.makedirs('ckpts', exist_ok=True)  # needed when save_model is enabled below

device = 'cuda'

filepath = './testsample.jpg'
name = strip_path_extension(filepath)+'.pt'

# aligns and crops face
aligned_face = align_face(filepath)

# my_w = restyle_projection(aligned_face, name, device, n_iters=1).unsqueeze(0)
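# invert the aligned face into StyleGAN2's w+ space with the e4e encoder
# (the resulting latent is cached to the .pt path in `name`)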
my_w = e4e_projection(aligned_face, name, device).unsqueeze(0)

latent_dim = 512

# Load original generator
original_generator = Generator(1024, latent_dim, 8, 2).to(device)
ckpt = torch.load('models/stylegan2-ffhq-config-f.pt', map_location=lambda storage, loc: storage)
original_generator.load_state_dict(ckpt["g_ema"], strict=False)
mean_latent = original_generator.mean_latent(10000)

# to be finetuned generator
generator = deepcopy(original_generator)

transform = transforms.Compose(
    [
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Put your style images in ./style_images, or pass a directory of style images
# as the first command-line argument (e.g. arcane_caitlyn.jpeg, arcane_jinx.jpeg).
# Multiple style images give multi-shot image translation.

targets = []
latents = []

if len(sys.argv) > 1:
    style_paths = glob.glob(os.path.join(sys.argv[1], "*.*"))
else:
    style_paths = glob.glob("./style_images/*.*")

for style_path in style_paths:
    name = os.path.splitext(os.path.basename(style_path))[0]

    # Cropping/alignment is deliberately skipped here (`if False`): the style
    # images are assumed to already be aligned copies in ./style_images_aligned.
    # Change the condition back to `not os.path.exists(style_aligned_path)` to
    # re-enable automatic alignment.
    style_aligned_path = os.path.join("./style_images_aligned/", os.path.basename(style_path))
    if False:
        style_aligned = align_face(style_path)
        style_aligned.save(style_aligned_path)
    else:
        style_aligned = Image.open(style_aligned_path).convert('RGB')

    # GAN invert
    style_code_path = os.path.join('inversion_codes', f'{name}.pt')
    if not os.path.exists(style_code_path):
        latent = e4e_projection(style_aligned, style_code_path, device)
    else:
        latent = torch.load(style_code_path)['latent']

    targets.append(transform(style_aligned).to(device))
    latents.append(latent.to(device))

targets = torch.stack(targets, 0)
latents = torch.stack(latents, 0)

target_im = utils.make_grid(targets, normalize=True, range=(-1, 1))

#@title Finetune StyleGAN
#@markdown alpha controls the strength of the style
alpha =  1.0 #@param {type:"slider", min:0, max:1, step:0.1}
alpha = 1-alpha  # the mixing code below uses alpha as the weight on the style latent, so invert the slider value

#@markdown Tries to preserve color of original image by limiting family of allowable transformations. Set to false if you want to transfer color from reference image. This also leads to heavier stylization
preserve_color = True #@param{type:"boolean"}
#preserve_color = False
#@markdown Number of finetuning steps. Different style reference may require different iterations. Try 200~500 iterations.
num_iter = 200 #@param {type:"number"}
#@markdown Log training on wandb and interval for image logging
use_wandb = True #@param {type:"boolean"}
save_model = False
log_interval = 10 #@param {type:"number"}

if use_wandb:
    os.environ["WANDB_MODE"] = "dryrun"  # offline mode: log locally, upload nothing
    wandb.init(project="JoJoGAN")
    config = wandb.config
    config.num_iter = num_iter
    config.preserve_color = preserve_color
    wandb.log(
        {"Style reference": [wandb.Image(transforms.ToPILImage()(target_im))]},
        step=0)

lpips_fn = lpips.LPIPS(net='vgg').to(device)

# reset generator
del generator
generator = deepcopy(original_generator)

g_optim = optim.Adam(generator.parameters(), lr=2e-3, betas=(0, 0.99))
#g_optim = optim.Adam(generator.parameters(), lr=3e-4, betas=(0, 0.99))

# Which layers to swap for generating a family of plausible real images -> fake image
if preserve_color:
    id_swap = [7,9,11,15,16,17]
else:
    id_swap = list(range(7, generator.n_latent))

# mini-batch over the style references: batches of 8 if there are at least 8
# style images, otherwise one style image per iteration
total_batch = latents.size(0)
batch_size = 8 if total_batch >= 8 else 1

for idx in tqdm(range(num_iter)):
    # slide a window over the style latents, one window position per iteration
    cur_batch = idx % (total_batch - batch_size + 1)

    if preserve_color:
        random_alpha = 0
    else:
        # NOTE: carried over from the original notebook; random_alpha is never used below
        random_alpha = np.random.uniform(alpha, 1)
    # per-iteration style mixing: resample random latents and blend them into
    # the id_swap layers of the current window of style latents
    mean_w = generator.get_latent(torch.randn([batch_size, latent_dim]).to(device)).unsqueeze(1).repeat(1, generator.n_latent, 1)
    in_latent = latents[cur_batch:cur_batch+batch_size,:,:].clone()
    in_latent[:, id_swap] = alpha*in_latent[:, id_swap] + (1-alpha)*mean_w[:, id_swap]

    img = generator(in_latent, input_is_latent=True)
    # LPIPS perceptual loss between the generated images and the matching
    # style targets, both downsampled to 256x256
    loss = lpips_fn(F.interpolate(img, size=(256,256), mode='area'), F.interpolate(targets[cur_batch:cur_batch+batch_size,:,:,:], size=(256,256), mode='area')).mean()
    
    if use_wandb:
        wandb.log({"loss": loss}, step=idx)
        if idx % log_interval == 0:
            generator.eval()
            my_sample = generator(my_w, input_is_latent=True)
            generator.train()
            my_sample = transforms.ToPILImage()(utils.make_grid(my_sample, normalize=True, range=(-1, 1)))
            wandb.log(
                {"Current stylization": [wandb.Image(my_sample)]},
                step=idx)

            if save_model:
                torch.save(generator, "./ckpts/G_{}.pth".format(str(idx).zfill(5)))

    g_optim.zero_grad()
    loss.backward()
    g_optim.step()

    
