Face Cartoonization from a Single Style Image: Playing with JoJoGAN

  Face cartoonization is fun, but good training samples are hard to come by: the approach in the linked post (Toonify yourself | Justin Pinkney) needs roughly 2,000 samples to produce a reasonably entertaining result. This has motivated a number of few-shot and one-shot methods, and JoJoGAN is a recently popular one. The appeal is that with just a single style sample you can build your own cartoonization model.

    I won't go into the theory in depth here. Briefly, JoJoGAN inverts the style reference into StyleGAN2's latent space with an e4e encoder, builds paired training data via style mixing, and fine-tunes the generator with an LPIPS perceptual loss. I extracted the code from the Colab notebook at https://github.com/mchong6/JoJoGAN and ran some simple experiments; the full script is at the end. First, I reproduced the Arcane result:

    The input image:

    After 200 iterations the result looks acceptable:

  Next I tried another style. The style reference:

  

 The best result came after about 50 iterations (by 200 iterations the face had turned into a goblin), but it is still not quite good enough.

  

  I also experimented a bit with the learning rate and with augmenting the w latent vectors, but neither seemed to help much; this needs further study.
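For reference, the sort of tweak I tried looks roughly like this. This is a hypothetical sketch, not part of the original notebook: `augment_latents` and the scheduler are my own names, and it assumes the `latents` tensor and `g_optim` optimizer defined in the script below.

# Hypothetical sketch of the tweaks mentioned above -- not from the JoJoGAN repo.
import torch

def augment_latents(latents, noise_std=0.02):
    # jitter the w+ style codes with small Gaussian noise so each iteration
    # sees a slightly perturbed target latent
    return latents + noise_std * torch.randn_like(latents)

# step-decay the learning rate, halving every 100 iterations;
# call scheduler.step() once per iteration, after g_optim.step()
scheduler = torch.optim.lr_scheduler.StepLR(g_optim, step_size=100, gamma=0.5)

In my runs, neither the latent jitter nor the slower learning rate changed the results much.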

import torch
torch.backends.cudnn.benchmark = True
from torchvision import transforms, utils
from util import *
from PIL import Image
import math
import random
import os
import sys
import glob

import numpy as np
from torch import nn, autograd, optim
from torch.nn import functional as F
from tqdm import tqdm
import lpips
import wandb
from model import *
from e4e_projection import projection as e4e_projection

from copy import deepcopy

os.makedirs('inversion_codes', exist_ok=True)
os.makedirs('style_images', exist_ok=True)
os.makedirs('style_images_aligned', exist_ok=True)
os.makedirs('models', exist_ok=True)
os.makedirs('ckpts', exist_ok=True)  # needed when save_model is enabled below

device = 'cuda'

filepath = './testsample.jpg'
name = strip_path_extension(filepath)+'.pt'

# aligns and crops face
aligned_face = align_face(filepath)

# my_w = restyle_projection(aligned_face, name, device, n_iters=1).unsqueeze(0)
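# invert the aligned face into StyleGAN2's w+ space with the e4e encoder
# (the resulting latent is cached to the .pt path in `name`)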
my_w = e4e_projection(aligned_face, name, device).unsqueeze(0)

latent_dim = 512

# Load original generator
original_generator = Generator(1024, latent_dim, 8, 2).to(device)
ckpt = torch.load('models/stylegan2-ffhq-config-f.pt', map_location=lambda storage, loc: storage)
original_generator.load_state_dict(ckpt["g_ema"], strict=False)
mean_latent = original_generator.mean_latent(10000)

# to be finetuned generator
generator = deepcopy(original_generator)

transform = transforms.Compose(
    [
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Put your style images in ./style_images, or pass a directory of style images
# as the first command-line argument (e.g. arcane_caitlyn.jpeg, arcane_jinx.jpeg).
# Multiple style images give multi-shot image translation.

targets = []
latents = []

if len(sys.argv) > 1:
    style_paths = glob.glob(os.path.join(sys.argv[1], "*.*"))
else:
    style_paths = glob.glob("./style_images/*.*")

for style_path in style_paths:
    name = os.path.splitext(os.path.basename(style_path))[0]

    # Cropping/alignment is deliberately skipped here (`if False`): the style
    # images are assumed to already be aligned copies in ./style_images_aligned.
    # Change the condition back to `not os.path.exists(style_aligned_path)` to
    # re-enable automatic alignment.
    style_aligned_path = os.path.join("./style_images_aligned/", os.path.basename(style_path))
    if False:
        style_aligned = align_face(style_path)
        style_aligned.save(style_aligned_path)
    else:
        style_aligned = Image.open(style_aligned_path).convert('RGB')

    # GAN invert
    style_code_path = os.path.join('inversion_codes', f'{name}.pt')
    if not os.path.exists(style_code_path):
        latent = e4e_projection(style_aligned, style_code_path, device)
    else:
        latent = torch.load(style_code_path)['latent']

    targets.append(transform(style_aligned).to(device))
    latents.append(latent.to(device))

targets = torch.stack(targets, 0)
latents = torch.stack(latents, 0)

target_im = utils.make_grid(targets, normalize=True, range=(-1, 1))

#@title Finetune StyleGAN
#@markdown alpha controls the strength of the style
alpha =  1.0 #@param {type:"slider", min:0, max:1, step:0.1}
alpha = 1-alpha  # the mixing code below uses alpha as the weight on the style latent, so invert the slider value

#@markdown Tries to preserve color of original image by limiting family of allowable transformations. Set to false if you want to transfer color from reference image. This also leads to heavier stylization
preserve_color = True #@param{type:"boolean"}
#preserve_color = False
#@markdown Number of finetuning steps. Different style reference may require different iterations. Try 200~500 iterations.
num_iter = 200 #@param {type:"number"}
#@markdown Log training on wandb and interval for image logging
use_wandb = True #@param {type:"boolean"}
save_model = False
log_interval = 10 #@param {type:"number"}

if use_wandb:
    os.environ["WANDB_MODE"] = "dryrun"  # offline mode: log locally, upload nothing
    wandb.init(project="JoJoGAN")
    config = wandb.config
    config.num_iter = num_iter
    config.preserve_color = preserve_color
    wandb.log(
        {"Style reference": [wandb.Image(transforms.ToPILImage()(target_im))]},
        step=0)

lpips_fn = lpips.LPIPS(net='vgg').to(device)

# reset generator
del generator
generator = deepcopy(original_generator)

g_optim = optim.Adam(generator.parameters(), lr=2e-3, betas=(0, 0.99))
#g_optim = optim.Adam(generator.parameters(), lr=3e-4, betas=(0, 0.99))

# Which layers to swap for generating a family of plausible real images -> fake image
if preserve_color:
    id_swap = [7,9,11,15,16,17]
else:
    id_swap = list(range(7, generator.n_latent))

# mini-batch over the style references: batches of 8 if there are at least 8
# style images, otherwise one style image per iteration
total_batch = latents.size(0)
batch_size = 8 if total_batch >= 8 else 1

for idx in tqdm(range(num_iter)):
    # slide a window over the style latents, one window position per iteration
    cur_batch = idx % (total_batch - batch_size + 1)

    if preserve_color:
        random_alpha = 0
    else:
        # NOTE: carried over from the original notebook; random_alpha is never used below
        random_alpha = np.random.uniform(alpha, 1)
    # per-iteration style mixing: resample random latents and blend them into
    # the id_swap layers of the current window of style latents
    mean_w = generator.get_latent(torch.randn([batch_size, latent_dim]).to(device)).unsqueeze(1).repeat(1, generator.n_latent, 1)
    in_latent = latents[cur_batch:cur_batch+batch_size,:,:].clone()
    in_latent[:, id_swap] = alpha*in_latent[:, id_swap] + (1-alpha)*mean_w[:, id_swap]

    img = generator(in_latent, input_is_latent=True)
    # LPIPS perceptual loss between the generated images and the matching
    # style targets, both downsampled to 256x256
    loss = lpips_fn(F.interpolate(img, size=(256,256), mode='area'), F.interpolate(targets[cur_batch:cur_batch+batch_size,:,:,:], size=(256,256), mode='area')).mean()
    
    if use_wandb:
        wandb.log({"loss": loss}, step=idx)
        if idx % log_interval == 0:
            generator.eval()
            my_sample = generator(my_w, input_is_latent=True)
            generator.train()
            my_sample = transforms.ToPILImage()(utils.make_grid(my_sample, normalize=True, range=(-1, 1)))
            wandb.log(
                {"Current stylization": [wandb.Image(my_sample)]},
                step=idx)

            if save_model:
                torch.save(generator, "./ckpts/G_{}.pth".format(str(idx).zfill(5)))

    g_optim.zero_grad()
    loss.backward()
    g_optim.step()

    
