这是MultiSpecific.ipynb
## make sure you are using a runtime with GPU
## you can check at Runtime/Change runtime type in the top bar.
!nvidia-smi
!git clone https://github.com/neuralchen/SimSwap
!cd SimSwap && git pull
!pip install insightface==0.2.1 onnxruntime moviepy
!pip install googledrivedownloader
!pip install imageio==2.4.1
import os
os.chdir("SimSwap")
!ls
## You can upload filed manually
# from google.colab import drive
# drive.mount('/content/gdrive')
from google_drive_downloader import GoogleDriveDownloader
### it seems that google drive link may not be permenant, you can find this ID from our open url.
# GoogleDriveDownloader.download_file_from_google_drive(file_id='1TLNdIufzwesDbyr_nVTR7Zrx9oRHLM_N',
# dest_path='./arcface_model/arcface_checkpoint.tar')
# GoogleDriveDownloader.download_file_from_google_drive(file_id='1PXkRiBUYbu1xWpQyDEJvGKeqqUFthJcI',
# dest_path='./checkpoints.zip')
!wget -P ./arcface_model https://github.com/neuralchen/SimSwap/releases/download/1.0/arcface_checkpoint.tar
!wget https://github.com/neuralchen/SimSwap/releases/download/1.0/checkpoints.zip
!unzip ./checkpoints.zip -d ./checkpoints
!wget -P ./parsing_model/checkpoint https://github.com/neuralchen/SimSwap/releases/download/1.0/79999_iter.pth
!wget --no-check-certificate "https://sh23tw.dm.files.1drv.com/y4mmGiIkNVigkSwOKDcV3nwMJulRGhbtHdkheehR5TArc52UjudUYNXAEvKCii2O5LAmzGCGK6IfleocxuDeoKxDZkNzDRSt4ZUlEt8GlSOpCXAFEkBwaZimtWGDRbpIGpb_pz9Nq5jATBQpezBS6G_UtspWTkgrXHHxhviV2nWy8APPx134zOZrUIbkSF6xnsqzs3uZ_SEX_m9Rey0ykpx9w" -O antelope.zip
!unzip ./antelope.zip -d ./insightface_func/models/
import cv2
import torch
import fractions
import numpy as np
from PIL import Image
import torch.nn.functional as F
from torchvision import transforms
from models.models import create_model
from options.test_options import TestOptions
from insightface_func.face_detect_crop_multi import Face_detect_crop
from util.videoswap_multispecific import video_swap
import os
import glob
def lcm(a, b): return abs(a * b) / fractions.gcd(a, b) if a and b else 0
transformer = transforms.Compose([
transforms.ToTensor(),
#transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
transformer_Arcface = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
!ls ./checkpoints
opt = TestOptions()
opt.initialize()
opt.parser.add_argument('-f') ## dummy arg to avoid bug
opt = opt.parse()
opt.multisepcific_dir = './demo_file/multispecific' ## or replace it with folder from your own google drive
## and remember to follow the dir structure in usage.md
opt.video_path = './demo_file/multi_people_1080p.mp4' ## or replace it with video from your own google drive
opt.output_path = './output/multi_test_multispecific.mp4'
opt.temp_path = './tmp'
opt.Arc_path = './arcface_model/arcface_checkpoint.tar'
opt.name = 'people'
opt.isTrain = False
opt.use_mask = True ## new feature up-to-date
crop_size = opt.crop_size
下面是models.py
import math
import torch
import torch.nn.functional as F
from torch import nn
from torch.nn import Parameter
from .config import device, num_classes
def create_model(opt):
if opt.model == 'pix2pixHD':
#from .pix2pixHD_model import Pix2PixHDModel, InferenceModel
from .fs_model import fsModel
model = fsModel()
else:
from .ui_model import UIModel
model = UIModel()
model.initialize(opt)
if opt.verbose:
print("model [%s] was created" % (model.name()))
if opt.isTrain and len(opt.gpu_ids) and not opt.fp16:
model = torch.nn.DataParallel(model, device_ids=opt.gpu_ids)
return model
class SEBlock(nn.Module):
def __init__(self, channel, reduction=16):
super(SEBlock, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Linear(channel, channel // reduction),
nn.PReLU(),
nn.Linear(channel // reduction, channel),
nn.Sigmoid()
)
def forward(self, x):
b, c, _, _ = x.size()
y = self.avg_pool(x).view(b, c)
y = self.fc(y).view(b, c, 1, 1)
return x * y
class IRBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True):
super(IRBlock, self).__init__()
self.bn0 = nn.BatchNorm2d(inplanes)
self.conv1 = conv3x3(inplanes, inplanes)
self.bn1 = nn.BatchNorm2d(inplanes)
self.prelu = nn.PReLU()
self.conv2 = conv3x3(inplanes, planes, stride)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
self.use_se = use_se
if self.use_se:
self.se = SEBlock(planes)
def forward(self, x):
residual = x
out = self.bn0(x)
out = self.conv1(out)
out = self.bn1(out)
out = self.prelu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.use_se:
out = self.se(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.prelu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, use_se=True):
self.inplanes = 64
self.use_se = use_se
super(ResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.prelu = nn.PReLU()
self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
self.bn2 = nn.BatchNorm2d(512)
self.dropout = nn.Dropout()
self.fc = nn.Linear(512 * 7 * 7, 512)
self.bn3 = nn.BatchNorm1d(512)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.xavier_normal_(m.weight)
elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.xavier_normal_(m.weight)
nn.init.constant_(m.bias, 0)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, use_se=self.use_se))
self.inplanes = planes
for i in range(1, blocks):
layers.append(block(self.inplanes, planes, use_se=self.use_se))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.prelu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.bn2(x)
x = self.dropout(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
x = self.bn3(x)
return x
class ArcMarginModel(nn.Module):
def __init__(self, args):
super(ArcMarginModel, self).__init__()
self.weight = Parameter(torch.FloatTensor(num_classes, args.emb_size))
nn.init.xavier_uniform_(self.weight)
self.easy_margin = args.easy_margin
self.m = args.margin_m
self.s = args.margin_s
self.cos_m = math.cos(self.m)
self.sin_m = math.sin(self.m)
self.th = math.cos(math.pi - self.m)
self.mm = math.sin(math.pi - self.m) * self.m
def forward(self, input, label):
x = F.normalize(input)
W = F.normalize(self.weight)
cosine = F.linear(x, W)
sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
phi = cosine * self.cos_m - sine * self.sin_m # cos(theta + m)
if self.easy_margin:
phi = torch.where(cosine > 0, phi, cosine)
else:
phi = torch.where(cosine > self.th, phi, cosine - self.mm)
one_hot = torch.zeros(cosine.size(), device=device)
one_hot.scatter_(1, label.view(-1, 1).long(), 1)
output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
output *= self.s
return output
下面是test_options.py
'''
Author: Naiyuan liu
Github: https://github.com/NNNNAI
Date: 2021-11-23 17:03:58
LastEditors: Naiyuan liu
LastEditTime: 2021-11-23 17:08:08
Description:
'''
from .base_options import BaseOptions
class TestOptions(BaseOptions):
def initialize(self):
BaseOptions.initialize(self)
self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.')
self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.')
self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')
self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc')
self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
self.parser.add_argument('--how_many', type=int, default=50, help='how many test images to run')
self.parser.add_argument('--cluster_path', type=str, default='features_clustered_010.npy', help='the path for clustered results of encoded features')
self.parser.add_argument('--use_encoded_image', action='store_true', help='if specified, encode the real image to get the feature map')
self.parser.add_argument("--export_onnx", type=str, help="export ONNX model to a given file")
self.parser.add_argument("--engine", type=str, help="run serialized TRT engine")
self.parser.add_argument("--onnx", type=str, help="run ONNX model via TRT")
self.parser.add_argument("--Arc_path", type=str, default='arcface_model/arcface_checkpoint.tar', help="run ONNX model via TRT")
self.parser.add_argument("--pic_a_path", type=str, default='G:/swap_data/ID/elon-musk-hero-image.jpeg', help="Person who provides identity information")
self.parser.add_argument("--pic_b_path", type=str, default='G:/swap_data/ID/bengio.jpg', help="Person who provides information other than their identity")
self.parser.add_argument("--pic_specific_path", type=str, default='./crop_224/zrf.jpg', help="The specific person to be swapped")
self.parser.add_argument("--multisepcific_dir", type=str, default='./demo_file/multispecific', help="Dir for multi specific")
self.parser.add_argument("--video_path", type=str, default='G:/swap_data/video/HSB_Demo_Trim.mp4', help="path for the video to swap")
self.parser.add_argument("--temp_path", type=str, default='./temp_results', help="path to save temporarily images")
self.parser.add_argument("--output_path", type=str, default='./output/', help="results path")
self.parser.add_argument('--id_thres', type=float, default=0.03, help='how many test images to run')
self.parser.add_argument('--no_simswaplogo', action='store_true', help='Remove the watermark')
self.parser.add_argument('--use_mask', action='store_true', help='Use mask for better result')
self.parser.add_argument('--crop_size', type=int, default=224, help='Crop of size of input image')
self.isTrain = False
下面是face_detect_crop_multi.py
'''
Author: Naiyuan liu
Github: https://github.com/NNNNAI
Date: 2021-11-23 17:03:58
LastEditors: Naiyuan liu
LastEditTime: 2021-11-24 16:45:41
Description:
'''
from __future__ import division
import collections
import numpy as np
import glob
import os
import os.path as osp
import cv2
from insightface.model_zoo import model_zoo
from insightface_func.utils import face_align_ffhqandnewarc as face_align
__all__ = ['Face_detect_crop', 'Face']
Face = collections.namedtuple('Face', [
'bbox', 'kps', 'det_score', 'embedding', 'gender', 'age',
'embedding_norm', 'normed_embedding',
'landmark'
])
Face.__new__.__defaults__ = (None, ) * len(Face._fields)
class Face_detect_crop:
def __init__(self, name, root='~/.insightface_func/models'):
self.models = {}
root = os.path.expanduser(root)
onnx_files = glob.glob(osp.join(root, name, '*.onnx'))
onnx_files = sorted(onnx_files)
for onnx_file in onnx_files:
if onnx_file.find('_selfgen_')>0:
#print('ignore:', onnx_file)
continue
model = model_zoo.get_model(onnx_file)
if model.taskname not in self.models:
print('find model:', onnx_file, model.taskname)
self.models[model.taskname] = model
else:
print('duplicated model task type, ignore:', onnx_file, model.taskname)
del model
assert 'detection' in self.models
self.det_model = self.models['detection']
def prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640), mode ='None'):
self.det_thresh = det_thresh
self.mode = mode
assert det_size is not None
print('set det-size:', det_size)
self.det_size = det_size
for taskname, model in self.models.items():
if taskname=='detection':
model.prepare(ctx_id, input_size=det_size)
else:
model.prepare(ctx_id)
def get(self, img, crop_size, max_num=0):
bboxes, kpss = self.det_model.detect(img,
threshold=self.det_thresh,
max_num=max_num,
metric='default')
if bboxes.shape[0] == 0:
return None
ret = []
# for i in range(bboxes.shape[0]):
# bbox = bboxes[i, 0:4]
# det_score = bboxes[i, 4]
# kps = None
# if kpss is not None:
# kps = kpss[i]
# M, _ = face_align.estimate_norm(kps, crop_size, mode ='None')
# align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0)
align_img_list = []
M_list = []
for i in range(bboxes.shape[0]):
kps = None
if kpss is not None:
kps = kpss[i]
M, _ = face_align.estimate_norm(kps, crop_size, mode = self.mode)
align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0)
align_img_list.append(align_img)
M_list.append(M)
# det_score = bboxes[..., 4]
# best_index = np.argmax(det_score)
# kps = None
# if kpss is not None:
# kps = kpss[best_index]
# M, _ = face_align.estimate_norm(kps, crop_size, mode ='None')
# align_img = cv2.warpAffine(img, M, (crop_size, crop_size), borderValue=0.0)
return align_img_list, M_list
下面是
import os
import cv2
import glob
import torch
import shutil
import numpy as np
from tqdm import tqdm
from util.reverse2original import reverse2wholeimage
import moviepy.editor as mp
from moviepy.editor import AudioFileClip, VideoFileClip
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
import time
from util.add_watermark import watermark_image
from util.norm import SpecificNorm
import torch.nn.functional as F
from parsing_model.model import BiSeNet
def _totensor(array):
tensor = torch.from_numpy(array)
img = tensor.transpose(0, 1).transpose(0, 2).contiguous()
return img.float().div(255)
def video_swap(video_path, target_id_norm_list,source_specific_id_nonorm_list,id_thres, swap_model, detect_model, save_path, temp_results_dir='./temp_results', crop_size=224, no_simswaplogo = False,use_mask =False):
video_forcheck = VideoFileClip(video_path)
if video_forcheck.audio is None:
no_audio = True
else:
no_audio = False
del video_forcheck
if not no_audio:
video_audio_clip = AudioFileClip(video_path)
video = cv2.VideoCapture(video_path)
logoclass = watermark_image('./simswaplogo/simswaplogo.png')
ret = True
frame_index = 0
frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
# video_WIDTH = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
# video_HEIGHT = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = video.get(cv2.CAP_PROP_FPS)
if os.path.exists(temp_results_dir):
shutil.rmtree(temp_results_dir)
spNorm =SpecificNorm()
mse = torch.nn.MSELoss().cuda()
if use_mask:
n_classes = 19
net = BiSeNet(n_classes=n_classes)
net.cuda()
save_pth = os.path.join('./parsing_model/checkpoint', '79999_iter.pth')
net.load_state_dict(torch.load(save_pth))
net.eval()
else:
net =None
# while ret:
for frame_index in tqdm(range(frame_count)):
ret, frame = video.read()
if ret:
detect_results = detect_model.get(frame,crop_size)
if detect_results is not None:
# print(frame_index)
if not os.path.exists(temp_results_dir):
os.mkdir(temp_results_dir)
frame_align_crop_list = detect_results[0]
frame_mat_list = detect_results[1]
id_compare_values = []
frame_align_crop_tenor_list = []
for frame_align_crop in frame_align_crop_list:
# BGR TO RGB
# frame_align_crop_RGB = frame_align_crop[...,::-1]
frame_align_crop_tenor = _totensor(cv2.cvtColor(frame_align_crop,cv2.COLOR_BGR2RGB))[None,...].cuda()
frame_align_crop_tenor_arcnorm = spNorm(frame_align_crop_tenor)
frame_align_crop_tenor_arcnorm_downsample = F.interpolate(frame_align_crop_tenor_arcnorm, size=(112,112))
frame_align_crop_crop_id_nonorm = swap_model.netArc(frame_align_crop_tenor_arcnorm_downsample)
id_compare_values.append([])
for source_specific_id_nonorm_tmp in source_specific_id_nonorm_list:
id_compare_values[-1].append(mse(frame_align_crop_crop_id_nonorm,source_specific_id_nonorm_tmp).detach().cpu().numpy())
frame_align_crop_tenor_list.append(frame_align_crop_tenor)
id_compare_values_array = np.array(id_compare_values).transpose(1,0)
min_indexs = np.argmin(id_compare_values_array,axis=0)
min_value = np.min(id_compare_values_array,axis=0)
swap_result_list = []
swap_result_matrix_list = []
swap_result_ori_pic_list = []
for tmp_index, min_index in enumerate(min_indexs):
if min_value[tmp_index] < id_thres:
swap_result = swap_model(None, frame_align_crop_tenor_list[tmp_index], target_id_norm_list[min_index], None, True)[0]
swap_result_list.append(swap_result)
swap_result_matrix_list.append(frame_mat_list[tmp_index])
swap_result_ori_pic_list.append(frame_align_crop_tenor_list[tmp_index])
else:
pass
if len(swap_result_list) !=0:
reverse2wholeimage(swap_result_ori_pic_list,swap_result_list, swap_result_matrix_list, crop_size, frame, logoclass,\
os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)),no_simswaplogo,pasring_model =net,use_mask=use_mask, norm = spNorm)
else:
if not os.path.exists(temp_results_dir):
os.mkdir(temp_results_dir)
frame = frame.astype(np.uint8)
if not no_simswaplogo:
frame = logoclass.apply_frames(frame)
cv2.imwrite(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)), frame)
else:
if not os.path.exists(temp_results_dir):
os.mkdir(temp_results_dir)
frame = frame.astype(np.uint8)
if not no_simswaplogo:
frame = logoclass.apply_frames(frame)
cv2.imwrite(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)), frame)
else:
break
video.release()
# image_filename_list = []
path = os.path.join(temp_results_dir,'*.jpg')
image_filenames = sorted(glob.glob(path))
clips = ImageSequenceClip(image_filenames,fps = fps)
if not no_audio:
clips = clips.set_audio(video_audio_clip)
clips.write_videofile(save_path,audio_codec='aac')
下面是我平常最后生成视频用到的主函数
import cv2
import torch
import fractions
import numpy as np
from PIL import Image
import torch.nn.functional as F
from torchvision import transforms
from models.models import create_model
from options.test_options import TestOptions
from insightface_func.face_detect_crop_multi import Face_detect_crop
from util.videoswap import video_swap
from util.add_watermark import watermark_image
transformer = transforms.Compose([
transforms.ToTensor(),
#transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
transformer_Arcface = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
detransformer = transforms.Compose([
transforms.Normalize([0, 0, 0], [1/0.229, 1/0.224, 1/0.225]),
transforms.Normalize([-0.485, -0.456, -0.406], [1, 1, 1])
])
opt = TestOptions()
opt.initialize()
opt.parser.add_argument('-f') ## dummy arg to avoid bug
opt = opt.parse()
opt.pic_a_path = './demo_file/Iron_man.jpg' ## or replace it with image from your own google drive
opt.video_path = './demo_file/multi_people_1080p.mp4' ## or replace it with video from your own google drive
opt.output_path = './output/demo.mp4'
opt.temp_path = './tmp'
opt.Arc_path = './arcface_model/arcface_checkpoint.tar'
opt.isTrain = False
opt.use_mask = True ## new feature up-to-date
crop_size = opt.crop_size
torch.nn.Module.dump_patches = True
model = create_model(opt)
model.eval()
app = Face_detect_crop(name='antelope', root='./insightface_func/models')
app.prepare(ctx_id= 0, det_thresh=0.6, det_size=(640,640))
with torch.no_grad():
pic_a = opt.pic_a_path
# img_a = Image.open(pic_a).convert('RGB')
img_a_whole = cv2.imread(pic_a)
img_a_align_crop, _ = app.get(img_a_whole,crop_size)
img_a_align_crop_pil = Image.fromarray(cv2.cvtColor(img_a_align_crop[0],cv2.COLOR_BGR2RGB))
img_a = transformer_Arcface(img_a_align_crop_pil)
img_id = img_a.view(-1, img_a.shape[0], img_a.shape[1], img_a.shape[2])
# convert numpy to tensor
img_id = img_id.cuda()
#create latent id
img_id_downsample = F.interpolate(img_id, size=(112,112))
latend_id = model.netArc(img_id_downsample)
latend_id = latend_id.detach().to('cpu')
latend_id = latend_id/np.linalg.norm(latend_id,axis=1,keepdims=True)
latend_id = latend_id.to('cuda')
video_swap(opt.video_path, latend_id, model, app, opt.output_path, temp_results_dir=opt.temp_path, use_mask=opt.use_mask)