1:同比例缩放
直接进行 resize 会使图像产生形变,因此采用"等比例缩放后再补 0(零填充)"的方式。torchvision 中的实现基于 PIL,而推理时需要使用 OpenCV,所以这里给出 OpenCV 版本。
def ZeroPaddingResizeCV(img, size=(224, 224), interpolation=None):
    """Resize ``img`` to fit inside ``size`` without distortion, then zero-pad.

    The image is scaled by the largest single factor for which it still fits
    inside the target, and the result is pasted in the centre of an all-zero
    canvas of exactly the target size.

    Args:
        img: Image array indexed as ``(height, width)`` or
            ``(height, width, channels)``.
        size: Target ``(height, width)`` of the returned canvas.
        interpolation: OpenCV interpolation flag (e.g. ``cv2.INTER_AREA``).
            ``None`` selects ``cv2.INTER_LINEAR``, which is what
            ``cv2.resize`` uses when no flag is given.

    Returns:
        A ``np.uint8`` array of shape ``(size[0], size[1])`` followed by the
        channel dimension of the resized image (if it has one).

    Raises:
        ValueError: If ``img`` is ``None`` or has a zero-length side.
    """
    if img is None:
        raise ValueError("img is None; nothing to resize")
    ih, iw = img.shape[0], img.shape[1]
    if ih == 0 or iw == 0:
        raise ValueError("img has a zero-length side: %r" % (img.shape,))
    h, w = size[0], size[1]

    # One factor for both axes keeps the aspect ratio; taking the smaller of
    # the two guarantees the scaled image fits inside the target.
    scale = min(w / iw, h / ih)
    # Round to nearest, but never below 1 px (very elongated inputs) and
    # never above the canvas.
    new_w = max(1, min(w, int(iw * scale + 0.5)))
    new_h = max(1, min(h, int(ih * scale + 0.5)))

    if interpolation is None:
        interpolation = cv2.INTER_LINEAR
    # `interpolation` must be passed by keyword: the third positional
    # parameter of cv2.resize is `dst`, not the interpolation flag.
    resized = cv2.resize(img, (new_w, new_h), interpolation=interpolation)

    # Take the channel layout from the resized image so grayscale and
    # 4-channel inputs work as well as 3-channel ones.
    new_img = np.zeros((h, w) + resized.shape[2:], np.uint8)
    top = (h - new_h) // 2
    left = (w - new_w) // 2
    new_img[top:top + new_h, left:left + new_w] = resized
    return new_img
# Usage example: fit `img` onto a 96x96 zero-padded canvas (`img` must already be loaded by the caller).
new_image=ZeroPaddingResizeCV(img,(96,96))
2:log
import logging
def getLogger(log_path):
    """Configure the root logger to log to the console and to ``log_path``.

    Calling this function again replaces the handlers installed by the
    previous call instead of stacking new ones, so each record is emitted
    once per destination no matter how often the function is called.
    Handlers attached to the root logger by other code are left untouched.

    Args:
        log_path: Path of the log file. It is opened with mode ``'w'``, so
            an existing file is truncated.

    Returns:
        The root logger, with its level set to ``logging.INFO``.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)  # master switch for the log level

    formatter = logging.Formatter(fmt="[%(asctime)s|%(filename)s|%(levelname)s] %(message)s",
                                  datefmt="%a %b %d %H:%M:%S %Y")

    # Open the file first: if this raises, the existing configuration is
    # still intact.
    fHandler = logging.FileHandler(log_path, mode='w')
    # Level of the file output. The logger-level check (INFO) runs first,
    # so DEBUG records never reach this handler.
    fHandler.setLevel(logging.DEBUG)
    sHandler = logging.StreamHandler()

    # Remove (and close) what a previous call installed; without this every
    # call would add another pair of handlers and duplicate each record.
    for stale in [hdlr for hdlr in logger.handlers
                  if getattr(hdlr, "_installed_by_getLogger", False)]:
        logger.removeHandler(stale)
        stale.close()

    for handler in (sHandler, fHandler):
        handler.setFormatter(formatter)  # shared output format
        handler._installed_by_getLogger = True  # marker used by the cleanup above
        logger.addHandler(handler)  # attach the handler to the logger
    return logger
3:计算模型的 FLOPs 与 Params
from models.AudioSync import AudioSync_1M_emotion,AudioSync_3M_emotion,AudioSync_21M_emotion,AudioSync_39M_emotion
import torch
from thop import profile
# Profile the model with thop, on the GPU when one is available and on the
# CPU otherwise.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
# The original note lists 26 and 11 as constructor arguments that were used;
# their meaning is not visible in this snippet.
model = AudioSync_3M_emotion(26).to(device)  # 26,11
# The dummy input must live on the same device as the model. An unconditional
# `.cuda()` here fails on CPU-only machines even though `device` falls back
# to "cpu".
input1 = torch.randn(1, 1, 80, 16).to(device)
flops, params = profile(model, inputs=(input1,))
# NOTE(review): thop's README names the first return value "macs"
# (multiply-accumulates) - confirm before reporting it as FLOPs.
# Scaled variants of the two prints below:
# print('FLOPs = ' + str(flops/1000**3) + 'G')
# print('Params = ' + str(params/1000**2) + 'M')
print('FLOPs = ' + str(flops))
print('Params = ' + str(params))
图片到视频,无损合成
import subprocess
# Frame-number pattern expanded by ffmpeg's image2 demuxer (not by Python).
image_folder = '/data/dengjia/projects/digithuman/tmp/data/evalvideo/train/chuyao_white/syncnet_crop_face/%6d.jpg'
video_path = '/data/dengjia/projects/digithuman/chuyao_white.mp4'
# Pass ffmpeg an argument list instead of a shell string: no shell is spawned
# and paths containing spaces or shell metacharacters reach ffmpeg intact.
command = [
    "ffmpeg",
    "-f", "image2",
    "-framerate", "25",
    "-i", image_folder,
    "-b:v", "5626k",
    video_path,
]
# `output` is ffmpeg's exit status (0 on success). If the ffmpeg executable
# is missing, subprocess.call raises FileNotFoundError.
output = subprocess.call(command, stdout=None)
合成的视频,再逐帧打开,视频帧的分辨率与合成之前的图片一致。
生成的 .mp4 在 macOS 上可以正常打开使用,但在 Windows 下可能因为编码格式不受支持而无法播放;此时把输出文件的后缀由 .mp4 改为 .avi 重新合成即可:
import subprocess
# Frame-number pattern expanded by ffmpeg's image2 demuxer (not by Python).
image_folder = '/data/dengjia/projects/digithuman/tmp/data/evalvideo/train/chuyao_white/syncnet_crop_face/%6d.jpg'
# The .avi extension makes ffmpeg write an AVI container.
video_path = '/data/dengjia/projects/digithuman/chuyao_white.avi'
# Pass ffmpeg an argument list instead of a shell string: no shell is spawned
# and paths containing spaces or shell metacharacters reach ffmpeg intact.
command = [
    "ffmpeg",
    "-f", "image2",
    "-framerate", "25",
    "-i", image_folder,
    "-b:v", "5626k",
    video_path,
]
# `output` is ffmpeg's exit status (0 on success). If the ffmpeg executable
# is missing, subprocess.call raises FileNotFoundError.
output = subprocess.call(command, stdout=None)
踩坑记录:
1:数据集打包
# Sampler for distributed training: built with this process's `rank` out of
# `world_size` replicas, with shuffling enabled.
dist_sampler = data.distributed.DistributedSampler(
    dataset,
    num_replicas=world_size,
    rank=rank,
    shuffle=True,
)
# The configured batch size is divided by the number of processes
# (presumably args.batch_size is the global batch size - confirm).
per_process_batch = args.batch_size // world_size
loader = data.DataLoader(
    dataset,
    batch_size=per_process_batch,
    sampler=dist_sampler,
    num_workers=8,
    pin_memory=True,
    drop_last=True,  # discard the final batch when it is smaller than batch_size
)
drop_last=True 表示:当最后剩余的数据不足一个 batch 时,直接丢弃这部分数据。
如果使用默认的 drop_last=False,最后一个 batch 的大小可能小于设置的 batch_size,后续训练中就可能出现数据维度对不齐的问题,例如:RuntimeError: shape '[4, -1, 1, 512, 4, 4]' is invalid for input of size 73728。