1: Aspect-ratio-preserving resize
Resizing an image directly sometimes distorts it, so instead scale it while keeping the aspect ratio and zero-pad the remainder. The torchvision pipeline operates on PIL images; at inference time an OpenCV version is needed.
import cv2
import numpy as np

def ZeroPaddingResizeCV(img, size=(224, 224), interpolation=cv2.INTER_LINEAR):
    isize = img.shape
    ih, iw = isize[0], isize[1]
    h, w = size[0], size[1]
    # scale so the image fits inside the target size while keeping its aspect ratio
    scale = min(w / iw, h / ih)
    new_w = int(iw * scale + 0.5)
    new_h = int(ih * scale + 0.5)
    img = cv2.resize(img, (new_w, new_h), interpolation=interpolation)
    # paste the resized image into the center of a black (zero) canvas
    new_img = np.zeros((h, w, 3), np.uint8)
    new_img[(h - new_h) // 2:(h + new_h) // 2, (w - new_w) // 2:(w + new_w) // 2] = img
    return new_img
new_image = ZeroPaddingResizeCV(img, (96, 96))
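The note above mentions that the torchvision/training side works on PIL images; a minimal PIL sketch of the same scale-and-pad logic (written here for illustration, not part of the original code) could look like this:

from PIL import Image

def zero_padding_resize_pil(im, size=(224, 224)):
    # scale while keeping the aspect ratio, then paste onto a black (zero) canvas
    w, h = size
    scale = min(w / im.width, h / im.height)
    new_w, new_h = int(im.width * scale + 0.5), int(im.height * scale + 0.5)
    im = im.resize((new_w, new_h), Image.BILINEAR)
    canvas = Image.new("RGB", (w, h))  # black background = zero padding
    canvas.paste(im, ((w - new_w) // 2, (h - new_h) // 2))
    return canvas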
2: Logging
import logging

def getLogger(log_path):
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)  # master switch for the log level
    formatter = logging.Formatter(fmt="[%(asctime)s|%(filename)s|%(levelname)s] %(message)s",
                                  datefmt="%a %b %d %H:%M:%S %Y")
    # StreamHandler: log to the console
    sHandler = logging.StreamHandler()
    sHandler.setFormatter(formatter)
    logger.addHandler(sHandler)
    # FileHandler: log to log_path
    fHandler = logging.FileHandler(log_path, mode='w')
    fHandler.setLevel(logging.DEBUG)  # level switch for the file handler
    fHandler.setFormatter(formatter)  # output format for this handler
    logger.addHandler(fHandler)       # attach the handler to the logger
    return logger
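A quick usage check (the log file name here is arbitrary):

logger = getLogger("train.log")
logger.info("start training")    # goes to both the console and train.log
logger.debug("not recorded")     # filtered out: the root logger level is INFO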
3: Compute a model's FLOPs and params
from models.AudioSync import AudioSync_1M_emotion, AudioSync_3M_emotion, AudioSync_21M_emotion, AudioSync_39M_emotion
import torch
from thop import profile

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = AudioSync_3M_emotion(26).to(device)  # 26, 11
input1 = torch.randn(1, 1, 80, 16).to(device)  # keep the input on the same device as the model
flops, params = profile(model, inputs=(input1,))
# print('FLOPs = ' + str(flops/1000**3) + 'G')
# print('Params = ' + str(params/1000**2) + 'M')
print('FLOPs = ' + str(flops))
print('Params = ' + str(params))
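thop also ships clever_format for human-readable units, as an alternative to the manual conversion in the commented lines above:

from thop import clever_format
flops_str, params_str = clever_format([flops, params], "%.3f")
print('FLOPs =', flops_str)    # printed in M/G units
print('Params =', params_str)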
Images to video, lossless synthesis
import subprocess
image_folder = '/data/dengjia/projects/digithuman/tmp/data/evalvideo/train/chuyao_white/syncnet_crop_face/%6d.jpg'
video_path = '/data/dengjia/projects/digithuman/chuyao_white.mp4'
command = ("ffmpeg -f image2 -framerate 25 -i %s -b:v 5626k %s" % (image_folder,video_path))
output = subprocess.call(command, shell=True, stdout=None)
Opening the synthesized video frame by frame again, the frame resolution matches the original images.
The .mp4 plays fine on macOS, but on Windows the codec is not supported; simply change the output extension from .mp4 to .avi:
import subprocess
image_folder = '/data/dengjia/projects/digithuman/tmp/data/evalvideo/train/chuyao_white/syncnet_crop_face/%6d.jpg'
video_path = '/data/dengjia/projects/digithuman/chuyao_white.avi'
command = ("ffmpeg -f image2 -framerate 25 -i %s -b:v 5626k %s" % (image_folder,video_path))
output = subprocess.call(command, shell=True, stdout=None)
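To double-check that the frame resolution is preserved, a small OpenCV sketch (same placeholder path as above):

import cv2
cap = cv2.VideoCapture('/data/dengjia/projects/digithuman/chuyao_white.avi')
ok, frame = cap.read()
if ok:
    print(frame.shape)  # should match the height/width of the input jpg frames
cap.release()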
Pitfall notes:
1: Dataset batching
from torch.utils import data  # dataset, args, world_size and rank come from the DDP training script

loader = data.DataLoader(
    dataset,
    num_workers=8,
    batch_size=args.batch_size // world_size,
    sampler=data.distributed.DistributedSampler(dataset, num_replicas=world_size, rank=rank, shuffle=True),
    pin_memory=True,
    drop_last=True,
)
drop_last=True drops the final batch when the remaining samples do not fill a whole batch.
With the default drop_last=False, the last batch may be smaller than the configured batch size, which can later cause shape mismatches during training, e.g. RuntimeError: shape '[4, -1, 1, 512, 4, 4]' is invalid for input of size 73728. A minimal reproduction is sketched below.
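A hedged illustration (the tensor sizes are made up for this example, not taken from the real model): any view that hard-codes the batch size fails on a smaller final batch:

import torch
full_batch = torch.randn(4, 512 * 4 * 4)            # a full batch of 4 samples
print(full_batch.view(4, -1, 1, 512, 4, 4).shape)   # works: torch.Size([4, 1, 1, 512, 4, 4])
last_batch = torch.randn(3, 512 * 4 * 4)            # the final, incomplete batch
last_batch.view(4, -1, 1, 512, 4, 4)                # RuntimeError: shape '[4, -1, 1, 512, 4, 4]' is invalid ...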
2: onnx -> tflite
Converting with the onnx_tf library produces a model full of Transpose layers. Inference on macOS is fine, but once deployed on mobile it becomes very slow. (Huge pitfall!)
import onnx
import tensorflow as tf
from onnx_tf.backend import prepare

onnx_model = onnx.load(ONNX_PATH)  # load the onnx model
tf_rep = prepare(onnx_model)       # create a TensorflowRep object
tf_rep.export_graph(TF_PATH)

converter = tf.lite.TFLiteConverter.from_saved_model(TF_PATH)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# converter.target_spec.supported_types = [tf.float16]  # enable to convert to fp16; the model grows from 240KB to 440KB
tf_lite_model = converter.convert()
with open(TFLITE_PATH, 'wb') as f:
    f.write(tf_lite_model)
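A quick sanity check of the converted model with the TFLite interpreter (a sketch; the input shape is read from the model itself):

import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path=TFLITE_PATH)
interpreter.allocate_tensors()
inp = interpreter.get_input_details()[0]
out = interpreter.get_output_details()[0]
interpreter.set_tensor(inp['index'], np.random.randn(*inp['shape']).astype(np.float32))
interpreter.invoke()
print(interpreter.get_tensor(out['index']).shape)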
Official link:
https://www.tensorflow.org/lite/performance/post_training_quantization?hl=zh-cn
Fix: switch to a direct ONNX-to-TFLite conversion with onnx2tflite instead.
Reference conversion project:
https://gitcode.com/MPolaris/onnx2tflite/overview?utm_source=csdn_github_accelerator&isLogin=1
3: CoreML model conversion issues
When converting to CoreML, a model that uses PyTorch bilinear upsampling fails to convert for iOS 13 but converts fine for iOS 14. The offending call:
out = F.interpolate(conv1, scale_factor=2, mode="bilinear", align_corners=True)
With:
coreml_model = coremltools.convert(model=net, inputs=[sample], minimum_deployment_target=coremltools.target.iOS13)
the conversion raises an error.
With:
coreml_model = coremltools.convert(model=net, inputs=[sample], minimum_deployment_target=coremltools.target.iOS14)
it succeeds, but the resulting CoreML model only supports iOS 14+ and cannot run on iOS 13.
To support the lower version, the workaround is as follows:
Step 1: patch the library source.
Concretely, in the coremltools source, change the upsample_bilinear2d conversion function from:
upsample_bilinear = mb.upsample_bilinear(
    x=_input,
    scale_factor_height=scales_h,
    scale_factor_width=scales_w,
    align_corners=align_corners,
    name=node.name,
)
to:
upsample_bilinear = mb.resize_bilinear(
    x=_input,
    target_size_height=output_size.val[0],
    target_size_width=output_size.val[1],
    name=node.name,
    sampling_mode='STRICT_ALIGN_CORNERS',
)
Step 2: in the model code, change the upsampling call from a dynamic scale factor to a fixed output size.
Concretely, the change is sketched below (it also appears in the test code at the end):
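This is a minimal sketch of the change, mirroring the test code below (510 is just the fixed output size used in that example):

# before: dynamic scale factor, not convertible for iOS 13
out = F.interpolate(conv1, scale_factor=2, mode="bilinear", align_corners=True)
# after: fixed output size, convertible with the patched coremltools
out = F.interpolate(conv1, size=(510, 510), mode="bilinear", align_corners=True)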
Finally, test code for the CoreML model conversion:
import torch
from torch.nn import functional as F
import coremltools
from torch import nn

def double_conv(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, padding=(1, 1)),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, 3, padding=(1, 1)),
        nn.ReLU(inplace=True)
    )

class MyModule(torch.nn.Module):
    def __init__(self):
        super(MyModule, self).__init__()
        self.first_layer = double_conv(3, 6)

    def forward(self, x):
        conv1 = self.first_layer(x)
        # out = F.interpolate(conv1, scale_factor=2, mode="bilinear", align_corners=True)  # torch.Size([1, 6, 510, 510])
        out = F.interpolate(conv1, size=(510, 510), mode="bilinear", align_corners=True)
        return out

if __name__ == "__main__":
    model_out = './xxxxx222.mlmodel'
    net = MyModule()
    sample = torch.randn(1, 3, 255, 255)
    net = torch.jit.trace(net, sample, strict=False)
    sample = coremltools.TensorType(name="image", shape=(1, 3, 255, 255))
    coreml_model = coremltools.convert(model=net, inputs=[sample], minimum_deployment_target=coremltools.target.iOS13)
    # open_quantization = False  # quantization only supported on macOS
    # if open_quantization:
    #     bits, mode = 16, 'linear'
    #     coreml_model = coremltools.models.neural_network.quantization_utils.quantize_weights(coreml_model, bits, mode)
    coreml_model.save(model_out)
Hide the Gradio footer (flag)
For gradio.Blocks() or gradio.Interface(), add one line of CSS: footer{display:none !important}
1: Pass the CSS directly via the css argument of Blocks:
import gradio as gr

with gr.Blocks(css="footer{display:none !important}") as demo:
    gr.Markdown("XXXXXXXXXXXXXX")
demo.launch(server_name="0.0.0.0", server_port=7080)
2: Hide specific elements by adding custom CSS to the Interface:
import gradio as gr

def greet(name):
    return "Hello " + name + "!"

demo = gr.Interface(fn=greet, inputs="text", outputs="text", css="footer{display:none !important}")
demo.launch()