❀ 1. Project Proposal
Title: Video Portrait Background Replacer
The Video Portrait Background Replacer is an application built on Paddle and OpenCV that extracts a person from one video and places them over the background of another. This article walks through how to implement this with Paddle and OpenCV, and shares several problems encountered during development along with their solutions.
Implementation steps:
1. Portrait extraction:
First, a deep-learning model from the Paddle ecosystem segments each frame of the input portrait video, separating the person from the background. Paddle offers many strong semantic-segmentation models through toolkits such as PaddleSeg and PaddleX, which make high-quality portrait segmentation practical. Fusing the segmentation result with the original frames yields a video that contains only the person.
2. Background replacement:
Next, OpenCV reads each frame of the second video, and every portrait frame is composited onto the corresponding background frame. During compositing, OpenCV image-processing functions such as resize, crop, and blending can adjust and fuse the portrait so it sits more naturally in the background video (a rough sketch of both steps follows this list).
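For illustration, a minimal sketch of both steps; segment_person is a hypothetical stand-in for whichever PaddleSeg human-segmentation predictor is loaded, assumed to return a float mask in [0, 1] (1 = person):
import cv2
import numpy as np

def replace_background(person_frame, background_frame, segment_person):
    """Matte the person out of one frame and composite it over a background frame."""
    # step 1: portrait segmentation (segment_person is a hypothetical helper
    # wrapping a PaddleSeg human-segmentation model)
    mask = segment_person(person_frame)                # HxW float32 mask in [0, 1]
    # step 2: resize the portrait and its mask to the background resolution,
    # then alpha-blend: person where mask is 1, background where mask is 0
    h, w = background_frame.shape[:2]
    person = cv2.resize(person_frame, (w, h))
    m = cv2.resize(mask, (w, h)).astype(np.float32)[..., None]
    out = person * m + background_frame * (1.0 - m)
    return out.astype(np.uint8)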
Problems encountered and solutions:
1. Unsatisfactory segmentation quality:
When using Paddle's semantic-segmentation models for portrait segmentation, the results can be unsatisfactory: blurry edges, occluded regions that are not segmented correctly, and so on. To address this, we can switch to a stronger segmentation model, or fine-tune the model on our own data to improve accuracy and robustness.
2. Unnatural blending of portrait and background:
When compositing the portrait onto the background video, the seam between the two can look unnatural and the lighting can be inconsistent. To address this, we can apply image-processing adjustments, such as tuning the portrait's brightness, contrast, and hue, so it matches the background better (a concrete example follows). Beyond that, deep-learning methods such as GAN-based image blending can produce an even finer fusion.
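As a concrete example of such an adjustment, here is a minimal sketch of a linear brightness/contrast correction applied to the portrait before pasting; the alpha and beta values are illustrative, not tuned:
import cv2

def match_tone(person_frame, alpha=1.1, beta=-10):
    # out = clip(alpha * pixel + beta, 0, 255):
    # alpha scales contrast, beta shifts brightness
    return cv2.convertScaleAbs(person_frame, alpha=alpha, beta=beta)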
The Video Portrait Background Replacer is a Paddle- and OpenCV-based application that uses portrait segmentation and background replacement to lift a person out of one video and place them into another video's background. During development we ran into problems with segmentation quality and with portrait-background blending; by using stronger segmentation models, fine-tuning, and image-processing adjustments, we resolved them and achieved a noticeably better replacement result.
2. Implementation Code
❀ Description
In short: human semantic segmentation is used to matte the person out and composite them over a dynamic background.
❀ Environment
The first step, without question, is environment configuration; the setup link is attached here →
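For reference, a plausible dependency set for this project (package names as published on PyPI; versions are not pinned here, and the inference code under src/ may carry its own requirements):
pip install paddlepaddle paddleseg opencv-python moviepy loguru pillow numpy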
❀ Structure
❀ Code
'''
"Video Portrait Background Replacer"
------------------
❀ Features:
【Input】
|-> human_video_path: path of the portrait video, background_video_path: path of the background video
↓
【Output】
|-> result (the portrait matted out of its video and placed into the background video)
----------------------------------------------------------------------------------
① If the background video is shorter than the portrait video, the background video loops.
② If the background video and the extracted portrait differ in resolution, the two resolutions are adapted automatically.
'''
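Rule ① is implemented later in merge_images (utils.py) with a modulo index; as a standalone illustration of the idea:
# Rule ①: when the background sequence is shorter, wrap around with modulo,
# so frame i of the result reuses background frame i % len(background_frames).
def background_for(i, background_frames):
    return background_frames[i % len(background_frames)]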
🎈demo.py
from utils import *
from videoPersonExtraction import PersonExtraction
from videoBackgroundInsertion import BackgroundInsertion

if __name__ == '__main__':
    # fall back to the bundled sample videos when the user just presses Enter
    human_video_path = input("Enter the portrait video path: ") or r'datasets\human_video.mp4'
    background_video_path = input("Enter the background video path: ") or r'datasets\background_video.mp4'
    print('Portrait matting is hard at work... please wait a moment!')
    # select the video you want to use
    '''
    @video_path<str> -> | path of the video
    @use_frame<list> -> | values in [0, 1]; e.g. [0, 0.5] means only the first half of the video is used.
    @display<bool>   -> | whether to display the video while processing.
    '''
    with PersonExtraction(video_path=human_video_path, use_frame=[0, 0.04], display=False) as vdo_trk:
        human_segments = vdo_trk.run()
        human_audio_path = vdo_trk.extract_audio()
    print("Portrait matting finished!")
    print('Background extraction is hard at work... please wait a moment!')
    with BackgroundInsertion(video_path=background_video_path, use_frame=[0, 1], display=False) as vdo_trk:
        background_frames = vdo_trk.run()
    print('Background extraction finished!')
    print("Portrait frame count -> [{}]  background frame count -> [{}]".format(len(human_segments), len(background_frames)))
    if len(human_segments) > len(background_frames):
        print("More portrait frames than background frames: the background video will loop...")
    elif len(human_segments) < len(background_frames):
        print("Fewer portrait frames than background frames: the background video will be trimmed...")
    else:
        print("Portrait and background frame counts are equal: no adjustment needed...")
    print("************* Start compositing *************")
    # Note: a larger move_ypixel shifts the portrait higher; don't increase it
    # too much at once, or the paste box can go out of bounds and raise an error.
    merge_images(human_segments, background_frames, "./datasets/merge.mp4", move_ypixel=105)
    from merage_av import main
    main()
🎈videoPersonExtraction.py
'''
Matte the portrait frames out of the portrait video and save the
video's audio track to datasets.
'''
import cv2
import time
import moviepy.editor as mp
# import torch
import warnings
import numpy as np
from PIL import Image
from loguru import logger
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
from src.infer import Predictor
from src.seg_demo import get_bg_img
'''
Video Portrait Background Replacer
------------------
Overview:
Input  -> human_video_path (portrait video), background_video_path (background video)
Output -> result (the extracted portrait composited into the background video)
① If the background video is shorter than the portrait video, the background video loops.
② If the background video and the extracted portrait differ in resolution, the two resolutions are adapted automatically.
'''
class PersonExtraction(object):
    def __init__(self, cam=-1, video_path='', save_path='', use_frame=[0, 1], display=True):
        self.ppHuman_predictor = Predictor(r'inference_models/human_pp_humansegv2_lite_192x192_inference_model_with_softmax/deploy.yaml')
        self.display = display
        self.use_frame = use_frame
        self.video_path = video_path
        self.cam = cam
        if self.cam != -1:
            print("Using webcam :" + str(self.cam))
            self.vdo = cv2.VideoCapture(self.cam)
        else:
            print("Using video :" + str(self.video_path))
            self.vdo = cv2.VideoCapture()
        self.save_path = save_path
        self.frame_interval = 1
        # self.use_cuda = True
        # use_cuda = self.use_cuda and torch.cuda.is_available()
        # if not use_cuda:
        #     warnings.warn("Running in cpu mode which maybe very slow!", UserWarning)

    def __enter__(self):
        if self.cam != -1:
            ret, frame = self.vdo.read()
            assert ret, "Error: Camera error"
            # frame.shape is (height, width, channels)
            self.im_height = frame.shape[0]
            self.im_width = frame.shape[1]
            self.count_frame = int(-1)
        else:
            assert os.path.isfile(self.video_path), "Path error"
            self.vdo.open(self.video_path)
            self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self.count_frame = int(self.vdo.get(cv2.CAP_PROP_FRAME_COUNT))
            assert self.vdo.isOpened()
        if self.save_path != '':
            os.makedirs(self.save_path, exist_ok=True)
            # path of saved video and results
            self.save_video_path = os.path.join(self.save_path, "results.avi")
            # create video writer
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.writer = cv2.VideoWriter(self.save_video_path, fourcc, 24, (self.im_width, self.im_height))
            # logging
            logger.info("Save results to {}".format(self.save_path))
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.vdo.release()  # release the capture device / file handle
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def run(self):
        idx_frame = 0
        all_costTime = 0
        human_segments = []
        while self.vdo.grab():
            idx_frame += 1
            start_iter_frame_id = int(self.count_frame * self.use_frame[0])
            end_iter_frame_id = int(self.count_frame * self.use_frame[1])
            self.show_count_frames = end_iter_frame_id
            if idx_frame % self.frame_interval:
                continue
            if idx_frame < start_iter_frame_id:
                continue
            if idx_frame > end_iter_frame_id:
                break
            start = time.time()
            ref, ori_im = self.vdo.retrieve()
            if ref is True:
                # start your code from here
                bg_img = get_bg_img(None, (self.im_height, self.im_width, 3))
                silhouette_frame = self.ppHuman_predictor.run(ori_im, bg_img)
                human_segments.append(silhouette_frame)
                # -----------end-----------
                if self.display:
                    cv2.imshow("frame", silhouette_frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                if self.save_path:
                    self.writer.write(ori_im)
                # logging (end - start is in seconds)
                end = time.time()
                all_costTime += end - start
                if self.cam != -1:
                    logger.info("frame schedule:<{}/-1> ({:.3f} s), fps: {:.03f}"
                                .format(idx_frame, end - start, 1 / (end - start)))
                else:
                    logger.info("frame schedule:<{}/{}> ({:.3f} s), fps: {:.03f}"
                                .format(idx_frame, self.show_count_frames, end - start, 1 / (end - start)))
        logger.info("ALL_COST_TIME:{:.3f}s".format(all_costTime))
        return human_segments

    def extract_audio(self):
        video = mp.VideoFileClip(self.video_path)
        audio = video.audio
        audio_path = os.path.join('datasets', "audio.wav")
        audio.write_audiofile(audio_path)
        # release resources
        video.close()
        audio.close()
        return audio_path
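A note on use_frame: run() maps the two fractions onto absolute frame indices, so a 1000-frame video with use_frame=[0, 0.04] processes only frames 0 through 40. A minimal restatement of that mapping:
def frame_window(count_frame, use_frame):
    # fractions of the total frame count -> absolute [start, end] frame indices
    start = int(count_frame * use_frame[0])
    end = int(count_frame * use_frame[1])
    return start, end

assert frame_window(1000, [0, 0.04]) == (0, 40)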
🎈videoBackgroundInsertion.py
'''
Grab every frame directly from the background video.
'''
import cv2
import time
# import torch
import warnings
import numpy as np
from PIL import Image
from loguru import logger
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
'''
Video Portrait Background Replacer
------------------
Overview:
Input  -> human_video_path (portrait video), background_video_path (background video)
Output -> result (the extracted portrait composited into the background video)
① If the background video is shorter than the portrait video, the background video loops.
② If the background video and the extracted portrait differ in resolution, the two resolutions are adapted automatically.
'''
class BackgroundInsertion(object):
    def __init__(self, cam=-1, video_path='', save_path='', use_frame=[0, 1], display=True):
        self.display = display
        self.use_frame = use_frame
        self.video_path = video_path
        self.cam = cam
        if self.cam != -1:
            print("Using webcam :" + str(self.cam))
            self.vdo = cv2.VideoCapture(self.cam)
        else:
            print("Using video :" + str(self.video_path))
            self.vdo = cv2.VideoCapture()
        self.save_path = save_path
        self.frame_interval = 1
        self.use_cuda = True
        # use_cuda = self.use_cuda and torch.cuda.is_available()
        # if not use_cuda:
        #     warnings.warn("Running in cpu mode which maybe very slow!", UserWarning)

    def __enter__(self):
        if self.cam != -1:
            ret, frame = self.vdo.read()
            assert ret, "Error: Camera error"
            # frame.shape is (height, width, channels)
            self.im_height = frame.shape[0]
            self.im_width = frame.shape[1]
            self.count_frame = int(-1)
        else:
            assert os.path.isfile(self.video_path), "Path error"
            self.vdo.open(self.video_path)
            self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self.count_frame = int(self.vdo.get(cv2.CAP_PROP_FRAME_COUNT))
            assert self.vdo.isOpened()
        if self.save_path != '':
            os.makedirs(self.save_path, exist_ok=True)
            # path of saved video and results
            self.save_video_path = os.path.join(self.save_path, "results.avi")
            # create video writer
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            self.writer = cv2.VideoWriter(self.save_video_path, fourcc, 24, (self.im_width, self.im_height))
            # logging
            logger.info("Save results to {}".format(self.save_path))
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.vdo.release()  # release the capture device / file handle
        if exc_type:
            print(exc_type, exc_value, exc_traceback)

    def run(self):
        idx_frame = 0
        all_costTime = 0
        background_frames = []
        while self.vdo.grab():
            idx_frame += 1
            start_iter_frame_id = int(self.count_frame * self.use_frame[0])
            end_iter_frame_id = int(self.count_frame * self.use_frame[1])
            self.show_count_frames = end_iter_frame_id
            if idx_frame % self.frame_interval:
                continue
            if idx_frame < start_iter_frame_id:
                continue
            if idx_frame > end_iter_frame_id:
                break
            start = time.time()
            ref, ori_im = self.vdo.retrieve()
            if ref is True:
                # start your code from here
                background_frames.append(ori_im)
                # -----------end-----------
                if self.display:
                    cv2.imshow("frame", ori_im)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                if self.save_path:
                    self.writer.write(ori_im)
                # logging (end - start is in seconds)
                end = time.time()
                all_costTime += end - start
                if self.cam != -1:
                    logger.info("frame schedule:<{}/-1> ({:.3f} s), fps: {:.03f}"
                                .format(idx_frame, end - start, 1 / (end - start)))
                else:
                    logger.info("frame schedule:<{}/{}> ({:.3f} s), fps: {:.03f}"
                                .format(idx_frame, self.show_count_frames, end - start, 1 / (end - start)))
        logger.info("ALL_COST_TIME:{:.3f}s".format(all_costTime))
        return background_frames
🎈utils.py
'''
Merge the matted-out portrait frames with the extracted background frames.
'''
import cv2
import numpy as np
from PIL import Image

def resize_with_threshold(image, threshold):
    height, width = image.shape[:2]
    # compute the shrunken target size
    target_height = int(height * threshold)
    target_width = int(width * threshold)
    # shrink the image
    resized_image = cv2.resize(image, (target_width, target_height))
    # create a black canvas the same size as the input image
    output_image = np.zeros_like(image)
    # place the shrunken image near the top, centred horizontally
    y_offset = 120
    x_offset = (width - target_width) // 2
    output_image[y_offset:y_offset+target_height, x_offset:x_offset+target_width] = resized_image
    return output_image

def cv2_to_pil(cv2_image):
    # OpenCV uses BGR channel order; PIL expects RGB
    cv2_image_rgb = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(cv2_image_rgb)
    return pil_image
def merge_images(human_segments, background_frames, output_path, move_ypixel=105):
    merged_images = []
    num_backgrounds = len(background_frames)
    print("human_image_type:", type(human_segments[0]))
    print("background_image_type:", type(background_frames[0]))
    for i, human_img in enumerate(human_segments):
        print('Progress [{}/{}]'.format(i + 1, len(human_segments)))
        # adapt the portrait frame's resolution to the background frame
        background_img = background_frames[i % num_backgrounds]  # loop the background if it is shorter
        background_height, background_width, _ = background_img.shape
        human_img_resized = cv2.resize(human_img, (background_width, background_height))
        human_img_resized = resize_with_threshold(human_img_resized, 0.5)
        # composite the portrait onto the background via PIL
        # (use the resolution-adapted portrait so it matches the background size)
        im = cv2_to_pil(human_img_resized)
        im_rgba = im.convert("RGBA")
        r, g, b, a = im_rgba.split()  # a is fully opaque here; the real keying happens below via black pixels
        # convert the background image to a PIL.Image
        bg_im = cv2_to_pil(background_img)
        bg_im = bg_im.convert("RGBA")
        # create a new "RGBA" image the same size as bg_im
        new_im = Image.new("RGBA", bg_im.size, (0, 0, 0, 0))
        # initialise the new image with the background pixels
        new_im.paste(bg_im, (0, 0))
        # paste the portrait onto the centre of the new image, shifted up by move_ypixel
        new_im.paste(im_rgba, (int((bg_im.width - im.width) / 2), int((bg_im.height - im.height) / 2) - move_ypixel), mask=a)
        # convert back to numpy arrays in OpenCV's BGR channel order
        new_im_arr = np.array(new_im.convert('RGB'))[:, :, ::-1]
        bg_im_arr = np.array(bg_im.convert('RGB'))[:, :, ::-1]
        # find the black pixels (the matted-out background of the portrait frame)
        black_pixels = np.where(np.all(new_im_arr == [0, 0, 0], axis=-1))
        # fill those positions with the corresponding background pixels
        new_im_arr[black_pixels] = bg_im_arr[black_pixels]
        merged_images.append(new_im_arr)
    # create the video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(output_path, fourcc, 30, (background_width, background_height))
    # write the composited frames to the video
    for img in merged_images:
        video_writer.write(img)
    # release resources
    video_writer.release()
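The core compositing move in merge_images is PIL's masked paste followed by black-pixel keying; as a minimal standalone sketch of the masked paste (the file names are hypothetical):
from PIL import Image

fg = Image.open("person.png").convert("RGBA")      # hypothetical input
bg = Image.open("background.png").convert("RGBA")  # hypothetical input
# paste fg at the centre of bg; fg's alpha channel is the mask, so
# transparent fg pixels keep the background and opaque ones overwrite it
box = ((bg.width - fg.width) // 2, (bg.height - fg.height) // 2)
bg.paste(fg, box, mask=fg.split()[3])
bg.convert("RGB").save("merged.png")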
🎈merage_av.py
'''
Add the extracted audio track to the merged video.
'''
from moviepy.editor import VideoFileClip, AudioFileClip

def main():
    video = VideoFileClip("datasets/merge.mp4")
    audio = AudioFileClip("datasets/audio.wav")
    # take the shorter of the video and audio durations
    min_duration = min(video.duration, audio.duration)
    # trim both so their durations match
    video = video.subclip(0, min_duration)
    audio = audio.subclip(0, min_duration)
    # attach the audio track to the video
    video = video.set_audio(audio)
    # write out the merged result
    video.write_videofile("datasets/output.mp4", codec="libx264", audio_codec="aac")
    # release resources
    video.close()
    audio.close()
❀ Results
output (the final composited video, written to datasets/output.mp4)