# 视频按照bbox切割算法
#
# 最新推荐文章于 2024-05-29 09:40:14 发布
# Fu_Xingwen
# 最新推荐文章于 2024-05-29 09:40:14 发布
# 阅读量245
# 点赞数
# 分类专栏：笔记文章标签： pytorch 深度学习神经网络
# 本文链接：https://blog.csdn.net/qq_41249412/article/details/123278306
# 版权
# 笔记专栏收录该内容
# 23 篇文章 0 订阅
# 订阅专栏
import os
import cv2
from PIL import Image
import json
from tqdm import tqdm
import numpy
import scipy.signal as signal

# local_map = {"右上": "RU", "右上/右下": "R", "右下": "RD", "右中": "RM", "中上": "MU", "左下": "LD", "左上": "LU", "左下/右下": "D",
#              "左中": "LM", "右下角": "RD", "中间": "M", "左上/左下/右上/右下": "ALL", "中下": "MD", "右上右下": "R", "右下方": "RD",
#              "右上/左下": "ALL", "左上右上": "U", "左上/右上": "U", "右上方": "RU", "左下/右上": "ALL", "左上左下": "L",
#              "右下/右上": "R", "左下方": "LD", "左下/右下/右上": "ALL"}

fps = 25  # frame rate (frames per second) handed to cv2.VideoWriter for the output videos


def path_replace(path_ori, file_ori, file_new):
    """
    Map a file path into a mirrored output tree and ensure the output directory exists.

    The file name is dropped from ``path_ori`` and ``file_ori`` is replaced by
    ``file_new`` in the remaining directory path.

    :param path_ori: '/'-separated path of a file inside the original tree
    :param file_ori: folder name to be replaced
    :param file_new: folder name to substitute
    :return: ``(path_new, path_file_new)``; ``path_file_new`` is the rewritten
             directory that contained ``path_ori`` and ``path_new`` is its parent
             directory, which is created on disk when it is missing
    """
    # NOTE(review): str.replace substitutes *every* occurrence of file_ori, so a
    # short name such as 'data' also rewrites folders like 'data_process'.
    # Kept as-is because existing output locations depend on it.
    path_file_new = '/'.join(path_ori.split('/')[:-1]).replace(file_ori, file_new)
    print(path_file_new)
    path_new = '/'.join(path_file_new.split('/')[:-1])  # parent directory
    print(path_new)

    # Create first and handle "already there" afterwards: a separate
    # exists() check can race with another worker creating the same directory.
    try:
        os.makedirs(path_new)
    except FileExistsError:
        print('{} exsits'.format(path_new))
    else:
        print('{} has been created!'.format(path_new))

    return path_new, path_file_new


def max_bbox(bbox_track):
    """
    Find the largest horizontal and vertical extent among all boxes.

    :param bbox_track: iterable of boxes indexed as [x_min, y_min, x_max, y_max]
    :return: (x_len_max, y_len_max) truncated to int; both are 0 when there
             are no boxes or no box has a positive extent
    """
    extents = [(box[2] - box[0], box[3] - box[1]) for box in bbox_track]
    # Seeding with 0 keeps the result non-negative, as a running maximum
    # starting from zero would.
    widest = max([0] + [width for width, _ in extents])
    tallest = max([0] + [height for _, height in extents])
    return int(widest), int(tallest)


def bbox_re(x_len_max, y_len_max, ratio):
    """
    Scale both box side lengths by ``ratio`` and truncate them to integers.

    :param x_len_max: horizontal side length
    :param y_len_max: vertical side length
    :param ratio: scale factor applied to both sides
    :return: (scaled x length, scaled y length) as ints
    """
    scaled_x, scaled_y = (int(side * ratio) for side in (x_len_max, y_len_max))
    return scaled_x, scaled_y


def _median_smooth(values, kernel_size):
    """Median-filter one coordinate series; an empty series yields an empty list."""
    series = numpy.array(values)
    if series.size == 0:
        return []
    # Shift the series so it starts at zero before filtering and shift it back
    # afterwards. NOTE(review): presumably this makes medfilt's zero padding
    # behave like padding with the first sample -- confirm.
    offset = series[0]
    filtered = signal.medfilt(series - offset, kernel_size)
    return (filtered + offset).tolist()


def middle_fild(x_middle, y_middle, kernel_size=51):
    """
    Smooth the box-centre coordinates of a track with a median filter.

    :param x_middle: sequence of centre x coordinates, one per frame
    :param y_middle: sequence of centre y coordinates, one per frame
    :param kernel_size: median window length passed to scipy.signal.medfilt
                        (must be odd); defaults to the previously fixed 51
    :return: (x_smoothed, y_smoothed) as plain Python lists
    """
    return _median_smooth(x_middle, kernel_size), _median_smooth(y_middle, kernel_size)


def _bbox_track_re(bbox_track, x_len_max, y_len_max):
    x_len_max = int(x_len_max / 2)
    y_len_max = int(y_len_max / 2)
    x_middle = []
    y_middle = []
    for bbox in bbox_track:
        # 找到中点
        x_middle.append(int((bbox[2] + bbox[0]) / 2))
        y_middle.append(int((bbox[3] + bbox[1]) / 2))
    # 平滑（可以解决跳帧）
    # x_middle, y_middle = middle_fild(x_middle, y_middle)
    # 统一重整为最大的框
    # 这里不能因为浮点数小数位的取舍导致图片大小不统一
    for bbox, x_i, y_i in zip(bbox_track, x_middle, y_middle):
        bbox[0] = x_i - x_len_max
        bbox[1] = y_i - y_len_max
        bbox[2] = x_i + x_len_max
        bbox[3] = y_i + y_len_max
    # 这里检查一下bbox_track是否被修改
    return bbox_track


def img_cut(img, bbox):
    """
    Crop an OpenCV image to a bounding box.

    The frame is converted from BGR to RGB, cropped through PIL, then
    converted back to BGR.

    :param img: image array in OpenCV BGR channel order
    :param bbox: box indexed as [x_min, y_min, x_max, y_max]
    :return: the cropped image as a BGR array
    """
    rgb_frame = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    crop_box = (bbox[0], bbox[1], bbox[2], bbox[3])
    # NOTE(review): presumably PIL is used so boxes reaching outside the frame
    # still yield a full-size crop -- confirm against PIL's Image.crop behaviour.
    cropped = Image.fromarray(rgb_frame).crop(crop_box)
    return cv2.cvtColor(numpy.asarray(cropped), cv2.COLOR_RGB2BGR)


def _bbox_resize(bbox, ratio):
    # 输入是bbox和resize比例
    # 输出是resize后的bbox
    # xmin, ymin, xmax, ymax = bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax
    xmin, ymin, xmax, ymax = bbox[0], bbox[1], bbox[2], bbox[3]
    w, h = xmax - xmin, ymax - ymin
    x, y = xmin + w / 2, ymin + h / 2
    w, h = w * ratio, h * ratio
    return [int(x - w / 2), int(y - h / 2), int(x + w / 2), int(y + h / 2)]


def path_change(img_path_all,
                path_local='/home/fuxingwen/code2/vql-serving/projects/data_process/data/img_test2/'):
    """
    Point every image path (or URL) at a local directory, keeping only the file name.

    :param img_path_all: iterable of '/'-separated image paths or URLs
    :param path_local: local directory that holds the images; defaults to the
                       directory that used to be hard-coded here
    :return: list of ``path_local`` + file name, in the input order
    """
    # Tolerate a directory given without its trailing separator.
    if not path_local.endswith('/'):
        path_local += '/'
    return [path_local + path_i.split('/')[-1] for path_i in img_path_all]


def img_track_cut(img_path_all, bbox_track, ratio, file_ori, file_new, track_id):
    """
    Crop every frame of a track to a common-size box and write the crops as a video.

    The cropped frames are also saved as ``<index>.jpg`` in the output directory.

    :param img_path_all: paths (or URLs) of all frames; mapped to local files
                         by ``path_change``
    :param bbox_track: one box [x_min, y_min, x_max, y_max] per frame; the boxes
                       are overwritten in place by ``_bbox_track_re``
    :param ratio: scale factor applied to the largest box size
    :param file_ori: name of the original folder
    :param file_new: name of the new (output) folder
    :param track_id: identifier appended to the video file name
    :return: video_path_cut, the path of the written video
    :raises ValueError: if ``img_path_all`` is empty
    :raises FileNotFoundError: if a frame cannot be read
    """
    # Replace the original paths/URLs with the local image paths.
    img_path_all = path_change(img_path_all)
    if not img_path_all:
        raise ValueError('img_path_all is empty, nothing to cut')

    # path_new is the output directory; video_path_cut starts out as the
    # rewritten directory of the first frame and becomes the video file name.
    path_new, video_path_cut = path_replace(img_path_all[0], file_ori, file_new)
    video_path_cut = video_path_cut + '_' + str(track_id) + '.mp4'  # str() tolerates numeric ids

    # Largest box of the track, scaled by ratio.
    x_len_max, y_len_max = max_bbox(bbox_track)
    x_len_max, y_len_max = bbox_re(x_len_max, y_len_max, ratio)

    # Give every frame the same box size around its own centre.
    bbox_track = _bbox_track_re(bbox_track, x_len_max, y_len_max)

    # The boxes span twice the truncated half length, which is one pixel less
    # than x_len_max / y_len_max when those are odd. The writer must be opened
    # with the size of the frames that are actually written.
    size = 2 * int(x_len_max / 2), 2 * int(y_len_max / 2)

    video = cv2.VideoWriter(video_path_cut,
                            cv2.VideoWriter_fourcc(*'MP4V'),
                            fps,
                            size,
                            True)
    try:
        for i, (img_path, bbox) in enumerate(zip(img_path_all, bbox_track)):
            img = cv2.imread(img_path)
            if img is None:
                # Fail with the offending path instead of an opaque OpenCV
                # error raised further down in the crop.
                raise FileNotFoundError('cannot read image: {}'.format(img_path))
            img = img_cut(img, bbox)
            cv2.imwrite(path_new + '/' + str(i) + '.jpg', img)
            video.write(img)
    finally:
        # Always finalise the file, even when a frame fails.
        video.release()

    print('track视频合成生成完成啦')
    return video_path_cut


def img_fix_cut(img_path_all, bbox_fix, ratio, file_ori, file_new, track_id):
    """
    Crop every frame to one fixed box and write the crops as a video.

    :param img_path_all: paths of all frames
    :param bbox_fix: the box [x_min, y_min, x_max, y_max] to crop from every frame
    :param ratio: scale factor applied to ``bbox_fix`` about its centre
    :param file_ori: name of the original folder to be replaced
    :param file_new: name of the new folder the data is saved under
    :param track_id: identifier appended to the video file name
    :return: video_path_cut, the path of the written video
    :raises ValueError: if ``img_path_all`` is empty
    :raises FileNotFoundError: if a frame cannot be read
    """
    if not img_path_all:
        raise ValueError('img_path_all is empty, nothing to cut')

    # path_replace also creates the output directory; only the rewritten
    # directory of the first frame is needed here to build the video name.
    _, video_path_cut = path_replace(img_path_all[0], file_ori, file_new)
    video_path_cut = video_path_cut + '_' + str(track_id) + '.mp4'  # str() tolerates numeric ids

    bbox = _bbox_resize(bbox_fix, ratio)

    # VideoWriter takes (width, height), whereas ndarray.shape[0] is the row count (height).
    size = bbox[2] - bbox[0], bbox[3] - bbox[1]

    video = cv2.VideoWriter(video_path_cut,
                            cv2.VideoWriter_fourcc(*'MP4V'),
                            fps,
                            size,
                            True)
    try:
        for img_path in img_path_all:
            img = cv2.imread(img_path)
            if img is None:
                # Fail with the offending path instead of an opaque OpenCV
                # error raised further down in the crop.
                raise FileNotFoundError('cannot read image: {}'.format(img_path))
            video.write(img_cut(img, bbox))
    finally:
        # Always finalise the file, even when a frame fails.
        video.release()

    print('fix视频合成生成完成啦')
    return video_path_cut


def img_process(json_ori_path, json_new_path, bbox_ratio, file_ori, file_track, file_smooth, file_fix):
    """
    Cut a track video for every record of a JSON file and save the annotated records.

    Each record must provide ``img_path_all`` and ``bbox_track``. The path of the
    generated video is stored in the record under ``video_track``.

    :param json_ori_path: path of the original json file
    :param json_new_path: path of the new json file
    :param bbox_ratio: bbox resize ratio
    :param file_ori: name of the original folder
    :param file_track: name of the new folder (track)
    :param file_smooth: name of the new folder (smooth); currently unused
    :param file_fix: name of the new folder (fix); currently unused
    :return: None
    """
    with open(json_ori_path, encoding='utf-8') as f:
        data = json.load(f)

    for d in tqdm(data):
        # Store the result per record, inside the loop, so every record is
        # annotated (not just the last one) and empty input is not an error.
        # NOTE(review): track_id is fixed to '30'; records whose frames share a
        # directory will write to the same video file -- confirm this is intended.
        d['video_track'] = img_track_cut(
            img_path_all=d['img_path_all'],
            bbox_track=d['bbox_track'],
            track_id='30',
            ratio=bbox_ratio,
            file_ori=file_ori,
            file_new=file_track,
        )
        # The "smooth" (img_track_cut on d['bbox_track_smooth'] -> file_smooth)
        # and "fix" (img_fix_cut on d['bbox_fix'] -> file_fix) variants are
        # currently disabled.

    with open(json_new_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)


# Script entry point: run the cutting pipeline with the settings below.
if __name__ == '__main__':
    run_settings = dict(
        json_ori_path='/home/fuxingwen/code2/vql-serving/projects/ByteTrack/data/clips2.json',
        json_new_path='milktea_position.json',
        bbox_ratio=1,
        file_ori='data',
        file_track='data_track_test',
        file_smooth='data_track_smooth',
        file_fix='data_fix',
    )
    img_process(**run_settings)