【图像-关键点json标签】的数据集生成器

最新推荐文章于 2024-06-17 09:44:39 发布

柏常青

最新推荐文章于 2024-06-17 09:44:39 发布

阅读量1.3k

点赞数 3

分类专栏： python语言基础文章标签：计算机视觉 json opencv

本文链接：https://blog.csdn.net/beauthy/article/details/124947446

版权

python语言基础专栏收录该内容

52 篇文章 5 订阅

订阅专栏

利用opencv图像处理库制作【图像-关键点json标签】的数据集生成器

背景：关于读取json文件的方法

1. 数据

数据目录：
– /keypoints_pose/

–/train_img/
–/train_label_pose/
–/val_img/
–/val_label_pose/

如图所示：
在这里插入图片描述

2. 数据集结果展示：（图像，坐标）

在这里插入图片描述

3. 代码实现

`dataset_json_by_cv2.py`

# !/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2022/5/24 15:32
# @Author  : Hyan Tan 
# @File    : dataset_json_by_cv2.py.py
import os
from abc import abstractmethod, ABCMeta
import numpy as np
import pandas as pd
import json

import cv2
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
from transformation import RandomAdd, ImageResize, RandomCrop, RandomFlip, RandomRotate, Distort


# ---人像-人体关键点数据集生成器---
class KeyPointsPose(Dataset):
    def __init__(self, root_dir=r'E:/Datasets/keypoints_pose/', image_set='train', transforms=None):
        """
        初始化数据集
        :param root_dir: 数据目录(.csv和images的根目录)
        :param image_set: train训练,val验证,test测试
        :param transforms（callable,optional）:图像变换-可选
        标签数据文件格式为json_file: 标签json文件(内容：图像相对地址-category类型-标签coordination坐标)
        """
        super(KeyPointsPose, self).__init__()
        self._imgset = image_set
        self._image_paths = []
        self._labels = []
        self._root_dir = root_dir
        self._transform = transforms

        self.__getFileList()  # 图地址列表和标签列表

    def __getFileList(self):
        img_path = os.path.join(self._root_dir, self._imgset + '_img')  # 图像目录
        label_path = os.path.join(self._root_dir, self._imgset + '_label_pose')  # 标签目录
        self._image_paths = os.listdir(img_path)
        for img_ in self._image_paths:
            pose_name = img_.replace('.png', '_keypoints.json').replace('.jpg', '_keypoints.json')  # 标签名和图像对应，后缀不同。
            with open(os.path.join(label_path, pose_name), 'r') as f:
                pose_label = json.load(f)
                try:
                    pose_data = pose_label['people'][0]['pose_keypoints']
                except IndexError:
                    pose_data = [0 for i in range(54)]  # 数据读取失败时，创建18组0,0,0
                pose_data = np.array(pose_data)
                pose_data = pose_data.reshape((-1, 3))
                self._labels.append(pose_data)

    def __getitem__(self, idx):
        label = self._labels[idx]
        image = cv2.imread(os.path.join(self._root_dir + self._imgset + '_img', self._image_paths[idx]),
                           cv2.IMREAD_COLOR)
        imgSize = image.shape  # cv2读取的是图像数组类型  BGR  H W C
        if self._transform:
            image = self._transform(image)
            afterSize = image.shape
            # bi = np.array(afterSize[0:2]) / np.array(imgSize[0:2])
            # 坐标（x,y）代表的是w,h。图像是h，w，c的格式，所以此处反着来
            bi = np.array((afterSize[1], afterSize[0])) / np.array((imgSize[1], imgSize[0]))
            label[:, 0:2] = label[:, 0:2] * bi  # 图像伸缩变换，坐标同步伸缩
        image = image.astype(np.float32)
        return image, label

    def __len__(self):
        return len(self._image_paths)


def showImageAndCoor(img, coords):
    point_size = 1
    point_color = (0, 0, 255)  # BGR
    thickness = 4  # 0 、4、8
    img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)  # numpy数组和cv2的mat的转换
    cv2.namedWindow("image")
    for coor in coords:
        if coor[2] == -1:  # 略去不存在的点
            pass
        else:
            cv2.circle(img, (int(coor[0]), int(coor[1])), point_size, point_color, thickness)  # 展示图像是正常的
    cv2.imwrite('1.png', img, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
    cv2.imshow("image", img)
    cv2.waitKey(1500)


train_transform = transforms.Compose([
    ImageResize(size=256),
])
val_transform = train_transform
test_transform = transforms.Compose([
    ImageResize(size=288),
    RandomCrop(in_size=288, out_size=256),  # 随机裁剪，测试可用，验证也不可用
    RandomFlip(),  # 随机翻转
    RandomRotate(),  # 随机旋转
    Distort()  # 歪曲
])

if __name__ == "__main__":
    kpp = KeyPointsPose(root_dir=r"E:/Datasets/keypoints_pose/", image_set='train', transforms=train_transform)
    train_loader = DataLoader(kpp, batch_size=4, shuffle=False, drop_last=True)
    # content = Content(root_dir=r"E:/Datasets/keypoints_pose/", image_set='train', strategy='json', outsize=256)
    # train_loader = content.get_dataLoader()
    for i_batch, data in enumerate(train_loader):
        img, label = data
        img, label = img.numpy(), label.numpy()
        print(img.shape, label.shape)
        showImageAndCoor(img[0], label[0])

`tranformation.py:`

# -*- coding: utf-8 -*-
# @Time    : 2022/5/16 11:38
# @Author  : Hyan Tan 
# @File    : transformation.py
import cv2
import random
import os


class RandomAdd:
    def __init__(self, add_path=""):
        self.imgs = []
        for files in os.walk(add_path):
            for file_name in files[2]:
                self.imgs.append(cv2.imread(os.path.join(add_path, file_name)))

    def __call__(self, image):
        if random.random() < 0.2:
            idx = random.randrange(len(self.imgs))
            select_img = self.imgs[idx]
            h, w, _ = select_img.shape
            if h < image.shape[0] // 2 and w < image.shape[1] // 2:
                begin_x = random.randrange(image.shape[1] - w)
                begin_y = random.randrange(image.shape[0] - h)
                image[begin_y:begin_y + h, begin_x:begin_x + w, :] = select_img
        return image


class ImageResize:
    def __init__(self, size=256):
        self._size = size

    def __call__(self, image):
        image = cv2.resize(image, (self._size, self._size))
        return image


class RandomCrop:
    def __init__(self, in_size=256, out_size=224):
        self._in_size = in_size
        self._out_size = out_size

    def __call__(self, image):
        dis = self._in_size - self._out_size
        col_start = random.randint(0, dis)
        row_start = random.randint(0, dis)

        image = image[row_start:(row_start + self._out_size), col_start:(col_start + self._out_size)]
        return image


class RandomFlip:
    def __call__(self, image):
        if random.random() < 0.5:
            image = image[:, ::-1, :]
        return image


class RandomRotate:
    def rotateImg(self, image, angle):
        rows, cols, _ = image.shape
        M = cv2.getRotationMatrix2D(((cols - 1) / 2.0, (rows - 1) / 2.0), angle, 1)
        image = cv2.warpAffine(image, M, (cols, rows))
        return image

    def __call__(self, image):
        if random.randrange(2):
            if random.randrange(2):
                image = self.rotateImg(image, 5)
            else:
                image = self.rotateImg(image, -5)
        return image


class Distort:
    def _convert(self, image, alpha=1, beta=0):
        tmp = image.astype(float) * alpha + beta
        tmp[tmp < 0] = 0
        tmp[tmp > 255] = 255
        image[:] = tmp

    def __call__(self, image):
        if random.randrange(2):

            # brightness distortion
            if random.randrange(2):
                self._convert(image, beta=random.uniform(-32, 32))

            # contrast distortion
            if random.randrange(2):
                self._convert(image, alpha=random.uniform(0.5, 1.5))

            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

            # saturation distortion
            if random.randrange(2):
                self._convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

            # hue distortion
            if random.randrange(2):
                tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
                tmp %= 180
                image[:, :, 0] = tmp

            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

        else:

            # brightness distortion
            if random.randrange(2):
                self._convert(image, beta=random.uniform(-32, 32))

            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

            # saturation distortion
            if random.randrange(2):
                self._convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

            # hue distortion
            if random.randrange(2):
                tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
                tmp %= 180
                image[:, :, 0] = tmp

            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

            # contrast distortion
            if random.randrange(2):
                self._convert(image, alpha=random.uniform(0.5, 1.5))

        return image

4.注意事项：

cv2读图的数据结构为（h,w,c）=(高，宽，通道)，坐标组是（宽x, 高y）。统一伸缩时注意对应。
本文输出数据集为了显示并没有对图像数组进行归一化或标准化操作，用的时候需要加上归一化。
本文数据集为：图像-人体关键点json标签，读取csv文件用pandas.read_csv,读取json文件直接用open和json.load(f)方法。

柏常青

关注

3
点赞
踩
9

收藏

觉得还不错? 一键收藏
打赏
1
评论
【图像-关键点json标签】的数据集生成器

前面做了一篇关于[服装关键点检测算法](https://blog.csdn.net/beauthy/article/details/114318277)的博客，因为重点在算法模型上，所以数据集这一块做的比较粗糙。评论区对于数据集的问题还蛮多，原文标签存储为csv文件，大家自己的标签多为json文件，所以，问我怎么读取或转csv文件怎么处理。本文就图像-关键点json文件标签的数据集进行处理。
复制链接

扫一扫