解析Labelme标注文件

最新推荐文章于 2024-03-30 15:23:32 发布

我有酒两杯

最新推荐文章于 2024-03-30 15:23:32 发布

阅读量1.7k

点赞数 1

文章标签： json python 深度学习

本文链接：https://blog.csdn.net/weixin_42232041/article/details/117705810

版权

文章目录

1、前言

在深度学习任务中，有时需要使用Labelme标注工具做数据集，Labelme生成的数据是*.json文件，然后用json_to_dataset.py转成二值化数据。有时需要修改源码才能转多个json文件。
为简化工作，仿照json_to_dataset.py重新写了一个脚本。

2、JSON文件

JSON文件以键值对（类似Python中的字典）的形式存储数据。

json内容
只需要获取每个目标的名字(label字段)，坐标点(points字段)，就可以绘制出二值化图。

json中的图片
部分json文件中并没有直接保存原始图像的宽度和高度，导致无法直接确定二值化图的大小。
但是json文件中以base64格式保存了原始图像数据（imageData字段），所以把base64数据解码并转成数组后，就可以从数组的shape中获得原始图像的宽度和高度。
代码如下：

    def img_b64_to_arr(self, img_b64):
        """Decode a base64-encoded image file into a NumPy array.

        Args:
            img_b64: Base64 data holding an encoded image file.

        Returns:
            The array produced by ``np.array`` from the opened PIL image.
        """
        raw_bytes = base64.b64decode(img_b64)
        buffer = io.BytesIO(raw_bytes)
        return np.array(PIL.Image.open(buffer))
        
     # Usage: unpack the decoded array's shape as (height, width, channels).
     img_h, img_w, _ = img_arr.shape

读取json

    def read_json(self, file_path):
        """Convert one labelme JSON annotation into a label image on disk.

        Every shape is drawn as a filled polygon whose pixels hold the index
        of its label in ``self.label``; unseen labels are appended to that
        list. The image is written to ``./<image name>_label.png``.

        Args:
            file_path: Path of the labelme ``*.json`` file to read.
        """
        # Read as UTF-8 so non-ASCII labels do not depend on the platform's
        # default encoding.
        with open(file_path, encoding='utf-8') as f:
            json_list = json.load(f)

        shape = json_list['shapes']

        # Drop any directory part of imagePath, then replace the extension
        # (if there is one) with "label.png"; remaining dots become "_".
        image_name = json_list['imagePath'].replace('\\', '/').rsplit('/', 1)[-1]
        name_parts = image_name.split('.')
        if len(name_parts) > 1:
            name_parts = name_parts[:-1]
        fileName = '_'.join(name_parts + ['label.png'])

        img = self.img_b64_to_arr(json_list['imageData'])
        # Grayscale images decode to 2-D arrays, so only read the first two
        # axes instead of unpacking exactly three.
        img_h, img_w = img.shape[:2]

        mask = np.zeros((img_h, img_w), dtype=np.uint8)   # blank canvas per shape
        masks = np.zeros((img_h, img_w), dtype=np.uint8)  # accumulated label image
        for item in shape:
            label = item['label']
            if label not in self.label:
                self.label.append(label)
            index = self.label.index(label)
            points = item['points']  # [[x1, y1], [x2, y2], ...]
            layer = self.polygons_to_mask(mask, polygons=points, index=index)
            # Where shapes overlap the later one wins; adding the layers would
            # turn the overlap into the sum of two class indices. The result
            # is kept as uint8 so it can be written as an 8-bit image.
            masks = np.where(layer > 0, layer, masks).astype(np.uint8)
        cv.imwrite('./{}'.format(fileName), masks)

3、整体代码

单张解析

import json
import numpy as np
import cv2 as cv
import os, glob, shutil, io, base64
import PIL
from PIL import Image, ImageDraw
'''
功能：
    把json文件转成label图片，
'''

class Peocess_Json():
    """Convert a labelme JSON annotation into a single-channel label image.

    Each annotated shape is drawn as a filled polygon whose pixel value is
    the index of its label in ``self.label``. Indices are handed out in
    order of first appearance, so they are only comparable between files
    converted by the same instance.
    """

    def __init__(self):
        # Known class names; a label's position in this list is its pixel
        # value. Position 0 is taken by 'background'.
        self.label = ['background']

    def img_b64_to_arr(self, img_b64):
        """Decode a base64-encoded image file into a NumPy array."""
        f = io.BytesIO(base64.b64decode(img_b64))
        return np.array(PIL.Image.open(f))

    def img_arr_to_b64(self, img_arr):
        """Encode an image array as PNG and return the base64 bytes."""
        img_pil = PIL.Image.fromarray(img_arr)
        f = io.BytesIO()
        img_pil.save(f, format='PNG')
        return base64.encodebytes(f.getvalue())

    @staticmethod
    def _label_file_name(image_path):
        """Build the output file name ``<image name>_label.png``.

        Directory components (either slash style) are dropped, the extension
        is removed when there is one, and remaining dots become underscores.
        """
        base_name = image_path.replace('\\', '/').rsplit('/', 1)[-1]
        parts = base_name.split('.')
        if len(parts) > 1:
            parts = parts[:-1]
        return '_'.join(parts + ['label.png'])

    @staticmethod
    def _overlay(masks, layer):
        """Paint the non-zero pixels of ``layer`` over ``masks`` (uint8 result).

        Overwriting instead of adding keeps overlapping shapes from producing
        the sum of two class indices.
        """
        return np.where(layer > 0, layer, masks).astype(np.uint8)

    def read_json(self, file_path):
        """Read one labelme JSON file and save its label image.

        The image is written to ``./<image name>_label.png``.

        Args:
            file_path: Path of the labelme ``*.json`` file to read.
        """
        # Read as UTF-8 so non-ASCII labels do not depend on the platform's
        # default encoding.
        with open(file_path, encoding='utf-8') as f:
            json_list = json.load(f)

        shape = json_list['shapes']
        fileName = self._label_file_name(json_list['imagePath'])

        img = self.img_b64_to_arr(json_list['imageData'])
        # Grayscale images decode to 2-D arrays, so only read the first two
        # axes instead of unpacking exactly three.
        img_h, img_w = img.shape[:2]

        mask = np.zeros((img_h, img_w), dtype=np.uint8)   # blank canvas per shape
        masks = np.zeros((img_h, img_w), dtype=np.uint8)  # accumulated label image
        for item in shape:
            label = item['label']
            if label not in self.label:
                self.label.append(label)
            index = self.label.index(label)
            points = item['points']  # [[x1, y1], [x2, y2], ...]
            layer = self.polygons_to_mask(mask, polygons=points, index=index)
            masks = self._overlay(masks, layer)
        cv.imwrite('./{}'.format(fileName), masks)

    def polygons_to_mask(self, mask, polygons, index):
        """Rasterise one polygon and return its index-valued mask.

        Args:
            mask: Array used as the drawing canvas (``read_json`` passes an
                all-zero array).
            polygons: Sequence of ``[x, y]`` vertices.
            index: Value assigned to the pixels of the polygon.

        Returns:
            Array holding ``index`` at every non-zero pixel of the drawn
            canvas and 0 elsewhere.
        """
        canvas = PIL.Image.fromarray(mask)
        xy = list(map(tuple, polygons))
        PIL.ImageDraw.Draw(canvas).polygon(xy=xy, outline=1, fill=1)
        filled = np.array(canvas, dtype=bool)
        return np.where(filled, index, 0)


if __name__ == '__main__':
    # Convert a single labelme annotation; the label image is written to the
    # current working directory.
    Peocess_Json().read_json('./data/3135.0-375.0DOM.json')

多张解析
代码中加入了多进程模式，但是部分电脑在使用多进程时速度反而变慢，可以在if __name__ == '__main__':下设置FLAG=False切换为不使用多进程（逐个文件顺序处理）。

import json
import numpy as np
import cv2 as cv
import os, glob, shutil, io, base64
import PIL
from PIL import Image, ImageDraw
from threading import Thread
from multiprocessing import Process
'''
功能：
    把json文件转成label图片，
'''



class Peocess_Json():
    """Convert a labelme JSON annotation into a single-channel label image.

    Each annotated shape is drawn as a filled polygon whose pixel value is
    the index of its label in ``self.label``. Indices are handed out in
    order of first appearance, so they are only comparable between files
    converted by the same instance.
    """

    def __init__(self):
        # Known class names; a label's position in this list is its pixel
        # value. Position 0 is taken by 'background'.
        self.label = ['background']

    def img_b64_to_arr(self, img_b64):
        """Decode a base64-encoded image file into a NumPy array."""
        f = io.BytesIO(base64.b64decode(img_b64))
        return np.array(PIL.Image.open(f))

    def img_arr_to_b64(self, img_arr):
        """Encode an image array as PNG and return the base64 bytes."""
        img_pil = PIL.Image.fromarray(img_arr)
        f = io.BytesIO()
        img_pil.save(f, format='PNG')
        return base64.encodebytes(f.getvalue())

    @staticmethod
    def _label_file_name(image_path):
        """Build the output file name ``<image name>_label.png``.

        Directory components (either slash style) are dropped, the extension
        is removed when there is one, and remaining dots become underscores.
        """
        base_name = image_path.replace('\\', '/').rsplit('/', 1)[-1]
        parts = base_name.split('.')
        if len(parts) > 1:
            parts = parts[:-1]
        return '_'.join(parts + ['label.png'])

    @staticmethod
    def _overlay(masks, layer):
        """Paint the non-zero pixels of ``layer`` over ``masks`` (uint8 result).

        Overwriting instead of adding keeps overlapping shapes from producing
        the sum of two class indices.
        """
        return np.where(layer > 0, layer, masks).astype(np.uint8)

    def read_json(self, file_path, save_path):
        """Read one labelme JSON file and save its label image.

        Args:
            file_path: Path of the labelme ``*.json`` file to read.
            save_path: Directory that receives ``<image name>_label.png``.
        """
        # Read as UTF-8 so non-ASCII labels do not depend on the platform's
        # default encoding.
        with open(file_path, encoding='utf-8') as f:
            json_list = json.load(f)

        shape = json_list['shapes']
        fileName = self._label_file_name(json_list['imagePath'])

        img = self.img_b64_to_arr(json_list['imageData'])
        # Grayscale images decode to 2-D arrays, so only read the first two
        # axes instead of unpacking exactly three.
        img_h, img_w = img.shape[:2]

        mask = np.zeros((img_h, img_w), dtype=np.uint8)   # blank canvas per shape
        masks = np.zeros((img_h, img_w), dtype=np.uint8)  # accumulated label image
        for item in shape:
            label = item['label']
            if label not in self.label:
                self.label.append(label)
            index = self.label.index(label)
            points = item['points']  # [[x1, y1], [x2, y2], ...]
            layer = self.polygons_to_mask(mask, polygons=points, index=index)
            masks = self._overlay(masks, layer)
        cv.imwrite('{}/{}'.format(save_path, fileName), masks)

    def polygons_to_mask(self, mask, polygons, index):
        """Rasterise one polygon and return its index-valued mask.

        Args:
            mask: Array used as the drawing canvas (``read_json`` passes an
                all-zero array).
            polygons: Sequence of ``[x, y]`` vertices.
            index: Value assigned to the pixels of the polygon.

        Returns:
            Array holding ``index`` at every non-zero pixel of the drawn
            canvas and 0 elsewhere.
        """
        canvas = PIL.Image.fromarray(mask)
        xy = list(map(tuple, polygons))
        PIL.ImageDraw.Draw(canvas).polygon(xy=xy, outline=1, fill=1)
        filled = np.array(canvas, dtype=bool)
        return np.where(filled, index, 0)
        

class MyProcess(Process):
    """Worker process that converts a single labelme JSON file.

    The input file and output directory can be given to the constructor or
    set afterwards with ``getPath``; both must be set before ``start()``.
    """

    def __init__(self, path=None, save_p=None):
        super(MyProcess, self).__init__()
        # Initialised here so that a missing getPath() call is detected in
        # run() with a clear message instead of an AttributeError.
        self.path = path
        self.save_path = save_p

    def run(self) -> None:
        """Convert ``self.path`` and write the result into ``self.save_path``.

        Raises:
            ValueError: If the input path or output directory was never set.
        """
        if self.path is None or self.save_path is None:
            raise ValueError('path and save_path must be set before the process runs')
        self.js = Peocess_Json()
        self.js.read_json(self.path, self.save_path)

    def getPath(self, path, save_p):
        """Store the JSON file to convert and the output directory."""
        self.path = path
        self.save_path = save_p

if __name__ == '__main__':
    # Batch conversion of labelme JSON files. Two settings:
    #   1. read_json_file_path: glob pattern matching the JSON files to read.
    #   2. save_mask_file_path: directory that receives the label images.
    # NOTE: every file is converted by a fresh Peocess_Json, so class indices
    # are assigned per file in order of first appearance.
    FLAG = True  # True: one worker process per file; False: convert sequentially
    read_json_file_path = r'D:\image\yujiapo\newLabel\*.json'
    save_mask_file_path = r'D:\image\yujiapo\newLabel'
    json_files = glob.glob(read_json_file_path)
    workers = []
    for i in json_files:
        print(i)
        if FLAG:
            myThread = MyProcess()
            myThread.getPath(i, save_mask_file_path)
            myThread.start()
            workers.append(myThread)
        else:
            js = Peocess_Json()
            js.read_json(i, save_mask_file_path)
    # Wait for every worker so the script only returns once all images exist.
    for worker in workers:
        worker.join()

我有酒两杯

关注

1
点赞
踩
12

收藏

觉得还不错? 一键收藏
0
评论
解析Labelme标注文件

文章目录1、前言2、JSON文件3、整体代码1、前言在深度学习任务中，有时需要使用Labelme标注工具做数据集，Labelme生成的数据是*.json文件，然后用json_to_dataset.py转成二值化数据。有时需要修改源码才能转多个json文件。为简化工作，仿照json_to_dataset.py重新了一个。2、JSON文件json是将文件以字典的形式进行存储。只需要获取每个目标的名字(label字段)，坐标点(points字段)，就可以绘制出二值化图。json中的图片部分js
复制链接

扫一扫