安装并运行labelme来标注数据集:
# Install labelme (PyQt5 is installed first)
pip install pyqt5
pip install labelme
# Launch labelme
labelme
使用labelme标注之后每张图片都会生成对应的json标注文件。
将数据集转换成mask-rcnn训练数据集:
首先需要修改labelme的一些转换文件,找到labelme的安装位置,并找到json_to_dataset.py这个文件。我的文件在D:\anaconda\envs\labelme\Lib\site-packages\labelme\cli 这里面。
修改为下面代码:(最新版本的labelme删掉了yaml,我们重新添加上去,这样就可以生成yaml标注信息)
import argparse
import base64
import json
import os
import os.path as osp
import imgviz
import PIL.Image
import yaml
from labelme.logger import logger
from labelme import utils
def main():
    """Convert one labelme JSON annotation file into a per-image dataset folder.

    Command-line arguments:
        json_file: Path to the labelme JSON annotation file.
        -o / --out: Output directory. When omitted, a directory named after
            the JSON file (with every "." replaced by "_") is created next
            to the JSON file.

    Files written to the output directory:
        img.png          the source image
        label.png        the label mask
        label_viz.png    the mask rendered over the grayscale image
        label_names.txt  one label name per line, ordered by label value
        info.yaml        the same label names under the key ``label_names``
    """
    logger.warning(
        "This script is aimed to demonstrate how to convert the "
        "JSON file to a single image dataset."
    )
    logger.warning(
        "It won't handle multiple JSON files to generate a "
        "real-use dataset."
    )

    parser = argparse.ArgumentParser()
    parser.add_argument("json_file")
    parser.add_argument("-o", "--out", default=None)
    args = parser.parse_args()

    json_file = args.json_file

    if args.out is None:
        out_dir = osp.basename(json_file).replace(".", "_")
        out_dir = osp.join(osp.dirname(json_file), out_dir)
    else:
        out_dir = args.out
    # makedirs also handles a nested --out path and an already existing folder.
    os.makedirs(out_dir, exist_ok=True)

    # Read explicitly as UTF-8 so non-ASCII label names do not depend on the
    # platform default encoding, and close the handle deterministically.
    with open(json_file, encoding="utf-8") as f:
        data = json.load(f)

    # Fall back to the image file referenced by the annotation when the JSON
    # does not embed the image bytes.
    imageData = data.get("imageData")
    if not imageData:
        imagePath = osp.join(osp.dirname(json_file), data["imagePath"])
        with open(imagePath, "rb") as f:
            imageData = base64.b64encode(f.read()).decode("utf-8")
    img = utils.img_b64_to_arr(imageData)

    # Assign consecutive integer values to labels in sorted-name order;
    # value 0 is reserved for the background.
    label_name_to_value = {"_background_": 0}
    for shape in sorted(data["shapes"], key=lambda x: x["label"]):
        label_name_to_value.setdefault(
            shape["label"], len(label_name_to_value)
        )
    lbl, _ = utils.shapes_to_label(
        img.shape, data["shapes"], label_name_to_value
    )

    # Invert the mapping into a list indexed by label value.
    label_names = [None] * (max(label_name_to_value.values()) + 1)
    for name, value in label_name_to_value.items():
        label_names[value] = name

    lbl_viz = imgviz.label2rgb(
        label=lbl, img=imgviz.asgray(img), label_names=label_names, loc="rb"
    )

    PIL.Image.fromarray(img).save(osp.join(out_dir, "img.png"))
    utils.lblsave(osp.join(out_dir, "label.png"), lbl)
    PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, "label_viz.png"))

    with open(
        osp.join(out_dir, "label_names.txt"), "w", encoding="utf-8"
    ) as f:
        for lbl_name in label_names:
            f.write(lbl_name + "\n")

    # info.yaml was dropped by recent labelme releases; it is written here
    # again alongside label_names.txt.
    logger.warning('info.yaml is being replaced by label_names.txt')
    info = dict(label_names=label_names)
    with open(osp.join(out_dir, 'info.yaml'), 'w', encoding='utf-8') as f:
        yaml.safe_dump(info, f, default_flow_style=False)

    logger.info("Saved to: {}".format(out_dir))


if __name__ == "__main__":
    main()
该脚本会为每一个json文件生成一个文件夹,里面包括原图、mask图片、yaml文件、txt文件和mask的可视化图。我们只用到原图、mask图片和yaml文件。
修改完之后保存,然后运行下面的脚本就可以了。
import os
import subprocess

# Directory that holds the labelme JSON annotation files.
path = 'C:/Users/86183/Desktop/json1'

# NOTE: run this script from a shell in which the environment that contains
# labelme is already activated (e.g. `activate labelme`, using your own
# environment name). Calling os.system("activate ...") here would only
# affect a short-lived child shell, not the commands launched below.

# Only pass JSON files to the converter; the directory may also contain
# images or output folders from a previous run.
json_file = [
    name for name in os.listdir(path) if name.lower().endswith('.json')
]

for file in json_file:
    # An argument list (no shell) keeps paths with spaces or special
    # characters intact. check=False keeps going when one file fails.
    subprocess.run(
        ["labelme_json_to_dataset.exe", os.path.join(path, file)],
        check=False,
    )
到目前为止我们已经把数据集都准备好了。如下图所示: