场景:
从网上下载了阿里云算法大赛图像识别的数据集,提供的是json格式,计划通过yolo算法进行练习,所以需要改造标注文件格式,记录一下以备以后使用。
原json格式数据集如下:
[
{
"name": "223_89_t20201125085855802_CAM3.jpg",
"image_height": 3500,
"image_width": 4096,
"category": 4,
"bbox": [
1702.79,
2826.53,
1730.79,
2844.53
]
},
{
"name": "235_2_t20201127123021723_CAM2.jpg",
"image_height": 6000,
"image_width": 8192,
"category": 5,
"bbox": [
1876.06,
998.04,
1883.06,
1004.04
]
},
{
"name": "235_2_t20201127123021723_CAM2.jpg",
"image_height": 6000,
"image_width": 8192,
"category": 5,
"bbox": [
1655.06,
1094.04,
1663.06,
1102.04
]
},
......
1、改造为yolov5需要的txt格式
yolov5输入的txt是相对坐标,需要进行计算。第一步:按图片名称把json文件中的标注拆分并转换为单独的txt文件(每张图片一个txt),代码如下:
# Convert JSON annotations to the txt format required by yolov5
import os
import json

json_dir = 'train_annos.json'  # path to the json annotation file
out_dir = 'output/'            # directory that receives the generated txt files


def main(json_path=None, output_dir=None):
    """Convert every annotation record in the JSON file to yolov5 txt lines.

    Each record appends one line "<category> <cx> <cy> <w> <h>" to
    output_dir/<image-stem>.txt, where center and size are normalized by the
    image dimensions.  Records sharing an image name accumulate in one file.

    json_path / output_dir default to the module-level json_dir / out_dir, so
    the original no-argument call keeps working.
    """
    json_path = json_dir if json_path is None else json_path
    output_dir = out_dir if output_dir is None else output_dir
    with open(json_path, 'r') as load_f:
        content = json.load(load_f)
    for t in content:
        stem = t['name'].split('.')[0]
        filename = output_dir + stem + '.txt'
        # Factors that normalize pixel values into [0, 1].
        dw = 1. / t['image_width']
        dh = 1. / t['image_height']
        # bbox is [xmin, ymin, xmax, ymax] in pixels; yolo wants center + size.
        x = ((t['bbox'][0] + t['bbox'][2]) / 2.0 - 1) * dw
        y = ((t['bbox'][1] + t['bbox'][3]) / 2.0 - 1) * dh
        # BUG FIX: the original computed w = x * dw instead of w * dw.
        w = (t['bbox'][2] - t['bbox'][0]) * dw
        h = (t['bbox'][3] - t['bbox'][1]) * dh
        # Round all four values consistently (the original rounded only w).
        file_str = ' '.join([str(t['category']),
                             str(round(x, 6)), str(round(y, 6)),
                             str(round(w, 6)), str(round(h, 6))])
        # Append mode creates the file if missing; prepend a newline only
        # when the file already has content, matching the original layout.
        first = not os.path.exists(filename) or os.path.getsize(filename) == 0
        with open(filename, mode="a", encoding="utf-8") as fp:
            fp.write(file_str if first else '\n' + file_str)


if __name__ == '__main__':
    main()
第二步:拆分训练集、测试集和验证集,先拆lables
# Split the label txt files into train / val sets
import os
import random
import glob
import shutil
xmlfilepath='/home/conda_work/conda_hsz/yolov5-master-aliyun/output/'
saveBasePath="/home/conda_work/conda_hsz/yolov5-master-aliyun/VOC/labels/train/"
saveBasePath1="/home/conda_work/conda_hsz/yolov5-master-aliyun/VOC/labels/val/"
trainval_percent=1    # fraction of files used at all (train + val)
train_percent=0.8     # fraction of the used files that become training data
# Keep only the .txt label files.
total_xml = [f for f in os.listdir(xmlfilepath) if f.endswith(".txt")]
num=len(total_xml)
# NOTE(review): `list` shadows the builtin; the name is kept because the
# image-split script below reuses `list`, `train` and `total_xml`.
list=range(num)
tv=int(num*trainval_percent)
tr=int(tv*train_percent)
trainval= random.sample(list,tv)
train=random.sample(trainval,tr)
# Set lookup is O(1) per file instead of scanning the train list each time.
_train_set = set(train)
for i in list:
    name=total_xml[i]
    dest = saveBasePath if i in _train_set else saveBasePath1
    shutil.copy(xmlfilepath + name, dest + name)
拆分图片
# Split the image files to mirror the label split above.
xmlfilepath2="/home/conda_work/conda_hsz/yolov5-master-aliyun/train_imgs/"
saveBasePath2="/home/conda_work/conda_hsz/yolov5-master-aliyun/VOC/images/train/"
saveBasePath3="/home/conda_work/conda_hsz/yolov5-master-aliyun/VOC/images/val/"
# `list`, `train` and `total_xml` are defined by the label-split script above.
# Hoist the membership test into a set: O(1) lookup instead of O(n) per file.
_train_idx = set(train)
for i in list:
    name = total_xml[i].split(".")[0] + ".jpg"
    dest = saveBasePath2 if i in _train_idx else saveBasePath3
    shutil.copy(xmlfilepath2 + name, dest + name)
2、改造为yolov4需要的txt格式
yolov4需要的txt是真实坐标,第一步,拆分为单个txt文件,代码如下:
import codecs
import json
import glob
# import cv2
import shutil
from sklearn.model_selection import train_test_split
import os
# Write one txt file per image, with absolute-pixel boxes, for yolov4.
def conver(json_dir, out_dir):
    """Convert JSON annotation records to per-image txt files.

    Each record appends "xmin,ymin,xmax,ymax,category " (absolute pixel
    coordinates, note the trailing space) to out_dir/<image-stem>.txt.
    Records sharing an image name accumulate in the same file.
    """
    with open(json_dir, 'r') as load_f:
        content = json.load(load_f)
    for t in content:
        stem = t['name'].split('.')[0]
        filename = out_dir + stem + '.txt'
        # yolov4 uses absolute coordinates, so no normalization is needed
        # (the original comment wrongly claimed relative coordinates).
        box = [float(v) for v in t['bbox']]
        file_str = ','.join(str(v) for v in box) + ',' + str(t['category']) + ' '
        # Append mode creates the file when missing.  The original's if/else
        # wrote the identical string in both branches, so it collapses here.
        with open(filename, mode="a", encoding="utf-8") as fp:
            fp.write(file_str)
# Run the conversion
json_dir="/home/conda_work/conda_hsz/yolov4-keras-master_416/train_annos.json" # location of the json annotation file
out_dir = "VOCdevkit/VOC2007/Annotations/"
conver(json_dir,out_dir)
第二步,整合到一个txt文件。
# Merge the per-image txt files into a single yolov4 training-list file.
import glob
out_dir = "VOCdevkit/VOC2007/Annotations/"
org_txt_files = sorted(glob.glob(os.path.join(out_dir, '*.txt')))
# os.path.basename/splitext work on every platform; the original split on
# "\\" (Windows-only) and compensated later with split("/").
org_txt_file_names = [os.path.splitext(os.path.basename(p))[0] for p in org_txt_files]
img_path="/home/conda_work/conda_hsz/yolov4-keras-master-aliyun/train_imgs/"
org_img_files = sorted(glob.glob(os.path.join(img_path, '*.jpg')))
org_img_file_names = [os.path.splitext(os.path.basename(p))[0] for p in org_img_files]
# Set lookup instead of an O(n) list scan per txt file.
_img_name_set = set(org_img_file_names)
with open('./2007_train.txt', 'w') as list_file:
    for i, txt_file in enumerate(org_txt_files):
        # One output line per image: "<image path> <x1,y1,x2,y2,cat ...>".
        with open(txt_file, "r", encoding="utf-8") as fh:
            box_txt = fh.readline()
        name = org_txt_file_names[i]
        if name in _img_name_set:
            list_file.write(os.path.join(img_path, name + '.jpg') + ' ' + box_txt)
            list_file.write('\n')
3、改造yolov4中kmeans_for_anchors.py文件
主要是改造文件中的load_data方法,原来是基于xml读取标注数据,改造后基于json格式读取标注数据。原始及改造后函数内容如下:
# Original method: collect normalized (w, h) box sizes from VOC-style xml files.
def load_data1(path):
    """Scan every *.xml file under *path* and return an (N, 2) array of box
    width/height values, each normalized by the image size."""
    sizes = []
    for xml_file in glob.glob('{}/*xml'.format(path)):
        tree = ET.parse(xml_file)
        img_h = int(tree.findtext('./size/height'))
        img_w = int(tree.findtext('./size/width'))
        # Every <object> contributes one normalized (width, height) pair.
        for obj in tree.iter('object'):
            # int(float(...)) truncates fractional pixel values before scaling.
            left = np.float64(int(float(obj.findtext('bndbox/xmin'))) / img_w)
            top = np.float64(int(float(obj.findtext('bndbox/ymin'))) / img_h)
            right = np.float64(int(float(obj.findtext('bndbox/xmax'))) / img_w)
            bottom = np.float64(int(float(obj.findtext('bndbox/ymax'))) / img_h)
            sizes.append([right - left, bottom - top])
    return np.array(sizes)
改造后
def load_data(path):
    """Read the JSON annotation file at *path* and return an (N, 2) array of
    box width/height values, each normalized by its image's size."""
    with open(path, 'r') as fh:
        records = json.load(fh)
    sizes = []
    for rec in records:
        img_w = rec['image_width']
        img_h = rec['image_height']
        # Normalize each corner first — same arithmetic order as the
        # xml-based version, so the float results are identical.
        left = np.float64(float(rec['bbox'][0]) / img_w)
        top = np.float64(float(rec['bbox'][1]) / img_h)
        right = np.float64(float(rec['bbox'][2]) / img_w)
        bottom = np.float64(float(rec['bbox'][3]) / img_h)
        sizes.append([right - left, bottom - top])
    return np.array(sizes)