参考链接:
https://segmentfault.com/a/1190000021794637?utm_source=tag-newest
https://blog.csdn.net/qq_38232598/article/details/88695454
代码:https://github.com/YunYang1994/tensorflow-yolov3
目录
数据处理
1.数据标注
安装标注工具:pip install labelImg 或者 pip install labelme
在控制台输入labelImg 或者 labelme,调出标注工具
使用labelImg工具标注,格式选择VOC(生成xml格式)
使用labelme工具标注(生成json格式)
2.xml格式数据处理
2.1 按比例划分数据集
#----------------------------------------------------------------------#
#   Randomly split the xml annotations into train.txt and test.txt.
#   Each output line is a file name without its ".xml" extension.
#   Original note: the validation split is performed inside train.py
#   (not part of this script).
#----------------------------------------------------------------------#
import os
import random

random.seed(0)  # fixed seed so the split is reproducible between runs

xmlfilepath = r'../data/img/xml'
saveBasePath = r"../data/img/xml/"

#----------------------------------------------------------------------#
#   train_percent is the fraction of samples written to train.txt;
#   lower it to enlarge the test set.
#----------------------------------------------------------------------#
train_percent = 0.8

# Only ".xml" files count as annotations.
total_xml = [name for name in os.listdir(xmlfilepath) if name.endswith(".xml")]

num = len(total_xml)
tv = int(num * train_percent)
# A set gives O(1) membership tests in the loop below.
trainval = set(random.sample(range(num), tv))

print("train size", tv)
print("test size", num - tv)

with open(os.path.join(saveBasePath, 'test.txt'), 'w') as ftest, \
        open(os.path.join(saveBasePath, 'train.txt'), 'w') as ftrain:
    for i, xml_name in enumerate(total_xml):
        # Drop the ".xml" extension so only the stem is recorded.
        name = os.path.splitext(xml_name)[0] + '\n'
        if i in trainval:
            ftrain.write(name)
        else:
            ftest.write(name)
2.2 生成yolo训练所需的格式
import os
import argparse
import xml.etree.ElementTree as ET
def convert_voc_annotation(data_path, data_type, anno_path, use_difficult_bbox=True, classes=None):
    """Convert VOC xml labels into one-line-per-image YOLO annotations.

    Reads the image stems listed in ``<data_path>/<data_type>.txt`` and, for
    each stem, appends one line to ``anno_path`` of the form
    ``image_path xmin,ymin,xmax,ymax,class_id ...``. The ``.jpg`` path and the
    ``.xml`` label are both built directly under ``data_path``.

    Args:
        data_path: Directory holding the stem list and the xml labels.
        data_type: Name of the stem list without extension, e.g. ``'train'``.
        anno_path: Output annotation file; opened in append mode.
        use_difficult_bbox: When False, objects whose ``<difficult>`` value is
            1 are skipped.
        classes: Ordered class names; an object's class id is its index in
            this sequence. Defaults to ``['p', 'r']``.

    Returns:
        The number of (non-blank) image stems processed.

    Raises:
        ValueError: If an object's lower-cased name is not in ``classes``.
    """
    if classes is None:
        classes = ['p', 'r']
    classes = list(classes)

    img_inds_file = os.path.join(data_path, data_type + '.txt')
    with open(img_inds_file, 'r') as f:
        # Ignore blank lines so they do not turn into an empty image stem.
        image_inds = [line.strip() for line in f if line.strip()]

    with open(anno_path, 'a') as f:
        for image_ind in image_inds:
            image_path = os.path.join(data_path, image_ind + '.jpg')
            label_path = os.path.join(data_path, image_ind + '.xml')
            root = ET.parse(label_path).getroot()

            fields = [image_path]
            for obj in root.findall('object'):
                # A missing or empty <difficult> tag is treated as 0 instead
                # of raising AttributeError on None.
                difficult = (obj.findtext('difficult') or '').strip() or '0'
                if (not use_difficult_bbox) and int(difficult) == 1:
                    continue
                bbox = obj.find('bndbox')
                class_ind = classes.index(obj.find('name').text.lower().strip())
                xmin = bbox.find('xmin').text.strip()
                xmax = bbox.find('xmax').text.strip()
                ymin = bbox.find('ymin').text.strip()
                ymax = bbox.find('ymax').text.strip()
                fields.append(','.join([xmin, ymin, xmax, ymax, str(class_ind)]))

            annotation = ' '.join(fields)
            print(annotation)
            f.write(annotation + "\n")
    return len(image_inds)
if __name__ == '__main__':
    # Command-line entry point: regenerate the train and test annotation files.
    parser = argparse.ArgumentParser()
    for option, default_value in (
        ("--data_path", "../data/img/xml/"),
        ("--train_annotation", "../data/img/train.txt"),
        ("--test_annotation", "../data/img/test.txt"),
    ):
        parser.add_argument(option, default=default_value)
    flags = parser.parse_args()

    # Remove outputs left over from a previous run before writing new ones.
    for stale_path in (flags.train_annotation, flags.test_annotation):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    num1 = convert_voc_annotation(flags.data_path, 'train', flags.train_annotation, False)
    num2 = convert_voc_annotation(flags.data_path, 'test', flags.test_annotation, False)
2.3 查看yolo格式的数据是否正确,画框
#! /usr/bin/env python
# coding=utf-8
#================================================================
# Copyright (C) 2019 * Ltd. All rights reserved.
#
# Editor : VIM
# File name : show_bboxes.py
# Author : YunYang1994
# Created date: 2019-05-29 01:18:24
# Description :
#
#================================================================
import cv2
import numpy as np
from PIL import Image
# Draw the boxes of one annotation line on its image and display the result.
ID = 1  # index of the annotation line to visualise
label_txt = "../data/img/test.txt"

# Each line is split on whitespace: the first field is the image path, the
# remaining fields are comma-separated boxes.
with open(label_txt) as label_file:
    image_info = label_file.readlines()[ID].split()

image_path = image_info[0]
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError("cannot read image: " + image_path)

# OpenCV loads pixels in BGR order while PIL interprets arrays as RGB.
# Convert once up front so the picture is shown in true colour and the
# (255, 0, 0) boxes are drawn in red.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

for bbox in image_info[1:]:
    bbox = bbox.split(",")
    top_left = (int(float(bbox[0])), int(float(bbox[1])))
    bottom_right = (int(float(bbox[2])), int(float(bbox[3])))
    image = cv2.rectangle(image, top_left, bottom_right, (255, 0, 0), 2)

image = Image.fromarray(np.uint8(image))
image.show()
2.4 k-means计算anchors
方法1 :效果一般
import glob
import random
import xml.etree.ElementTree as ET
import numpy as np
def cas_iou(box, cluster):
    """Return the IoU between one box and every cluster centre.

    Only sizes are compared: the overlap is taken as
    ``min(width) * min(height)``, i.e. the box and the centres are treated as
    sharing one corner.

    Args:
        box: Indexable ``(width, height)`` of a single box.
        cluster: 2-D array whose columns 0 and 1 are the width and height of
            each cluster centre.

    Returns:
        1-D float array with one IoU per cluster row. A pair whose union area
        is zero yields 0 instead of NaN.
    """
    cluster = np.asarray(cluster, dtype=float)
    overlap_w = np.minimum(cluster[:, 0], box[0])
    overlap_h = np.minimum(cluster[:, 1], box[1])
    intersection = overlap_w * overlap_h

    area1 = box[0] * box[1]
    area2 = cluster[:, 0] * cluster[:, 1]
    union = area1 + area2 - intersection

    # Guard the 0/0 case (zero-area box against a zero-area centre); a NaN
    # here would later be selected by argmin during clustering.
    return np.divide(intersection, union, out=np.zeros_like(union), where=union != 0)
def avg_iou(box, cluster):
    """Average, over all boxes, of each box's best IoU against the clusters."""
    best_per_box = []
    for idx in range(box.shape[0]):
        ious = cas_iou(box[idx], cluster)
        best_per_box.append(np.max(ious))
    return np.mean(best_per_box)
def kmeans(box, k):
    """Cluster box sizes into ``k`` anchors using ``1 - IoU`` as the distance.

    Args:
        box: 2-D array with one ``(width, height)`` row per box.
        k: Number of clusters. The initial centres are ``k`` distinct rows
            drawn without replacement, so ``k`` must not exceed the number
            of boxes.

    Returns:
        Array with ``k`` rows holding the final cluster centres; each centre
        is the per-column median of the boxes assigned to it.
    """
    # Total number of boxes.
    row = box.shape[0]
    # distance[i, j] is the distance of box i to centre j.
    distance = np.empty((row, k))
    # Cluster assignment from the previous iteration.
    last_clu = np.zeros((row,))

    np.random.seed()
    # Pick k distinct boxes as the initial cluster centres.
    cluster = box[np.random.choice(row, k, replace=False)]

    while True:
        # Distance of every box to each of the k centres.
        for i in range(row):
            distance[i] = 1 - cas_iou(box[i], cluster)
        # Index of the nearest centre for each box.
        near = np.argmin(distance, axis=1)
        # Stop once the assignment no longer changes.
        if (last_clu == near).all():
            break
        # Move each centre to the median of its members.
        for j in range(k):
            members = box[near == j]
            # The median of an empty cluster is NaN, and argmin would then
            # assign every box to it; keep the previous centre instead.
            if members.shape[0] > 0:
                cluster[j] = np.median(members, axis=0)
        last_clu = near
    return cluster
def load_data(path):
    """Collect the normalised size of every object in a folder of VOC xml files.

    Args:
        path: Directory searched (non-recursively) for ``*.xml`` files.

    Returns:
        Float array of shape ``(n, 2)``; each row is a box's width divided by
        the image width and its height divided by the image height. The
        shape is ``(0, 2)`` when no box is found.
    """
    data = []
    # "*.xml" (with the dot) avoids picking up unrelated names ending in "xml".
    for xml_file in glob.glob('{}/*.xml'.format(path)):
        tree = ET.parse(xml_file)
        height = int(tree.findtext('./size/height'))
        width = int(tree.findtext('./size/width'))
        # An image with a non-positive size cannot be normalised; skip it.
        if height <= 0 or width <= 0:
            continue
        # Record the normalised width and height of every object.
        for obj in tree.iter('object'):
            xmin = int(float(obj.findtext('bndbox/xmin'))) / width
            ymin = int(float(obj.findtext('bndbox/ymin'))) / height
            xmax = int(float(obj.findtext('bndbox/xmax'))) / width
            ymax = int(float(obj.findtext('bndbox/ymax'))) / height
            data.append([xmax - xmin, ymax - ymin])
    # reshape keeps the result 2-D even when no box was collected.
    return np.array(data, dtype=np.float64).reshape(-1, 2)
if __name__ == '__main__':
    # Cluster the boxes found in the xml files under `path` and write the
    # resulting anchors, scaled by SIZE, to yolo_anchors.txt.
    SIZE = 416
    anchors_num = 9
    # Folder holding the VOC-style xml annotations.
    path = r'../data/img/xml'

    # Box sizes are loaded as fractions of the image width/height.
    data = load_data(path)

    # Run k-means and order the anchors by width.
    out = kmeans(data, anchors_num)
    out = out[np.argsort(out[:, 0])]
    print('acc:{:.2f}%'.format(avg_iou(data, out) * 100))
    print(out * SIZE)

    data = out * SIZE
    # "%d" truncates each value; the file content is "w,h, w,h, ...".
    anchor_text = ", ".join("%d,%d" % (anchor[0], anchor[1]) for anchor in data)
    # `with` closes the file even if the write fails.
    with open("../data/img/yolo_anchors.txt", 'w') as f:
        f.write(anchor_text)
方法2:效果较好
# coding=utf-8
from __future__ import division, print_function
import numpy as np
def iou(box, clusters):
    """
    Intersection over Union (IoU) of a single box against each of k clusters.
    param:
        box: tuple or array, shifted to the origin (i. e. width and height)
        clusters: numpy array of shape (k, 2) where k is the number of clusters
    return:
        numpy array of shape (k,) where k is the number of clusters
    """
    cluster_w = clusters[:, 0]
    cluster_h = clusters[:, 1]
    intersection = np.minimum(cluster_w, box[0]) * np.minimum(cluster_h, box[1])
    union = box[0] * box[1] + cluster_w * cluster_h - intersection
    # The small epsilon keeps the division defined when the union is zero.
    return np.true_divide(intersection, union + 1e-10)
def avg_iou(boxes, clusters):
    """
    Average, over all boxes, of the best IoU each box reaches against the clusters.
    param:
        boxes: numpy array of shape (r, 2), where r is the number of rows
        clusters: numpy array of shape (k, 2) where k is the number of clusters
    return:
        average IoU as a single float
    """
    best_ious = []
    for row in range(boxes.shape[0]):
        best_ious.append(np.max(iou(boxes[row], clusters)))
    return np.mean(best_ious)
def translate_boxes(boxes):
    """
    Shift every box to the origin, keeping only its width and height.
    param:
        boxes: numpy array of shape (r, 4)
    return:
        numpy array of shape (r, 2)
    """
    shifted = boxes.copy()
    # Overwrite columns 2 and 3 with the absolute width and height.
    shifted[:, 2] = np.abs(shifted[:, 2] - shifted[:, 0])
    shifted[:, 3] = np.abs(shifted[:, 3] - shifted[:, 1])
    # Drop the two leading corner columns.
    return np.delete(shifted, [0, 1], axis=1)
def kmeans(boxes, k, dist=np.median):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.
    param:
        boxes: numpy array of shape (r, 2), where r is the number of rows
        k: number of clusters (initial centres are k distinct rows of boxes)
        dist: function used to recompute a cluster centre from its member
              boxes; it is called with axis=0
    return:
        numpy array of shape (k, 2)
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    last_clusters = np.zeros((rows,))

    np.random.seed()

    # the Forgy method will fail if the whole array contains the same rows
    clusters = boxes[np.random.choice(rows, k, replace=False)]

    while True:
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        # converged: assignments did not change since the last iteration
        if (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            # an empty cluster would get a NaN centre, and argmin would then
            # assign every box to it; keep the previous centre instead
            if members.shape[0] > 0:
                clusters[cluster] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters
def parse_anno(annotation_path, target_size=None, img_size=(1920, 1080)):
    """
    Reads the width and height of every box in an annotation file.
    Each line is "image_path x_min,y_min,x_max,y_max,class_id ...".
    param:
        annotation_path: path of the annotation text file
        target_size: [width, height] of the resized image; when given, box
                     sizes are scaled by the letterbox ratio. None keeps the
                     sizes on the original image scale.
        img_size: (width, height) of the source images used to compute the
                  letterbox ratio; defaults to 1920x1080
    return:
        numpy array of shape (n, 2), one [width, height] row per box
    """
    img_w, img_h = img_size

    resize_ratio = None
    if target_size is not None:
        # use letterbox resize, i.e. keep the original aspect ratio
        resize_ratio = min(target_size[0] / img_w, target_size[1] / img_h)

    result = []
    with open(annotation_path, 'r') as anno:
        for line in anno:
            # split() tolerates repeated spaces and blank lines;
            # the first field is the image path and is skipped
            for item in line.split()[1:]:
                point = item.split(',')
                x_min, y_min, x_max, y_max = (float(v) for v in point[:4])
                width = x_max - x_min
                height = y_max - y_min
                if resize_ratio is not None:
                    # get k-means anchors on the resized target image size
                    width *= resize_ratio
                    height *= resize_ratio
                result.append([width, height])

    # reshape keeps the result 2-D even when the file contains no box
    return np.asarray(result, dtype=float).reshape(-1, 2)
def get_kmeans(anno, cluster_num=9):
    """
    Runs IoU k-means on the boxes.
    param:
        anno: numpy array of box sizes passed on to kmeans / avg_iou
        cluster_num: number of anchors to compute
    return:
        (anchors, ave_iou): anchors as a list of integer pairs sorted by
        area, and the average IoU computed on the un-truncated centres
    """
    centres = kmeans(anno, cluster_num)
    ave_iou = avg_iou(anno, centres)

    int_anchors = centres.astype('int').tolist()
    int_anchors.sort(key=lambda wh: wh[0] * wh[1])
    return int_anchors, ave_iou
if __name__ == '__main__':
    # target resize format: [width, height]
    # if target_size is specified, the anchors are on the resized image scale
    # if target_size is set to None, the anchors are on the original image scale
    target_size = [416, 416]
    annotation_path = "data2/train.txt"

    anno_result = parse_anno(annotation_path, target_size=target_size)
    anchors, ave_iou = get_kmeans(anno_result, 9)

    # Render the anchors as "w,h, w,h, ..." without a trailing separator.
    anchor_string = ', '.join('{},{}'.format(anchor[0], anchor[1]) for anchor in anchors)

    print('anchors are:')
    print(anchor_string)
    print('the average iou is:')
    print(ave_iou)
2.5 编写标签文件
新建一个标签文件(例如 new.names),按照生成yolo格式数据的代码中 classes 列表的顺序,每行写入一个类别名称。
3.json格式数据处理
3.1 json格式转xml格式
# -*- coding: utf-8 -*-
"""
Created on Sun May 31 10:19:23 2020
@author: ywx
"""
import codecs
import json
import os
import shutil
from glob import glob
from typing import List, Any
from xml.sax.saxutils import escape

import cv2
import numpy as np
from sklearn.model_selection import train_test_split
# 1. Paths
# Folder with the original labelme annotation data.
labelme_path = "../data/img/json/"
# Folder the converted data set is saved to.
saved_path = "../data/img/json2/"
# Whether to create a test set.
isUseTest = True

# 2. Create the required output folders
for sub_dir in ("Annotations", "JPEGImages/", "ImageSets/"):
    target_dir = saved_path + sub_dir
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

# 3. Collect the files to process (names without the ".json" suffix)
files = []
for json_path in glob(labelme_path + "*.json"):
    base_name = json_path.replace("\\", "/").split("/")[-1]
    files.append(base_name.split(".json")[0])
print(files)
# 4. Read each labelme json and write the matching VOC-style xml
for json_file_ in files:
    json_filename = labelme_path + json_file_ + ".json"
    # `with` closes the json file instead of leaking the handle.
    with open(json_filename, "r", encoding="utf-8") as json_fp:
        json_file = json.load(json_fp)

    image_filename = labelme_path + json_file_ + ".jpg"
    image_array = cv2.imread(image_filename)
    if image_array is None:
        # cv2.imread reports a missing or unreadable image by returning None.
        raise FileNotFoundError(
            "cannot read image for " + json_filename + ": " + image_filename)
    height, width, channels = image_array.shape

    with codecs.open(saved_path + "Annotations/" + json_file_ + ".xml", "w", "utf-8") as xml_out:
        xml_out.write('<annotation>\n')
        xml_out.write('\t<folder>' + 'WH_data' + '</folder>\n')
        # Escape text so "&", "<" or ">" cannot produce malformed xml.
        xml_out.write('\t<filename>' + escape(json_file_ + ".jpg") + '</filename>\n')
        xml_out.write('\t<source>\n')
        xml_out.write('\t\t<database>WH Data</database>\n')
        xml_out.write('\t\t<annotation>WH</annotation>\n')
        xml_out.write('\t\t<image>flickr</image>\n')
        xml_out.write('\t\t<flickrid>NULL</flickrid>\n')
        xml_out.write('\t</source>\n')
        xml_out.write('\t<owner>\n')
        xml_out.write('\t\t<flickrid>NULL</flickrid>\n')
        xml_out.write('\t\t<name>WH</name>\n')
        xml_out.write('\t</owner>\n')
        xml_out.write('\t<size>\n')
        xml_out.write('\t\t<width>' + str(width) + '</width>\n')
        xml_out.write('\t\t<height>' + str(height) + '</height>\n')
        xml_out.write('\t\t<depth>' + str(channels) + '</depth>\n')
        xml_out.write('\t</size>\n')
        xml_out.write('\t\t<segmented>0</segmented>\n')

        for multi in json_file["shapes"]:
            # The bounding box is the extent of the shape's points.
            points = np.array(multi["points"])
            label = multi["label"]
            xmin = min(points[:, 0])
            xmax = max(points[:, 0])
            ymin = min(points[:, 1])
            ymax = max(points[:, 1])

            # Skip degenerate shapes that have no width or no height.
            if xmax <= xmin or ymax <= ymin:
                continue

            xml_out.write('\t<object>\n')
            xml_out.write('\t\t<name>' + escape(label) + '</name>\n')
            xml_out.write('\t\t<pose>Unspecified</pose>\n')
            xml_out.write('\t\t<truncated>1</truncated>\n')
            xml_out.write('\t\t<difficult>0</difficult>\n')
            xml_out.write('\t\t<bndbox>\n')
            xml_out.write('\t\t\t<xmin>' + str(int(xmin)) + '</xmin>\n')
            xml_out.write('\t\t\t<ymin>' + str(int(ymin)) + '</ymin>\n')
            xml_out.write('\t\t\t<xmax>' + str(int(xmax)) + '</xmax>\n')
            xml_out.write('\t\t\t<ymax>' + str(int(ymax)) + '</ymax>\n')
            xml_out.write('\t\t</bndbox>\n')
            xml_out.write('\t</object>\n')
            print(json_filename, xmin, ymin, xmax, ymax, label)

        xml_out.write('</annotation>')
# 5. Copy every jpg from the labelme folder into <saved_path>/JPEGImages/
jpeg_dir = saved_path + "JPEGImages/"
image_files = glob(labelme_path + "*.jpg")
print("copy image files to VOC007/JPEGImages/")
for image_path in image_files:
    shutil.copy(image_path, jpeg_dir)
# 6. Split the annotated files into trainval / train / val / test lists
txtsavepath = saved_path + "ImageSets/"

# File names (without ".xml") of every generated annotation.
total_files = glob(saved_path + "Annotations/*.xml")
total_files = [i.replace("\\", "/").split("/")[-1].split(".xml")[0] for i in total_files]

# Compute every split before opening any output file, so a failing split
# does not leave truncated list files behind.
if isUseTest:
    trainval_files, test_files = train_test_split(total_files, test_size=0.2, random_state=55)
else:
    trainval_files, test_files = total_files, []
train_files, val_files = train_test_split(trainval_files, test_size=0.2, random_state=55)

# One name per line; `with` closes each file even if a write fails.
for list_name, names in (
    ('trainval.txt', trainval_files),
    ('train.txt', train_files),
    ('val.txt', val_files),
    ('test.txt', test_files),
):
    with open(os.path.join(txtsavepath, list_name), 'w') as list_file:
        list_file.writelines(name + "\n" for name in names)

for file in test_files:
    print(file)
3.2 xml格式转yolo格式
json转成xml格式之后,即可按照xml转yolo的步骤进行,具体方法如上一节所示。
xml格式转yolo格式:
import os
import argparse
import xml.etree.ElementTree as ET
def convert_voc_annotation(data_path, data_type, anno_path, use_difficult_bbox=True, classes=None):
    """Convert VOC-layout xml labels into one-line-per-image YOLO annotations.

    Reads the image stems listed in ``<data_path>/ImageSets/<data_type>.txt``
    and, for each stem, appends one line to ``anno_path`` of the form
    ``image_path xmin,ymin,xmax,ymax,class_id ...``. Image paths are built
    under ``<data_path>/JPEGImages`` and labels are read from
    ``<data_path>/Annotations``.

    Args:
        data_path: Root directory containing ``ImageSets``, ``JPEGImages``
            and ``Annotations``.
        data_type: Name of the stem list without extension, e.g. ``'train'``.
        anno_path: Output annotation file; opened in append mode.
        use_difficult_bbox: When False, objects whose ``<difficult>`` value is
            1 are skipped.
        classes: Ordered class names; an object's class id is its index in
            this sequence. Defaults to ``['p', 'r']``.

    Returns:
        The number of (non-blank) image stems processed.

    Raises:
        ValueError: If an object's lower-cased name is not in ``classes``.
    """
    if classes is None:
        classes = ['p', 'r']
    classes = list(classes)

    img_inds_file = os.path.join(data_path, 'ImageSets', data_type + '.txt')
    with open(img_inds_file, 'r') as f:
        # Ignore blank lines so they do not turn into an empty image stem.
        image_inds = [line.strip() for line in f if line.strip()]

    with open(anno_path, 'a') as f:
        for image_ind in image_inds:
            image_path = os.path.join(data_path, 'JPEGImages', image_ind + '.jpg')
            label_path = os.path.join(data_path, 'Annotations', image_ind + '.xml')
            root = ET.parse(label_path).getroot()

            fields = [image_path]
            for obj in root.findall('object'):
                # A missing or empty <difficult> tag is treated as 0 instead
                # of raising AttributeError on None.
                difficult = (obj.findtext('difficult') or '').strip() or '0'
                if (not use_difficult_bbox) and int(difficult) == 1:
                    continue
                bbox = obj.find('bndbox')
                class_ind = classes.index(obj.find('name').text.lower().strip())
                xmin = bbox.find('xmin').text.strip()
                xmax = bbox.find('xmax').text.strip()
                ymin = bbox.find('ymin').text.strip()
                ymax = bbox.find('ymax').text.strip()
                fields.append(','.join([xmin, ymin, xmax, ymax, str(class_ind)]))

            annotation = ' '.join(fields)
            print(annotation)
            f.write(annotation + "\n")
    return len(image_inds)
if __name__ == '__main__':
    # Command-line entry point: regenerate the train and test annotation files.
    parser = argparse.ArgumentParser()
    for option, default_value in (
        ("--data_path", "../data/img/json2/"),
        ("--train_annotation", "../data/img/json2/train.txt"),
        ("--test_annotation", "../data/img/json2/test.txt"),
    ):
        parser.add_argument(option, default=default_value)
    flags = parser.parse_args()

    # Remove outputs left over from a previous run before writing new ones.
    for stale_path in (flags.train_annotation, flags.test_annotation):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    num1 = convert_voc_annotation(flags.data_path, 'train', flags.train_annotation, False)
    num2 = convert_voc_annotation(flags.data_path, 'test', flags.test_annotation, False)
4.数据提供
总共十张图片,可以同时做训练+测试,进行过拟合训练测试模型。
train.txt test.txt
data/img/xml/10514.jpg 864,212,927,353,1 977,214,1047,346,1 718,200,744,240,1 756,197,788,249,1 785,194,819,227,1 556,222,592,278,1 503,226,556,283,1 364,269,419,328,1 312,277,376,339,1 219,286,273,355,1
data/img/xml/10725.jpg 935,276,1013,424,1 829,291,899,424,1 1078,273,1117,346,1 1129,267,1178,337,1
data/img/xml/11830.jpg 1,276,118,624,1 190,127,379,381,1 594,18,668,154,1 658,8,719,124,1
data/img/xml/14023.jpg 835,87,879,194,1 927,82,973,194,1
data/img/xml/1775.jpg 840,58,895,180,0 160,183,199,218,1 186,178,226,212,1 215,168,265,208,1 429,127,479,167,1 470,118,528,159,1 573,99,633,146,1 1145,60,1179,97,1 1220,64,1263,103,1 1399,80,1433,130,1 1436,81,1470,135,1 1467,82,1508,131,1 1554,118,1605,163,1 1520,112,1567,158,1 1647,123,1686,174,1 1681,128,1732,183,1
data/img/xml/2001.jpg 1,487,279,1078,1 1267,106,1296,141,1 1288,113,1323,151,1 1332,117,1369,156,1 1373,121,1412,163,1 1618,158,1656,201,1 1586,146,1620,190,1 1690,162,1729,219,1 383,115,422,139,1
data/img/xml/357.jpg 759,5,828,171,0 826,1,928,185,0 1529,65,1626,210,1 1633,82,1745,228,1 1429,27,1517,155,1 1245,1,1288,51,1 1283,1,1340,73,1
data/img/xml/7985.jpg 867,276,897,339,1 913,269,946,331,1
data/img/xml/5683.jpg 286,277,358,344,1 345,262,400,330,1 382,254,431,319,1 412,250,469,312,1 654,264,723,331,1 790,259,862,332,1 828,249,904,319,1 926,226,978,277,1
data/img/xml/7015.jpg 935,82,965,119,1 977,83,1013,115,1 1018,82,1054,119,1 1422,118,1451,154,1 1477,127,1504,156,1
anchors.txt
7,6, 6,7, 7,8, 8,7, 8,10, 7,12, 11,9, 11,14, 17,31
new.names
p
r
测试照片: