- git clone the original author's GitHub repo: ybcc2015/DeepLearning-Utils
- Since I am using it to cluster the COCO dataset, which the repo does not support out of the box, I modified the code in `datasets.py` as follows:
```python
import xml.etree.ElementTree as ET
import numpy as np
import glob
import os
import json
import cv2


class AnnotParser(object):
    def __init__(self, file_type):
        assert file_type in ['csv', 'xml', 'json'], "Unsupported file type."
        self.file_type = file_type

    def parse(self, annot_dir):
        """
        Parse annotation files; the file type must be csv, xml or json.
        :param annot_dir: directory path of annotation files
        :return: 2-d array of shape (n, 2), each row represents a bbox, and the
                 two columns are the normalized width and height
        """
        if self.file_type == 'xml':
            return self.parse_xml(annot_dir)
        elif self.file_type == 'json':
            return self.parse_json(annot_dir)
        else:
            return self.parse_csv(annot_dir)

    @staticmethod
    def parse_xml(annot_dir):
        """
        Parse xml annotation files in VOC format.
        """
        boxes = []
        for xml_file in glob.glob(os.path.join(annot_dir, '*.xml')):
            tree = ET.parse(xml_file)
            h_img = int(tree.findtext('./size/height'))
            w_img = int(tree.findtext('./size/width'))
            for obj in tree.iter('object'):
                xmin = int(round(float(obj.findtext('bndbox/xmin'))))
                ymin = int(round(float(obj.findtext('bndbox/ymin'))))
                xmax = int(round(float(obj.findtext('bndbox/xmax'))))
                ymax = int(round(float(obj.findtext('bndbox/ymax'))))
                w_norm = (xmax - xmin) / w_img
                h_norm = (ymax - ymin) / h_img
                boxes.append([w_norm, h_norm])
        return np.array(boxes)

    # The repo's original labelme parser, kept here for reference:
    """
    @staticmethod
    def parse_json(annot_dir):
        boxes = []
        for js_file in glob.glob(os.path.join(annot_dir, '*.json')):
            with open(js_file) as f:
                data = json.load(f)
            h_img = data['imageHeight']
            w_img = data['imageWidth']
            for shape in data['shapes']:
                points = shape['points']
                xmin = int(round(points[0][0]))
                ymin = int(round(points[0][1]))
                xmax = int(round(points[1][0]))
                ymax = int(round(points[1][1]))
                w_norm = (xmax - xmin) / w_img
                h_norm = (ymax - ymin) / h_img
                boxes.append([w_norm, h_norm])
        return np.array(boxes)
    """

    @staticmethod
    def parse_json(annot_dir):
        """
        Parse COCO-format json annotation files.
        """
        boxes = []
        for js_file in glob.glob(os.path.join(annot_dir, '*.json')):
            with open(js_file) as f:
                data = json.load(f)
            images = data['images']
            # index the annotations by image id
            annotations_index = {}
            if 'annotations' in data:
                for annotation in data['annotations']:
                    image_id = annotation['image_id']
                    if image_id not in annotations_index:
                        annotations_index[image_id] = []
                    annotations_index[image_id].append(annotation)
            for image in images:
                image_id = image['id']
                if image_id not in annotations_index:
                    continue
                annotations_list = annotations_index[image_id]
                image_height = image['height']
                image_width = image['width']
                for object_annotations in annotations_list:
                    (x, y, width, height) = tuple(object_annotations['bbox'])
                    # skip degenerate and out-of-bounds boxes
                    if width <= 0 or height <= 0:
                        continue
                    if x + width > image_width or y + height > image_height:
                        continue
                    w_norm = float(width) / image_width
                    h_norm = float(height) / image_height
                    boxes.append([w_norm, h_norm])
        return np.array(boxes)

    @staticmethod
    def parse_csv(annot_dir):
        """
        Parse csv annotation files; each line is expected to be
        image_path,xmin,ymin,xmax,ymax,label.
        """
        boxes = []
        for csv_file in glob.glob(os.path.join(annot_dir, '*.csv')):
            with open(csv_file) as f:
                lines = f.readlines()
            for line in lines:
                items = line.strip().split(',')
                img = cv2.imread(items[0])
                h_img, w_img = img.shape[:2]
                xmin, ymin, xmax, ymax = list(map(int, items[1:-1]))
                w_norm = (xmax - xmin) / w_img
                h_norm = (ymax - ymin) / h_img
                boxes.append([w_norm, h_norm])
        return np.array(boxes)
```
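As a sanity check, the modified parser only needs the `images` and `annotations` sections of a COCO file. Below is a minimal round-trip sketch; the directory and file names are hypothetical:

```python
# Hypothetical round-trip: write a minimal COCO-style annotation file,
# then parse it back into normalized (w, h) pairs.
import json
import os

os.makedirs('annots', exist_ok=True)
coco = {
    'images': [{'id': 1, 'height': 480, 'width': 640}],
    'annotations': [{'image_id': 1, 'bbox': [100, 120, 64, 48]}],  # [x, y, w, h]
}
with open('annots/instances_demo.json', 'w') as f:
    json.dump(coco, f)

parser = AnnotParser('json')
print(parser.parse('annots'))  # [[0.1 0.1]], i.e. 64/640 and 48/480
```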
- Use this tool to cluster anchors for the COCO dataset. Run the command below to obtain the clustered anchors (here `-d` points at the annotation directory, `-t` selects the annotation format, and `-k` sets the number of clusters):

```bash
python3 gen_anchors.py -d ***/COCO/annotations -t json -k 6
```
Appendix
```python
# coding=utf-8
# k-means++ for YOLOv2 anchors
# Obtain the anchor sizes required by YOLOv2 via the k-means++ algorithm
import numpy as np


# Box describes the coordinates of a bounding box
class Box(object):
    def __init__(self, x, y, w, h):
        self.x = x
        self.y = y
        self.w = w
        self.h = h


# Compute the overlap of two boxes along one axis
# x1 is the center coordinate of box1 on this axis
# len1 is the length of box1 on this axis
# x2 is the center coordinate of box2 on this axis
# len2 is the length of box2 on this axis
# Returns the overlapping length on this axis (negative if the boxes are disjoint)
def overlap(x1, len1, x2, len2):
    len1_half = len1 / 2
    len2_half = len2 / 2
    left = max(x1 - len1_half, x2 - len2_half)
    right = min(x1 + len1_half, x2 + len2_half)
    return right - left


# Compute the intersection area of box a and box b
# a and b are both Box instances
# Returns the intersection area
def box_intersection(a, b):
    w = overlap(a.x, a.w, b.x, b.w)
    h = overlap(a.y, a.h, b.y, b.h)
    if w < 0 or h < 0:
        return 0
    area = w * h
    return area


# Compute the union area of box a and box b
# a and b are both Box instances
# Returns the union area
def box_union(a, b):
    i = box_intersection(a, b)
    u = a.w * a.h + b.w * b.h - i
    return u


# Compute the IoU of box a and box b
# a and b are both Box instances
# Returns the IoU of box a and box b
def box_iou(a, b):
    return box_intersection(a, b) / box_union(a, b)
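
# Worked sanity check for the helpers above: with two concentric boxes
# a = Box(0.5, 0.5, 0.4, 0.4) and b = Box(0.5, 0.5, 0.2, 0.2), b lies fully
# inside a, so box_intersection = 0.2 * 0.2 = 0.04,
# box_union = 0.16 + 0.04 - 0.04 = 0.16, and box_iou(a, b) = 0.25.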


# Initialize the centroids with k-means++ to reduce the effect of the random
# initialization on the final result
# boxes is the list of Box objects for all bounding boxes
# n_anchors is the k of k-means
# Returns the n_anchors initialized centroids
def init_centroids(boxes, n_anchors):
    centroids = []
    boxes_num = len(boxes)
    # pick the first centroid uniformly at random
    centroid_index = int(np.random.choice(boxes_num))
    centroids.append(boxes[centroid_index])
    print(centroids[0].w, centroids[0].h)
    for _ in range(n_anchors - 1):
        sum_distance = 0
        distance_thresh = 0
        distance_list = []
        cur_sum = 0
        # distance of each box to its nearest existing centroid (d = 1 - IoU)
        for box in boxes:
            min_distance = 1
            for centroid in centroids:
                distance = (1 - box_iou(box, centroid))
                if distance < min_distance:
                    min_distance = distance
            sum_distance += min_distance
            distance_list.append(min_distance)
        # sample the next centroid with probability proportional to its distance
        distance_thresh = sum_distance * np.random.random()
        for i in range(boxes_num):
            cur_sum += distance_list[i]
            if cur_sum > distance_thresh:
                centroids.append(boxes[i])
                print(boxes[i].w, boxes[i].h)
                break
    return centroids


# One k-means step: compute the new centroids
# boxes is the list of Box objects for all bounding boxes
# n_anchors is the k of k-means
# centroids are the current cluster centers
# Returns new_centroids (the recomputed cluster centers), groups (the list of
# boxes in each of the n_anchors clusters), and loss (the sum of the distances
# from every box to its nearest centroid)
def do_kmeans(n_anchors, boxes, centroids):
    loss = 0
    groups = []
    new_centroids = []
    for i in range(n_anchors):
        groups.append([])
        new_centroids.append(Box(0, 0, 0, 0))
    # assign each box to its nearest centroid (distance d = 1 - IoU)
    for box in boxes:
        min_distance = 1
        group_index = 0
        for centroid_index, centroid in enumerate(centroids):
            distance = (1 - box_iou(box, centroid))
            if distance < min_distance:
                min_distance = distance
                group_index = centroid_index
        groups[group_index].append(box)  # groups[group_index] is the cluster this box belongs to
        loss += min_distance
        new_centroids[group_index].w += box.w
        new_centroids[group_index].h += box.h
    # the new centroid is the mean width/height of its cluster
    for i in range(n_anchors):
        if len(groups[i]) == 0:
            # keep the old centroid if its cluster ended up empty
            new_centroids[i].w = centroids[i].w
            new_centroids[i].h = centroids[i].h
        else:
            new_centroids[i].w /= len(groups[i])
            new_centroids[i].h /= len(groups[i])
    return new_centroids, groups, loss


# Compute n_anchors centroids for the given bounding boxes
# label_path is the path of the training list file
# n_anchors is the number of anchors
# loss_convergence is the minimum change in loss allowed before stopping
# grid_size * grid_size is the number of grid cells
# iterations_num is the maximum number of iterations
# plus = 1 enables k-means++ initialization of the centroids
def compute_centroids(label_path, n_anchors, loss_convergence, grid_size, iterations_num, plus):
    boxes = []
    label_files = []
    # map each image path in the list to its darknet-style label file
    with open(label_path) as f:
        for line in f:
            label_file = line.rstrip().replace('images', 'labels')
            label_file = label_file.replace('JPEGImages', 'labels')
            label_file = label_file.replace('.jpg', '.txt')
            label_file = label_file.replace('.JPEG', '.txt')
            label_files.append(label_file)
    for label_file in label_files:
        with open(label_file) as f:
            for line in f:
                temp = line.strip().split(" ")
                if len(temp) > 1:
                    # darknet label line: class x_center y_center width height
                    boxes.append(Box(0, 0, float(temp[3]), float(temp[4])))
    # two ways to initialize the centroids: k-means++ seeding or random sampling
    if plus:
        centroids = init_centroids(boxes, n_anchors)
    else:
        # sample without replacement so the initial centroids are distinct
        centroid_indices = np.random.choice(len(boxes), n_anchors, replace=False)
        centroids = []
        for centroid_index in centroid_indices:
            centroids.append(boxes[centroid_index])
    # iterate k-means
    centroids, groups, old_loss = do_kmeans(n_anchors, boxes, centroids)
    iterations = 1
    while True:
        centroids, groups, loss = do_kmeans(n_anchors, boxes, centroids)
        iterations = iterations + 1
        print("loss = %f" % loss)
        # stop once the loss has converged or the iteration budget is exhausted
        if abs(old_loss - loss) < loss_convergence or iterations > iterations_num:
            break
        old_loss = loss
        for centroid in centroids:
            print(centroid.w * grid_size, centroid.h * grid_size)
    # print result
    print("k-means result:")
    for centroid in centroids:
        print(centroid.w * grid_size, centroid.h * grid_size)


label_path = "/raid/pengchong_data/Data/Lists/paul_train.txt"
n_anchors = 5
loss_convergence = 1e-6
grid_size = 13
iterations_num = 100
plus = 0
compute_centroids(label_path, n_anchors, loss_convergence, grid_size, iterations_num, plus)
```
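The script expects darknet-style label files: one object per line in the form `class x_center y_center width height`, all values normalized to [0, 1], of which only the last two columns are used here. Below is a minimal self-contained sketch with synthetic data; every file name in it is hypothetical:

```python
# Hypothetical end-to-end sketch: write one image-list entry and a matching
# darknet-style label file, then cluster its boxes into two anchors.
import os

os.makedirs('labels', exist_ok=True)
with open('labels/img0.txt', 'w') as f:
    f.write('0 0.50 0.50 0.30 0.40\n')  # class cx cy w h (normalized)
    f.write('1 0.30 0.60 0.10 0.20\n')
    f.write('0 0.70 0.20 0.25 0.15\n')
with open('train_list.txt', 'w') as f:
    # 'images' in the path is rewritten to 'labels' by compute_centroids
    f.write('images/img0.jpg\n')

compute_centroids('train_list.txt', n_anchors=2, loss_convergence=1e-6,
                  grid_size=13, iterations_num=100, plus=0)
```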