在使用 YOLOv5 的过程中,我们经常需要为每张图像生成对应的目标标注 txt 文件。这里提供一份代码,主要演示在用 OpenCV 读取图像标签(label)时,如何把目标框坐标正确地转换为 YOLOv5 所需的归一化坐标;在这个处理过程中,还会涉及图像连通域(轮廓)的判断。
import os
import shutil
import cv2 as cv
def dir_process(path):
    """Create ``path`` as an empty directory and return it with a trailing slash.

    If ``path`` already exists it is removed together with everything inside
    it and then created again, so the caller always starts from an empty
    directory. Missing parent directories are created as well.

    Args:
        path: Directory to (re)create.

    Returns:
        ``path`` with ``'/'`` appended.
    """
    if os.path.exists(path):
        # Drop whatever a previous run left behind.
        shutil.rmtree(path)
    # makedirs (unlike mkdir) also creates missing parents, which nested
    # output paths such as ".../labels/train" need on a first run.
    os.makedirs(path)
    return path + '/'
def parse_bmp_lists(path, suffix='bmp', train_ratio=1.0):
    """List the files in ``path`` ending with ``suffix`` and split them.

    Args:
        path: Directory to scan (not recursive).
        suffix: Only file names ending with this string are kept.
        train_ratio: Fraction of the matching files that goes into the
            training list. The default of 1.0 puts every file in the
            training list and leaves the validation list empty.

    Returns:
        A tuple ``(train_path, val_path, ids, ids_seg)`` where ``train_path``
        and ``val_path`` are lists of full file paths, ``ids`` is the list of
        matching file names (extension included) in the same order as the
        paths, and ``ids_seg`` is the index at which the split happens.
    """
    # os.listdir returns names in arbitrary order; sort so the split is
    # reproducible between runs and machines.
    ids = sorted(name for name in os.listdir(path) if name.endswith(suffix))
    bmp_files = [os.path.join(path, name) for name in ids]
    ids_seg = int(train_ratio * len(bmp_files))
    train_path = bmp_files[:ids_seg]
    # Start exactly at the split index so no file is dropped between the
    # two lists.
    val_path = bmp_files[ids_seg:]
    return train_path, val_path, ids, ids_seg
def convert(size_h, size_w, x, y, w, h):
    """Convert a pixel-space box into normalised darknet/YOLO coordinates.

    Args:
        size_h: Image height in pixels.
        size_w: Image width in pixels.
        x: Left edge of the box in pixels.
        y: Top edge of the box in pixels.
        w: Box width in pixels.
        h: Box height in pixels.

    Returns:
        ``(cent_x, cent_y, cent_w, cent_h)``: the box centre and size, each
        divided by the corresponding image dimension.
    """
    scale_x = 1. / size_w
    scale_y = 1. / size_h
    # The centre is the midpoint between one edge and the opposite edge.
    mid_x = (x + x + w) / 2.0
    mid_y = (y + y + h) / 2.0
    return mid_x * scale_x, mid_y * scale_y, w * scale_x, h * scale_y
# Input folders: "x" holds the source images, "y" holds the label images.
# Label images are looked up as "<image stem>_<k>.bmp" for k = 1..20.
train_val_bmp_path = 'G:/SegPC_2021_Train_data/x'
train_val_bound_path = 'G:/SegPC_2021_Train_data/y'
train_path, val_path, ids, ids_seg = parse_bmp_lists(train_val_bmp_path)
_, _, bound_ids, _ = parse_bmp_lists(train_val_bound_path)
# A set keeps the per-label membership test below O(1).
bound_id_set = set(bound_ids)

# Output folders; each one is emptied and recreated.
train_label_path = 'G:/SegPC_2021_Train_data/datasets/score/labels/train'
val_label_path = 'G:/SegPC_2021_Train_data/datasets/score/labels/val'
train_images_path = 'G:/SegPC_2021_Train_data/datasets/score/images/train'
val_images_path = 'G:/SegPC_2021_Train_data/datasets/score/images/val'
dir_process(train_label_path)
dir_process(val_label_path)
dir_process(train_images_path)
dir_process(val_images_path)

for i, file_id in enumerate(ids):
    # Swap only the extension; str.replace('bmp', ...) would also rewrite a
    # "bmp" that happens to appear inside the file name itself.
    stem = os.path.splitext(file_id)[0]
    img_name = stem + '.jpg'
    txt_name = stem + '.txt'
    if i >= ids_seg:
        # Only the training part is exported here; train_path has exactly
        # ids_seg entries, so indices from ids_seg on must not be used.
        continue
    print('process file: %s-->%s' % (img_name, txt_name))
    x_image = cv.imread(train_path[i])
    if x_image is None:
        # cv.imread returns None instead of raising on unreadable files.
        print('skip unreadable image: %s' % train_path[i])
        continue
    x_image = cv.resize(x_image, (1024, 1024))
    size_h, size_w = x_image.shape[:2]
    cv.imwrite(os.path.join(train_images_path, img_name), x_image)
    # The context manager guarantees the label file is flushed and closed.
    with open(os.path.join(train_label_path, txt_name), "a") as train_label_txt:
        for j in range(20):
            bound_id = stem + '_' + str(j + 1) + '.bmp'
            if bound_id not in bound_id_set:
                continue
            y_image = cv.imread(os.path.join(train_val_bound_path, bound_id))
            if y_image is None:
                print('skip unreadable label image: %s' % bound_id)
                continue
            # Resize like the source image so the boxes share its pixel grid.
            y_image = cv.resize(y_image, (1024, 1024))
            gray = cv.cvtColor(y_image, cv.COLOR_BGR2GRAY)
            bin_img = cv.threshold(gray, 10, 255, cv.THRESH_BINARY)[1]
            contours, _ = cv.findContours(bin_img, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
            for contour in contours:
                # Ignore small connected regions (area below 2000 px).
                if cv.contourArea(contour) < 2000:
                    continue
                x, y, w, h = cv.boundingRect(contour)
                cent_x, cent_y, cent_w, cent_h = convert(size_h, size_w, x, y, w, h)
                cls_id = 0
                b = (float(cent_x), float(cent_y), float(cent_w), float(cent_h))
                train_label_txt.write(str(cls_id) + " " + " ".join([str(a) for a in b]) + '\n')
下面再提供一份对图像进行旋转时对应的坐标变换代码。这里需要注意的一点是:旋转后图像尺寸会发生改变,而输入网络前必须统一尺寸,因此在坐标转换过程中还需要做一次缩放。
from PIL import Image
import numpy as np
import imutils
import os
import shutil
import cv2 as cv
import random
def dir_process(path):
    """Make sure ``path`` is an empty directory; return it with a trailing slash.

    An existing directory is deleted with all of its contents and then
    recreated. Missing parent directories are created too.

    Args:
        path: Directory to (re)create.

    Returns:
        ``path`` with ``'/'`` appended.
    """
    if os.path.exists(path):
        # Remove results left over from an earlier run.
        shutil.rmtree(path)
    # makedirs also builds missing parent directories, so the first run on a
    # fresh machine does not fail the way a plain mkdir would.
    os.makedirs(path)
    return path + '/'
def parse_bmp_lists(path, suffix='jpg'):
    """List the files in ``path`` whose names end with ``suffix``.

    Args:
        path: Directory to scan (not recursive).
        suffix: Only file names ending with this string are kept.

    Returns:
        A tuple ``(bmp_files, ids)``: the full paths of the matching files
        and, in the same order, their names with the extension removed.
    """
    # Sort because os.listdir gives no ordering guarantee.
    names = sorted(name for name in os.listdir(path) if name.endswith(suffix))
    bmp_files = [os.path.join(path, name) for name in names]
    # splitext removes the real extension whatever its length; slicing off a
    # fixed four characters is only right for three-letter extensions.
    ids = [os.path.splitext(name)[0] for name in names]
    return bmp_files, ids
def convert(size_h, size_w, x, y, w, h):
    """Normalise a pixel box (left, top, width, height) to darknet form.

    Args:
        size_h: Image height in pixels.
        size_w: Image width in pixels.
        x: Left edge of the box in pixels.
        y: Top edge of the box in pixels.
        w: Box width in pixels.
        h: Box height in pixels.

    Returns:
        ``(cent_x, cent_y, cent_w, cent_h)``, i.e. centre and size expressed
        as fractions of the image width/height.
    """
    inv_w = 1. / size_w
    inv_h = 1. / size_h
    # Average of the two opposite edges gives the box centre.
    centre_x = (x + x + w) / 2.0
    centre_y = (y + y + h) / 2.0
    normalised = (centre_x * inv_w, centre_y * inv_h, w * inv_w, h * inv_h)
    return normalised
def convert_(size_h, size_w, cent_x, cent_y, cent_w, cent_h):
    """Turn a normalised centre/size box back into a pixel-space box.

    Args:
        size_h: Image height in pixels.
        size_w: Image width in pixels.
        cent_x: Box centre x as a fraction of the image width.
        cent_y: Box centre y as a fraction of the image height.
        cent_w: Box width as a fraction of the image width.
        cent_h: Box height as a fraction of the image height.

    Returns:
        ``(x, y, w, h)`` in pixels, where ``x``/``y`` is the centre shifted
        back by half the box size.
    """
    scale_x = 1. * size_w
    scale_y = 1. * size_h
    pixel_cx = cent_x * scale_x
    pixel_cy = cent_y * scale_y
    w = cent_w * scale_x
    h = cent_h * scale_y
    # Step back half a box from the centre to reach the left/top edge.
    x = (pixel_cx * 2 - w) / 2.0
    y = (pixel_cy * 2 - h) / 2.0
    return x, y, w, h
def rotate_box(box, M, shape, out_size=1024):
    """Map a box through an affine matrix and rescale it to the output size.

    The four corners of ``box`` are transformed with ``M``; the axis-aligned
    rectangle enclosing the transformed corners is clamped to the image
    bounds and then scaled as if the image were resized to
    ``out_size`` x ``out_size``.

    Args:
        box: ``[y1, x1, y2, x2]`` in pixels of the image before the transform.
        M: Affine matrix applied to homogeneous ``(x, y, 1)`` column vectors;
            rows 0 and 1 of the product are read as the new x and y.
        shape: Shape of the transformed image; ``shape[0]`` is its height and
            ``shape[1]`` its width.
        out_size: Side length of the square image the result is scaled to.

    Returns:
        ``[y1, x1, y2, x2]`` of the enclosing box in the resized image.
    """
    y1, x1, y2, x2 = box
    # One homogeneous corner per column: (x1,y1), (x1,y2), (x2,y2), (x2,y1).
    corners = np.array(
        [
            [x1, x1, x2, x2],
            [y1, y2, y2, y1],
            [1, 1, 1, 1],
        ],
        dtype=float,
    )
    moved = np.matmul(M, corners)
    img_h, img_w = shape[0], shape[1]
    # Clamp every coordinate to the valid pixel range [0, dim - 1].
    x_min, x_max = np.clip([moved[0].min(), moved[0].max()], 0, img_w - 1)
    y_min, y_max = np.clip([moved[1].min(), moved[1].max()], 0, img_h - 1)
    # Width and height get their own factor: the transformed image is not
    # necessarily square, so one shared factor would distort x coordinates.
    scale_x = out_size / img_w
    scale_y = out_size / img_h
    return [y_min * scale_y, x_min * scale_x, y_max * scale_y, x_max * scale_x]
def random_rotate(cell, boxes, angle=45):
    """Rotate an image by ``angle`` degrees and transform its boxes to match.

    The output canvas is enlarged so the whole rotated image fits. Areas the
    rotation exposes are filled with a background value sampled from the
    input image. The image and the boxes are transformed with the same
    matrix so they cannot drift apart.

    Args:
        cell: Image array of shape ``(height, width, channels)``.
        boxes: Sequence of ``[y, x, y + h, x + w]`` boxes in pixels of
            ``cell``.
        angle: Rotation angle in degrees; it is passed negated to
            ``cv.getRotationMatrix2D``.

    Returns:
        ``(new_cell, new_boxes)``: the rotated ``uint8`` image and the boxes
        as returned by ``rotate_box``, as a NumPy array when there is at
        least one box and as an empty list otherwise.
    """
    (h, w, c) = cell.shape
    # Fill value: channel 0 of the pixel at row 0, column 1.
    # NOTE(review): presumably that pixel is always background - confirm.
    background = cell[0, 1, 0]
    (cX, cY) = (w // 2, h // 2)
    M = cv.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])
    # compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))
    # adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY
    # Fill the exposed border while warping, so zero-valued pixels that
    # belong to the image itself are left untouched.
    new_cell = cv.warpAffine(
        cell.astype('uint8'),
        M,
        (nW, nH),
        borderValue=(float(background),) * c,
    )
    new_boxes = [rotate_box(box, M, new_cell.shape) for box in boxes]
    if len(new_boxes) > 0:
        new_boxes = np.array(new_boxes)
    return new_cell, new_boxes
# Inputs: images and their YOLO label files ("<stem>.txt").
train_path = 'G:/SegPC_2021_Train_data/score/images/train'
label_path = 'G:/SegPC_2021_Train_data/score/labels/train'
# Outputs: rotated images and labels; both folders are emptied first.
train_rotate_path = 'G:/SegPC_2021_Train_data/img_rotate'
label_rotate_path = 'G:/SegPC_2021_Train_data/lab_rotate'
dir_process(train_rotate_path)
dir_process(label_rotate_path)
bmp_files, ids = parse_bmp_lists(train_path)

for i, file_id in enumerate(ids):
    print('process_file:', file_id)
    angle_random = random.randint(1, 90)
    # The chosen angle is part of the output file names.
    out_stem = file_id + '_' + str(angle_random)
    x_jpg_path = os.path.join(train_rotate_path, out_stem + '.jpg')
    y_txt_path = os.path.join(label_rotate_path, out_stem + '.txt')
    x_image = cv.imread(bmp_files[i])
    if x_image is None:
        # cv.imread returns None instead of raising on unreadable files.
        print('skip unreadable image:', bmp_files[i])
        continue
    img_h, img_w = x_image.shape[:2]
    txt_path = os.path.join(label_path, file_id + '.txt')
    boxes = []
    with open(txt_path, "r") as f:
        for line in f:
            # split() drops the newline itself; cutting the last character
            # would eat a digit when the final line has no trailing newline.
            fields = line.split()
            if len(fields) < 5:
                continue
            cent_x, cent_y, cent_w, cent_h = (float(v) for v in fields[1:5])
            # De-normalise with the real image size rather than assuming
            # every input is 1024x1024.
            x, y, w, h = convert_(img_h, img_w, cent_x, cent_y, cent_w, cent_h)
            boxes.append([y, x, y + h, x + w])
    new_img, new_boxes = random_rotate(x_image, boxes, angle=angle_random)
    # The rotated canvas is larger than the input; bring it to the network
    # size (rotate_box already returns boxes scaled to 1024).
    new_img = cv.resize(new_img, (1024, 1024))
    cv.imwrite(x_jpg_path, new_img)
    with open(y_txt_path, "a") as train_label_txt:
        for new_box in new_boxes:
            # Boxes come back as [y1, x1, y2, x2].
            x, y = new_box[1], new_box[0]
            w, h = new_box[3] - new_box[1], new_box[2] - new_box[0]
            cent_x, cent_y, cent_w, cent_h = convert(1024, 1024, x, y, w, h)
            cls_id = 0
            b = (float(cent_x), float(cent_y), float(cent_w), float(cent_h))
            train_label_txt.write(str(cls_id) + " " + " ".join([str(a) for a in b]) + '\n')
在 YOLO 系列的训练过程中,anchor 的生成也是关键的一步,亲测可以提升模型的效果。这里也提供一份用 k-means 生成 anchor 的代码。
import os
import cv2 as cv
import numpy as np
import xml.etree.ElementTree as ET
import glob
import random
def cas_iou(box, cluster):
    """IoU between one (width, height) pair and every cluster centre.

    Only sizes are compared: the overlap is the product of the element-wise
    minimum width and minimum height.

    Args:
        box: Indexable pair ``(width, height)``.
        cluster: 2-D array whose column 0 is width and column 1 is height.

    Returns:
        1-D array with one IoU value per row of ``cluster``.
    """
    overlap_w = np.minimum(cluster[:, 0], box[0])
    overlap_h = np.minimum(cluster[:, 1], box[1])
    overlap = overlap_w * overlap_h
    box_area = box[0] * box[1]
    cluster_area = cluster[:, 0] * cluster[:, 1]
    union = box_area + cluster_area - overlap
    return overlap / union
def avg_iou(box, cluster):
    """Mean over all boxes of each box's best IoU against the clusters.

    Args:
        box: 2-D array with one (width, height) pair per row.
        cluster: 2-D array of cluster centres in the same layout.

    Returns:
        The average of the per-box maximum IoU.
    """
    best_per_box = []
    for idx in range(box.shape[0]):
        best_per_box.append(np.max(cas_iou(box[idx], cluster)))
    return np.mean(best_per_box)
def kmeans(box, k):
    """Cluster (width, height) pairs with k-means using ``1 - IoU`` as distance.

    Centres start as ``k`` distinct rows of ``box`` picked at random and are
    updated to the per-cluster median until the assignment stops changing.

    Args:
        box: 2-D array with one (width, height) pair per row.
        k: Number of clusters; ``np.random.choice`` raises ``ValueError``
            when ``k`` exceeds the number of rows.

    Returns:
        Array of shape ``(k, 2)`` holding the cluster centres.
    """
    # Total number of boxes.
    row = box.shape[0]
    # distance[i, j] is the distance from box i to cluster centre j.
    distance = np.empty((row, k))
    # Assignment from the previous iteration. -1 is never a valid cluster
    # index, so the first pass cannot be mistaken for convergence.
    last_clu = np.full((row,), -1)
    np.random.seed()
    # Pick k distinct boxes as the initial centres. Work on a float copy so
    # medians are not truncated when ``box`` has an integer dtype.
    cluster = box[np.random.choice(row, k, replace=False)].astype(float)
    while True:
        # Distance of every box to every centre.
        for i in range(row):
            distance[i] = 1 - cas_iou(box[i], cluster)
        # Index of the closest centre for each box.
        near = np.argmin(distance, axis=1)
        if (last_clu == near).all():
            break
        # Move each centre to the median of its members.
        for j in range(k):
            members = box[near == j]
            if len(members) == 0:
                # The median of an empty set is NaN and would poison every
                # later IoU; keep the previous centre instead.
                continue
            cluster[j] = np.median(members, axis=0)
        last_clu = near
    return cluster
def load_data(path):
    """Collect normalised box sizes from the annotation XML files in ``path``.

    Every file matching ``<path>/*xml`` is parsed. For each ``object``
    element the bounding-box corners are divided by the image width/height
    read from ``size``, and the resulting width and height are stored.

    Args:
        path: Directory containing the XML annotation files.

    Returns:
        NumPy array with one ``[width, height]`` row per object, both
        expressed as fractions of the image size.
    """
    sizes = []
    # Look for boxes in every XML file.
    for xml_file in glob.glob('{}/*xml'.format(path)):
        tree = ET.parse(xml_file)
        height = int(tree.findtext('./size/height'))
        width = int(tree.findtext('./size/width'))
        # Read the four corners of every object, each divided by the
        # matching image dimension.
        for obj in tree.iter('object'):
            corner = {}
            for tag, extent in (('xmin', width), ('ymin', height),
                                ('xmax', width), ('ymax', height)):
                pixel = int(float(obj.findtext('bndbox/' + tag)))
                corner[tag] = np.float64(pixel / extent)
            # Keep only the width and height.
            sizes.append([corner['xmax'] - corner['xmin'],
                          corner['ymax'] - corner['ymin']])
    return np.array(sizes)
def parse_bmp_lists(path, suffix='bmp'):
    """List the files in ``path`` whose names end with ``suffix``.

    Args:
        path: Directory to scan (not recursive).
        suffix: Only file names ending with this string are kept.

    Returns:
        A tuple ``(bmp_files, ids)``: the full paths of the matching files
        and, in the same order, their names with the extension removed.
    """
    # os.listdir order is arbitrary; sort for reproducible results.
    names = sorted(name for name in os.listdir(path) if name.endswith(suffix))
    bmp_files = [os.path.join(path, name) for name in names]
    # Strip the actual extension instead of a fixed four characters, which
    # is only correct for three-letter extensions.
    ids = [os.path.splitext(name)[0] for name in names]
    return bmp_files, ids
def load_bound(path):
    """Collect normalised box sizes from the label images in ``path``.

    Every ``.bmp`` file is resized to 1024x1024, converted to grey,
    thresholded at 10, and its contours are extracted. Each contour with an
    area of at least 2000 px contributes the width and height of its
    bounding rectangle, divided by 1024.

    Args:
        path: Directory containing the ``.bmp`` label images.

    Returns:
        NumPy array with one ``[width, height]`` row per kept contour, both
        as fractions of the resized image size.
    """
    data = []
    width = 1024
    height = 1024
    _, y_ids = parse_bmp_lists(path)
    for mask_id in y_ids:
        y_bmp_path = os.path.join(path, mask_id + '.bmp')
        y_image = cv.imread(y_bmp_path)
        if y_image is None:
            # cv.imread returns None for unreadable files; skip them rather
            # than crash inside cv.resize.
            continue
        y_image = cv.resize(y_image, (width, height))
        gray = cv.cvtColor(y_image, cv.COLOR_BGR2GRAY)
        bin_img = cv.threshold(gray, 10, 255, cv.THRESH_BINARY)[1]
        contours, _ = cv.findContours(bin_img, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            # Ignore small connected regions.
            if cv.contourArea(contour) < 2000:
                continue
            _x, _y, w, h = cv.boundingRect(contour)
            # Only the box size matters for anchor clustering.
            data.append([np.float64(w / width), np.float64(h / height)])
    return np.array(data)
if __name__ == '__main__':
    # Clusters the box sizes found in the label images and writes the
    # resulting anchors to yolo_anchors.txt.
    SIZE = 1024
    anchors_num = 9
    # Data source. VOC-style XML annotations can be used instead via
    # load_data(), e.g. load_data(r'./VOCdevkit/VOC2007/Annotations').
    train_val_bound_path = 'G:/SegPC_2021_Train_data/y'
    # Each row is a (width, height) pair stored as a fraction of the image.
    data = load_bound(train_val_bound_path)
    if len(data) < anchors_num:
        # Fail with a clear message instead of an opaque sampling error.
        raise ValueError(
            'need at least %d boxes to build %d anchors, found %d'
            % (anchors_num, anchors_num, len(data))
        )
    # Run k-means on the box sizes.
    out = kmeans(data, anchors_num)
    # Order the anchors by width.
    out = out[np.argsort(out[:, 0])]
    print('acc:{:.2f}%'.format(avg_iou(data, out) * 100))
    print(out * SIZE)
    data = out * SIZE
    # Write "w,h, w,h, ..." on one line; the context manager closes the file
    # even if formatting fails.
    with open("yolo_anchors.txt", 'w') as f:
        f.write(", ".join("%d,%d" % (anchor[0], anchor[1]) for anchor in data))