Generating the target label files YOLOv5 needs

When working with YOLOv5 you will need to generate the corresponding target txt label files. The code below is mainly meant to show how, when reading the label (mask) images with OpenCV, to convert the coordinates correctly into the form YOLOv5 expects; the processing also involves connected-component (contour) analysis of the masks.
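
For reference (not part of the original script): each line of a YOLOv5 label txt is `class cx cy w h`, with the box center and size normalized by the image width and height. A minimal sketch of that conversion, with made-up numbers:

# Minimal sketch: converting a pixel box (x, y, w, h) on a 1024x1024 image
# into a YOLOv5 label line. The numbers are invented for illustration.
img_w, img_h = 1024, 1024
x, y, w, h = 100, 200, 50, 80      # top-left corner plus width/height in pixels
cx = (x + w / 2) / img_w           # normalized center x -> 0.1220703125
cy = (y + h / 2) / img_h           # normalized center y -> 0.234375
nw = w / img_w                     # normalized width    -> 0.048828125
nh = h / img_h                     # normalized height   -> 0.078125
print(f"0 {cx} {cy} {nw} {nh}")    # one label line for class 0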

import os
import shutil
import cv2 as cv


def dir_process(path):
    if not os.path.exists(path):
        os.mkdir(path)
    else:
        shutil.rmtree(path)
        os.mkdir(path)
    return path + '/'


def parse_bmp_lists(path, suffix='bmp'):
    bmp_files = []
    ids = []
    files = os.listdir(path)
    for file in files:
        if file.endswith(suffix):
            bmp_files.append(os.path.join(path, file))
            ids.append(file)
    # train/val split point; the ratio 1.0 puts every image into the training list
    ids_seg = int(1.0 * len(bmp_files))
    train_path = bmp_files[:ids_seg]
    val_path = bmp_files[ids_seg:]
    return train_path, val_path, ids, ids_seg


def convert(size_h, size_w, x, y, w, h):
    '''
    Convert a pixel-space box (x, y, w, h) into normalized darknet/YOLO
    coordinates (center x, center y, width, height), all scaled to [0, 1].
    '''
    dw = 1. / (size_w)
    dh = 1. / (size_h)
    cent_x = (x + x+w) / 2.0
    cent_y = (y + y+h) / 2.0
    cent_w = w
    cent_h = h
    cent_x = cent_x * dw
    cent_w = cent_w * dw
    cent_y = cent_y * dh
    cent_h = cent_h * dh
    return cent_x, cent_y, cent_w, cent_h


train_val_bmp_path = 'G:/SegPC_2021_Train_data/x'
train_val_bound_path = 'G:/SegPC_2021_Train_data/y'
train_path, val_path, ids, ids_seg = parse_bmp_lists(train_val_bmp_path)
_, _, bound_ids, _ = parse_bmp_lists(train_val_bound_path)
train_label_path = 'G:/SegPC_2021_Train_data/datasets/score/labels/train'
val_label_path = 'G:/SegPC_2021_Train_data/datasets/score/labels/val'
train_images_path = 'G:/SegPC_2021_Train_data/datasets/score/images/train'
val_images_path = 'G:/SegPC_2021_Train_data/datasets/score/images/val'
dir_process(train_label_path)
dir_process(val_label_path)
dir_process(train_images_path)
dir_process(val_images_path)

for i, id in enumerate(ids):
    img_name = id.replace('bmp', 'jpg')
    txt_name = id.replace('bmp', 'txt')
    if i < ids_seg:
        print('process file: %s-->%s' % (img_name, txt_name))
        train_label_img_path = os.path.join(train_images_path, img_name)
        train_label_txt_path = os.path.join(train_label_path, txt_name)
        train_label_txt = open(train_label_txt_path, "a")
        x_image = cv.imread(train_path[i])
        x_image = cv.resize(x_image, (1024, 1024))
        size_h, size_w = x_image.shape[:2]
        cv.imwrite(train_label_img_path, x_image)
        # each image can have up to 20 instance masks named <id>_1.bmp ... <id>_20.bmp
        for j in range(20):
            bound_id = id[:-4]+'_'+str(j+1)+'.bmp'
            if bound_id in bound_ids:
                y_bmp_path = os.path.join(train_val_bound_path, bound_id)
                y_image = cv.imread(y_bmp_path)
                y_image = cv.resize(y_image, (1024, 1024))
                gray = cv.cvtColor(y_image, cv.COLOR_BGR2GRAY)
                bin_img = cv.threshold(gray, 10, 255, cv.THRESH_BINARY)[1]
                contours, _ = cv.findContours(bin_img, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
                for contour in contours:
                    area = cv.contourArea(contour)
                    if area >= 2000:
                        x, y, w, h = cv.boundingRect(contour)
                        cent_x, cent_y, cent_w, cent_h = convert(size_h, size_w, x, y, w, h)
                        cls_id = 0
                        b = (float(cent_x), float(cent_y), float(cent_w), float(cent_h))
                        train_label_txt.write(str(cls_id) + " " + " ".join([str(a) for a in b]) + '\n')
        # all instance masks for this image have been handled; flush and close the label file
        train_label_txt.close()
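
To sanity-check the generated labels, a small sketch like the one below (not part of the original pipeline; the two paths are placeholders for one image/label pair produced above) converts the normalized boxes back to pixel corners and draws them with OpenCV:

# Verification sketch (paths are placeholders): read one label file, undo the
# normalization and draw the boxes on the corresponding resized image.
import cv2 as cv

img_path = 'G:/SegPC_2021_Train_data/datasets/score/images/train/example.jpg'  # placeholder
txt_path = 'G:/SegPC_2021_Train_data/datasets/score/labels/train/example.txt'  # placeholder
img = cv.imread(img_path)
h, w = img.shape[:2]
with open(txt_path) as f:
    for line in f:
        cls_id, cx, cy, bw, bh = line.split()
        cx, cy, bw, bh = float(cx) * w, float(cy) * h, float(bw) * w, float(bh) * h
        x1, y1 = int(cx - bw / 2), int(cy - bh / 2)
        x2, y2 = int(cx + bw / 2), int(cy + bh / 2)
        cv.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv.imwrite('check_labels.jpg', img)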

Next, a second script for the case where we need to rotate images (for augmentation) and transform the box coordinates accordingly. One point to note: rotation changes the image size, and since images must be resized to a uniform size before being fed to the network, a scaling step is needed during the conversion.

from PIL import Image
import numpy as np
import imutils
import os
import shutil
import cv2 as cv
import random


def dir_process(path):
    if not os.path.exists(path):
        os.mkdir(path)
    else:
        shutil.rmtree(path)
        os.mkdir(path)
    return path + '/'


def parse_bmp_lists(path, suffix='jpg'):
    bmp_files = []
    ids = []
    files = os.listdir(path)
    for file in files:
        if file.endswith(suffix):
            bmp_files.append(os.path.join(path, file))
            ids.append(file[:-4])
    return bmp_files, ids


def convert(size_h, size_w, x, y, w, h):
    '''
    Convert a pixel-space box (x, y, w, h) into normalized darknet/YOLO
    coordinates (center x, center y, width, height), all scaled to [0, 1].
    '''
    dw = 1. / (size_w)
    dh = 1. / (size_h)
    cent_x = (x + x + w) / 2.0
    cent_y = (y + y + h) / 2.0
    cent_w = w
    cent_h = h
    cent_x = cent_x * dw
    cent_w = cent_w * dw
    cent_y = cent_y * dh
    cent_h = cent_h * dh
    return cent_x, cent_y, cent_w, cent_h


def convert_(size_h, size_w, cent_x, cent_y, cent_w, cent_h):
    # inverse of convert(): map normalized YOLO coords back to a pixel-space (x, y, w, h) box
    dw = 1. * (size_w)
    dh = 1. * (size_h)
    cent_x = cent_x * dw
    cent_w = cent_w * dw
    cent_y = cent_y * dh
    cent_h = cent_h * dh
    w = cent_w
    h = cent_h
    x = (cent_x * 2 - w) / 2.0
    y = (cent_y * 2 - h) / 2.0
    return x, y, w, h


def rotate_box(box, M, shape):
    # box is [y1, x1, y2, x2] in the original image; the rotated image is later
    # resized to 1024x1024, so scale the coordinates by the same factor (a square
    # input keeps a square rotated canvas, so a single factor is enough)
    rote = 1024 / shape[0]
    y1, x1, y2, x2 = box
    p1 = np.array([x1, y1, 1]).reshape((3, 1))
    p2 = np.array([x1, y2, 1]).reshape((3, 1))
    p3 = np.array([x2, y2, 1]).reshape((3, 1))
    p4 = np.array([x2, y1, 1]).reshape((3, 1))
    p1 = np.matmul(M, p1)
    p2 = np.matmul(M, p2)
    p3 = np.matmul(M, p3)
    p4 = np.matmul(M, p4)
    x1 = np.min([p1[0, 0], p2[0, 0], p3[0, 0], p4[0, 0]])
    x2 = np.max([p1[0, 0], p2[0, 0], p3[0, 0], p4[0, 0]])
    y1 = np.min([p1[1, 0], p2[1, 0], p3[1, 0], p4[1, 0]])
    y2 = np.max([p1[1, 0], p2[1, 0], p3[1, 0], p4[1, 0]])
    if x1 < 0:
        x1 = 0
    if x1 > shape[1]:
        x1 = shape[1] - 1
    if x2 < 0:
        x2 = 0
    if x2 > shape[1]:
        x2 = shape[1] - 1

    if y1 < 0:
        y1 = 0
    if y1 > shape[0]:
        y1 = shape[0] - 1
    if y2 < 0:
        y2 = 0
    if y2 > shape[0]:
        y2 = shape[0] - 1

    box = [y1 * rote, x1 * rote, y2 * rote, x2 * rote]
    return box


def random_rotate(cell, boxes, angle=45):
    # boxes y, x, y+h, x+w
    (h, w, c) = cell.shape
    # sample a background intensity from a pixel near the corner; it is used to
    # fill the black border introduced by the rotation
    background = cell.copy()[0, 1, 0]
    (cX, cY) = (w // 2, h // 2)
    new_cell = imutils.rotate_bound(cell.astype('uint8'), angle)
    # build the same rotation matrix imutils uses so the boxes can be transformed too
    M = cv.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # adjust the rotation matrix to account for the translation into the new canvas
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY
    new_boxes = []
    for i in range(len(boxes)):
        new_boxes.append(rotate_box(boxes[i], M, new_cell.shape))
    if len(new_boxes) > 0:
        new_boxes = np.array(new_boxes)
    # fill the black rotation border with the sampled background value
    new_cell = np.where(new_cell == 0, background, new_cell)
    return new_cell, new_boxes


train_path = 'G:/SegPC_2021_Train_data/score/images/train'
label_path = 'G:/SegPC_2021_Train_data/score/labels/train'
train_rotate_path = 'G:/SegPC_2021_Train_data/img_rotate'
label_rotate_path = 'G:/SegPC_2021_Train_data/lab_rotate'
dir_process(train_rotate_path)
dir_process(label_rotate_path)
bmp_files, ids = parse_bmp_lists(train_path)
for i, id in enumerate(ids):
    print('process_file:', id)
    angle_random = random.randint(1, 90)
    x_jpg_path = os.path.join(train_rotate_path, id + '_' + str(angle_random) + '.jpg')
    y_txt_path = os.path.join(label_rotate_path, id + '_' + str(angle_random) + '.txt')
    train_label_txt = open(y_txt_path, "a")
    x_image = cv.imread(bmp_files[i])
    txt_path = os.path.join(label_path, id + '.txt')
    boxes = []
    with open(txt_path, "r") as f:
        for line in f.readlines():
            parts = line.strip().split(' ')
            cent_x = float(parts[1])
            cent_y = float(parts[2])
            cent_w = float(parts[3])
            cent_h = float(parts[4])
            x, y, w, h = convert_(1024, 1024, cent_x, cent_y, cent_w, cent_h)
            boxes.append([y, x, y + h, x + w])
    new_img, new_boxes = random_rotate(x_image, boxes, angle=angle_random)
    new_img = cv.resize(new_img, (1024, 1024))
    cv.imwrite(x_jpg_path, new_img)
    for new_box in new_boxes:
        x, y, w, h = new_box[1], new_box[0], new_box[3] - new_box[1], new_box[2] - new_box[0]
        cent_x, cent_y, cent_w, cent_h = convert(1024, 1024, x, y, w, h)
        print(cent_x, cent_y, cent_w, cent_h)
        print('--------------')
        cls_id = 0
        b = (float(cent_x), float(cent_y), float(cent_w), float(cent_h))
        train_label_txt.write(str(cls_id) + " " + " ".join([str(a) for a in b]) + '\n')
    train_label_txt.close()
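
To see the corner mapping in isolation, here is a tiny standalone sketch (the angle, image size and test point are arbitrary) that builds the same translation-adjusted rotation matrix and pushes a single corner through it; rotate_box above does exactly this for all four corners and then takes the min/max to get the new axis-aligned box:

# Minimal sketch of the corner transform used in rotate_box (values are arbitrary).
import numpy as np
import cv2 as cv

h, w = 1024, 1024
angle = 30
cX, cY = w // 2, h // 2
M = cv.getRotationMatrix2D((cX, cY), -angle, 1.0)
cos, sin = np.abs(M[0, 0]), np.abs(M[0, 1])
nW, nH = int(h * sin + w * cos), int(h * cos + w * sin)   # size of the expanded canvas
M[0, 2] += (nW / 2) - cX                                   # keep the canvas centered
M[1, 2] += (nH / 2) - cY
corner = np.array([100, 200, 1]).reshape(3, 1)             # homogeneous pixel (x=100, y=200)
print(np.matmul(M, corner)[:2, 0])                         # the corner's position after rotation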

In training the YOLO family of models, anchor generation is also a key step; in my own tests it improves the model's results. Here is a script that generates anchors with k-means.

import os
import cv2 as cv
import numpy as np
import xml.etree.ElementTree as ET
import glob
import random


def cas_iou(box, cluster):
    # IoU between one (w, h) box and every cluster center, with all boxes
    # anchored at the same corner so only the sizes matter
    x = np.minimum(cluster[:, 0], box[0])
    y = np.minimum(cluster[:, 1], box[1])

    intersection = x * y
    area1 = box[0] * box[1]

    area2 = cluster[:, 0] * cluster[:, 1]
    iou = intersection / (area1 + area2 - intersection)

    return iou


def avg_iou(box, cluster):
    return np.mean([np.max(cas_iou(box[i], cluster)) for i in range(box.shape[0])])


def kmeans(box, k):
    # total number of boxes
    row = box.shape[0]

    # (1 - IoU) distance from every box to every cluster center
    distance = np.empty((row, k))

    # cluster assignment from the previous iteration
    last_clu = np.zeros((row,))

    np.random.seed()

    # randomly pick k boxes as the initial cluster centers
    cluster = box[np.random.choice(row, k, replace=False)]
    while True:
        # distance of every box to the k centers, measured as 1 - IoU
        for i in range(row):
            distance[i] = 1 - cas_iou(box[i], cluster)

        # assign each box to its nearest cluster center
        near = np.argmin(distance, axis=1)

        if (last_clu == near).all():
            break

        # update each cluster center to the element-wise median of its members
        for j in range(k):
            cluster[j] = np.median(box[near == j], axis=0)

        last_clu = near

    return cluster


def load_data(path):
    data = []
    # look for boxes in every xml annotation file
    for xml_file in glob.glob('{}/*xml'.format(path)):
        tree = ET.parse(xml_file)
        height = int(tree.findtext('./size/height'))
        width = int(tree.findtext('./size/width'))
        # get the width and height of every object
        for obj in tree.iter('object'):
            xmin = int(float(obj.findtext('bndbox/xmin'))) / width
            ymin = int(float(obj.findtext('bndbox/ymin'))) / height
            xmax = int(float(obj.findtext('bndbox/xmax'))) / width
            ymax = int(float(obj.findtext('bndbox/ymax'))) / height

            xmin = np.float64(xmin)
            ymin = np.float64(ymin)
            xmax = np.float64(xmax)
            ymax = np.float64(ymax)
            # store the normalized width and height
            data.append([xmax - xmin, ymax - ymin])
    return np.array(data)


def parse_bmp_lists(path, suffix='bmp'):
    bmp_files = []
    ids = []
    files = os.listdir(path)
    for file in files:
        if file.endswith(suffix):
            bmp_files.append(os.path.join(path, file))
            ids.append(file[:-4])
    return bmp_files, ids


def load_bound(path):
    data = []
    width = 1024
    height = 1024
    _, y_ids = parse_bmp_lists(path)
    for i, id in enumerate(y_ids):
        # read the instance mask image
        y_bmp_path = os.path.join(path, id + '.bmp')
        y_image = cv.imread(y_bmp_path)
        y_image = cv.resize(y_image, (1024, 1024))
        gray = cv.cvtColor(y_image, cv.COLOR_BGR2GRAY)
        bin_img = cv.threshold(gray, 10, 255, cv.THRESH_BINARY)[1]
        contours, _ = cv.findContours(bin_img, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            area = cv.contourArea(contour)
            if area >= 2000:
                x, y, w, h = cv.boundingRect(contour)
                xmin = int(x) / width
                ymin = int(y) / height
                xmax = int(x+w) / width
                ymax = int(y+h) / height

                xmin = np.float64(xmin)
                ymin = np.float64(ymin)
                xmax = np.float64(xmax)
                ymax = np.float64(ymax)
                # store the normalized width and height
                data.append([xmax - xmin, ymax - ymin])
    return np.array(data)


if __name__ == '__main__':
    # Running this script clusters the box sizes and writes yolo_anchors.txt.
    # Here the boxes come from the SegPC mask images via load_bound();
    # load_data() can be used instead on a folder of VOC-style xml annotations.
    SIZE = 1024
    anchors_num = 9
    # dataset path (a VOC xml folder would go through load_data instead)
    # path = r'./VOCdevkit/VOC2007/Annotations'
    train_val_bound_path = 'G:/SegPC_2021_Train_data/y'

    # boxes are stored as width,height ratios relative to the image size
    data = load_bound(train_val_bound_path)

    # run k-means clustering on the box dimensions
    out = kmeans(data, anchors_num)
    out = out[np.argsort(out[:, 0])]
    print('acc:{:.2f}%'.format(avg_iou(data, out) * 100))
    print(out * SIZE)
    data = out * SIZE
    f = open("yolo_anchors.txt", 'w')
    row = np.shape(data)[0]
    for i in range(row):
        if i == 0:
            x_y = "%d,%d" % (data[i][0], data[i][1])
        else:
            x_y = ", %d,%d" % (data[i][0], data[i][1])
        f.write(x_y)
    f.close()
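
The nine anchors then have to be distributed over YOLOv5's three detection scales, with the smallest anchors on the P3/8 head and the largest on P5/32. The sketch below (an assumption about how you integrate the result, not part of the original script) reads yolo_anchors.txt back and prints a grouping in the layout used by YOLOv5's model yaml files:

# Sketch: group the 9 k-means anchors (already sorted by width above) into the
# three scales expected by a YOLOv5 model yaml. Assumes yolo_anchors.txt was
# written by the script above, e.g. "13,15, 25,30, ...".
pairs = []
with open('yolo_anchors.txt') as f:
    values = [int(v) for v in f.read().replace(' ', '').split(',')]
for i in range(0, len(values), 2):
    pairs.append((values[i], values[i + 1]))

heads = ['P3/8', 'P4/16', 'P5/32']
print('anchors:')
for n, head in enumerate(heads):
    scale = pairs[n * 3:(n + 1) * 3]
    line = ', '.join('%d,%d' % wh for wh in scale)
    print('  - [%s]  # %s' % (line, head))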

 

 
