keras-yolo3项目之四：kmeans.py注释

最新推荐文章于 2023-04-08 23:07:01 发布

great-wind

最新推荐文章于 2023-04-08 23:07:01 发布

阅读量650

点赞数

分类专栏：深度学习

本文链接：https://blog.csdn.net/csdn1e/article/details/109206700

版权

深度学习专栏收录该内容

21 篇文章 1 订阅

订阅专栏

kmeans.py文件主要是对训练数据集中标注框的宽和高进行K-means聚类，最终得到9个聚类中心（即anchor box的宽和高），并将这9个聚类结果存储在yolo_anchors.txt文件中。

import numpy as np


class YOLO_Kmeans:
    """K-means clustering of bounding-box sizes, used to derive YOLO anchors.

    Boxes are described by ``[width, height]`` only, and ``1 - IoU`` is used
    as the distance between a box and a cluster centre.
    """

    def __init__(self, cluster_number, filename):
        """Store the clustering configuration.

        Args:
            cluster_number: Number of clusters (anchors) to produce.
            filename: Path of the annotation file read by ``txt2boxes``.
        """
        self.cluster_number = cluster_number
        # Use the path supplied by the caller rather than a hard-coded name.
        self.filename = filename

    def iou(self, boxes, clusters):  # 1 box -> k clusters
        """Return the IoU of every box against every cluster centre.

        Only widths and heights are compared, so the intersection of a box
        and a cluster is ``min(widths) * min(heights)``.

        Args:
            boxes: Array-like of shape (n, 2) with ``[width, height]`` rows.
            clusters: Array-like of shape (k, 2) with ``[width, height]`` rows.

        Returns:
            Array of shape (n, k); entry ``[i, j]`` is the IoU of box ``i``
            with cluster ``j``.
        """
        boxes = np.asarray(boxes)
        clusters = np.asarray(clusters)

        # Column vectors (n, 1) for boxes and row vectors (1, k) for clusters
        # broadcast to the full (n, k) grid without explicit repeat/tile.
        box_w = boxes[:, 0][:, np.newaxis]
        box_h = boxes[:, 1][:, np.newaxis]
        cluster_w = clusters[:, 0][np.newaxis, :]
        cluster_h = clusters[:, 1][np.newaxis, :]

        inter_area = np.minimum(box_w, cluster_w) * np.minimum(box_h, cluster_h)
        box_area = box_w * box_h
        cluster_area = cluster_w * cluster_h

        return inter_area / (box_area + cluster_area - inter_area)

    def avg_iou(self, boxes, clusters):
        """Return the mean, over all boxes, of each box's best IoU with any cluster."""
        return np.mean(np.max(self.iou(boxes, clusters), axis=1))

    def kmeans(self, boxes, k, dist=np.median):
        """Cluster box sizes into ``k`` groups using ``1 - IoU`` as distance.

        The initial centres are ``k`` distinct boxes drawn with
        ``np.random.choice``; seed ``np.random`` beforehand for reproducible
        results.

        Args:
            boxes: Array-like of shape (n, 2) with ``[width, height]`` rows.
            k: Number of clusters to produce.
            dist: Reducer called as ``dist(members, axis=0)`` to compute a new
                centre from the boxes assigned to a cluster.

        Returns:
            Array of shape (k, 2) holding the cluster centres. The array has
            the dtype of ``boxes``, so integer input yields centres truncated
            to integers.
        """
        boxes = np.asarray(boxes)
        box_number = boxes.shape[0]

        # Fancy indexing copies, so updating the centres never mutates `boxes`.
        clusters = boxes[np.random.choice(
            box_number, k, replace=False)]  # init k clusters

        # -1 is never a valid cluster index, so the first pass always updates
        # the centres even when every box initially lands in cluster 0.
        last_nearest = np.full(box_number, -1)

        while True:
            distances = 1 - self.iou(boxes, clusters)
            current_nearest = np.argmin(distances, axis=1)
            if (last_nearest == current_nearest).all():
                break  # clusters won't change

            for cluster in range(k):
                members = boxes[current_nearest == cluster]
                # An empty cluster keeps its previous centre; reducing an
                # empty selection would produce NaN.
                if len(members):
                    clusters[cluster] = dist(members, axis=0)  # update clusters

            last_nearest = current_nearest

        return clusters

    def result2txt(self, data, output_path="yolo_anchors.txt"):
        """Write the anchors to a text file on a single line.

        Each row is formatted as ``w,h`` with ``%d`` and rows are joined with
        ``", "``.

        Args:
            data: Sequence of ``[width, height]`` rows.
            output_path: Destination file; defaults to ``yolo_anchors.txt``.
        """
        anchors = ", ".join("%d,%d" % (row[0], row[1]) for row in data)
        with open(output_path, 'w') as f:
            f.write(anchors)

    def txt2boxes(self):
        """Read the annotation file and return every box's width and height.

        Each line is expected to hold an image path followed by
        whitespace-separated boxes, each written as
        ``x_min,y_min,x_max,y_max,class_id``.

        Returns:
            Array of ``[width, height]`` rows, one per annotated box.
        """
        dataSet = []
        with open(self.filename, 'r') as f:
            for line in f:
                # split() without arguments tolerates trailing newlines and
                # repeated separators; the first token is the image path.
                for box in line.split()[1:]:
                    coords = box.split(",")
                    width = int(coords[2]) - int(coords[0])
                    height = int(coords[3]) - int(coords[1])
                    dataSet.append([width, height])
        return np.array(dataSet)

    def txt2clusters(self):
        """Cluster the boxes from the annotation file and save the anchors.

        Reads the boxes, runs ``kmeans`` with ``self.cluster_number`` clusters,
        orders the centres by width, writes them with ``result2txt`` and
        prints the anchors together with their average IoU.

        Returns:
            The sorted array of cluster centres.
        """
        all_boxes = self.txt2boxes()
        result = self.kmeans(all_boxes, k=self.cluster_number)
        # lexsort with the width column as its only key: ascending width.
        result = result[np.lexsort(result.T[0, None])]
        self.result2txt(result)
        print("K anchors:\n {}".format(result))
        print("Accuracy: {:.2f}%".format(
            self.avg_iou(all_boxes, result) * 100))
        return result


if __name__ == "__main__":
    # Expected layout of 2012_train.txt, one image per line:
    #     image_file_path box1 box2 ... boxN
    # where each box is written as:
    #     x_min,y_min,x_max,y_max,class_id
    # Example:
    #     path/to/img1.jpg 50,100,150,200,0 30,50,200,120,3
    #     path/to/img2.jpg 120,300,250,600,2
    filename = "2012_train.txt"
    cluster_number = 9
    kmeans = YOLO_Kmeans(cluster_number=cluster_number, filename=filename)
    kmeans.txt2clusters()