问题的提出
- yolov3 默认的9个 anchors 是在 Pascal VOC 数据集上聚类产生的,由于更换了自己的数据集,便想尝试聚类出自己的 anchors,看看对网络的精度是否有提升
anchors 获得方法的比较
- Faster RCNN 的9个 anchors 是大佬们根据工程经验手动设置的,三个尺度三个比例
- yolov3 的anchors是依靠 K-means 自动聚类生成的
实现思路
- 将标注信息中的 bbox 的大小(w, h)提取至列表中
- 将 boxes 列表输入 K-means 聚类出9个 anchors
- 将 anchors 写入 txt 文件
代码实现
import xml.etree.ElementTree as ET
import numpy as np
import os
def load_dataset(root, dirlist):
    """Collect normalised box sizes from Pascal VOC style XML annotations.

    Args:
        root: Directory that contains the annotation files.
        dirlist: Iterable of annotation file names, relative to ``root``.

    Returns:
        A float array of shape ``(n, 2)``. Each row is the
        ``(width, height)`` of one annotated box, divided by the image
        width and height respectively. The shape is ``(0, 2)`` when no
        usable box is found.
    """
    dataset = []
    for dirname in dirlist:
        tree = ET.parse(os.path.join(root, dirname))
        # float() accepts both "500" and "500.0"; int() rejects the latter.
        height = float(tree.findtext("./size/height"))
        width = float(tree.findtext("./size/width"))
        if width <= 0 or height <= 0:
            # Relative coordinates cannot be computed without an image size.
            continue
        for obj in tree.iter("object"):
            # Convert to coordinates relative to the image size.
            xmin = float(obj.findtext("bndbox/xmin")) / width
            ymin = float(obj.findtext("bndbox/ymin")) / height
            xmax = float(obj.findtext("bndbox/xmax")) / width
            ymax = float(obj.findtext("bndbox/ymax")) / height
            box_w = xmax - xmin
            box_h = ymax - ymin
            if box_w <= 0 or box_h <= 0:
                # Degenerate boxes have no area and would make the IoU
                # computation fail later, so they are left out.
                continue
            dataset.append([box_w, box_h])
    # reshape keeps the (n, 2) layout even when the list is empty.
    return np.array(dataset, dtype=float).reshape(-1, 2)
def iou(box, clusters):
    """Compute the IoU between one box and every cluster box.

    Only sizes are compared: the intersection is taken as
    ``min(widths) * min(heights)``, i.e. the boxes are overlaid as if they
    shared a common corner.

    Args:
        box: Array-like ``(width, height)`` of a single box.
        clusters: Array-like with one row per cluster; column 0 holds the
            width and column 1 the height.

    Returns:
        A 1-D array with the IoU of ``box`` against each cluster.

    Raises:
        ValueError: If the element-wise minimum width or height is zero
            for any cluster (the box or a cluster has a zero side).
    """
    box = np.asarray(box)
    clusters = np.asarray(clusters)

    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.any(x == 0) or np.any(y == 0):
        raise ValueError("Box has no area")

    # Area of the overlapping region.
    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    iou_ = intersection / (box_area + cluster_area - intersection)
    return iou_
def avg_iou(boxes, clusters):
    """Return the mean, over all boxes, of the best IoU with any cluster.

    Args:
        boxes: Array with one ``(width, height)`` row per box.
        clusters: Array with one ``(width, height)`` row per cluster.

    Returns:
        The average of each box's maximum IoU against ``clusters``.
    """
    return np.mean([np.max(iou(box, clusters)) for box in boxes])
def kmeans(boxes, k, dist=np.median, seed=None):
    """Cluster box sizes with k-means, using ``1 - IoU`` as the distance.

    Args:
        boxes: Array with one ``(width, height)`` row per box.
        k: Number of clusters to produce.
        dist: Reduction used to recompute a cluster from its member boxes;
            called as ``dist(members, axis=0)``.
        seed: Optional seed for the initial cluster selection. ``None``
            draws fresh entropy, so every run may differ.

    Returns:
        An array with one row per cluster holding the cluster box size.

    Raises:
        ValueError: If ``k`` exceeds the number of boxes (raised by the
            random selection), or if ``iou`` rejects a box.
    """
    boxes = np.asarray(boxes, dtype=float)
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    # None (rather than an all-zero vector) so that a first assignment
    # consisting only of cluster 0 is not mistaken for convergence.
    last_clusters = None

    # A local generator avoids reseeding NumPy's global random state.
    rng = np.random.RandomState(seed)
    # Fancy indexing copies, so updating clusters never touches boxes.
    clusters = boxes[rng.choice(rows, k, replace=False)]

    while True:
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        if last_clusters is not None and (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            # An empty cluster (e.g. two identical seed boxes) would make
            # dist() return NaN; keep the previous centre instead.
            if members.shape[0] > 0:
                clusters[cluster] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters
def main():
    """Cluster the annotations in ``./annotation`` into 9 anchors.

    Reads the XML annotation files, runs k-means on the relative box
    sizes, scales the result by the network input size, writes the anchors
    and the average IoU to ``anchors.txt`` and prints them.
    """
    root_path = "./annotation"
    # os.listdir returns every directory entry; only XML files can be
    # parsed as annotations. Sorting gives a stable file order.
    filename = sorted(
        name for name in os.listdir(root_path) if name.lower().endswith(".xml")
    )
    data = load_dataset(root_path, filename)
    print(data.shape)

    out = kmeans(data, k=9)
    # 512 is the input_size; the clusters are relative sizes.
    anchors = 512 * out
    # Evaluation metric of the clustering, computed once and reused.
    accuracy = avg_iou(data, out)

    with open("anchors.txt", "w") as w:
        w.write(str(anchors) + "\n")
        w.write("Accuracy:" + str(accuracy))

    print("9 Clusters is: ")
    print(anchors)
    print("Accuracy: {:.2f}%".format(accuracy * 100))


if __name__ == "__main__":
    main()
结果及分析
anchors | width | height |
---|---|---|
anchor 1 | 26.5974026 | 30.72 |
anchor 2 | 3.34883721 | 107.52 |
anchor 3 | 188.9298893 | 245.76 |
anchor 4 | 61.44 | 71.68 |
anchor 5 | 40.96 | 53.76 |
anchor 6 | 100.07272727 | 143.36 |
anchor 7 | 292.81605351 | 366.08 |
anchor 8 | 135.01369863 | 181.76 |
anchor 9 | 134.26709265 | 97.28 |
- mAP(0.5)反而下降了0.8%,可能是数据集大小的影响,毕竟 Pascal VOC 数据量还是挺大的
  - 自己数据集的有效标注目标个数 2445,聚类精度 78.41%
  - Pascal VOC 有效标注目标个数 33502,聚类精度 67.45%
- 由此可以看出,目标个数的影响大于聚类的影响