YOLO:You Only Look Once(只看一次,速度快)
数据处理
筛选建议框(根据目标的形状和大小筛选)
数据集中有大量标注框,用 K-Means 聚类算法从中聚出 9 个建议框来代表整个数据集;这 9 个框按 13、26、52 三种特征图尺寸各分 3 个。
# Network input size in pixels.
IMG_HEIGHT = 416
IMG_WIDTH = 416

# Number of object classes.
CLASS_NUM = 10

# Anchor (suggested) boxes keyed by feature-map size, three per size.
# NOTE(review): each pair is presumably [width, height] - confirm against
# the code that consumes it.
ANCHORS_GROUP = {
    13: [[51, 22], [52, 22], [53, 22]],
    26: [[54, 22], [55, 22], [56, 22]],
    52: [[57, 22], [58, 22], [59, 22]],
}

# Area of every anchor box, in the same key and list order as ANCHORS_GROUP.
ANCHORS_GROUP_AREA = {
    feature_size: [side_a * side_b for side_a, side_b in anchors]
    for feature_size, anchors in ANCHORS_GROUP.items()
}
设计标签
(cls,cx,cy,w,h)→(cls,cx_offset,cy_offset,w_p,h_p)
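中心点:与缩放比例有关(缩放比例 = 输入图尺寸 / 特征图尺寸,例如 416 / 13 = 32)。
cx / 缩放比例 = 整数部分.小数部分:整数部分 = 前边有几个格子(即所在格子的列索引),小数部分 = cx 相对于当前格子的偏移量,即 cx_offset
cy / 缩放比例 = 整数部分.小数部分:整数部分 = 前边有几个格子(即所在格子的行索引),小数部分 = cy 相对于当前格子的偏移量,即 cy_offset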
宽高:与建议框有关。
w_p = log(实际w / 建议w)
h_p = log(实际h / 建议h)
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import numpy as np
import os
import cfg
def one_hot(cls_num, v):
    """Return a one-hot vector of length ``cls_num`` with a 1.0 at index ``v``.

    Args:
        cls_num: Length of the returned vector.
        v: Index to set. An integer-valued float such as ``3.0`` is accepted
            and treated as ``3``: the label fields in this file are parsed
            with ``float()``, and NumPy rejects float indices.

    Returns:
        A 1-D float64 ``numpy.ndarray`` that is zero everywhere except at
        index ``v``.

    Raises:
        IndexError: If ``v`` is out of range or is a non-integral float.
    """
    result = np.zeros(cls_num)
    # Only integral floats are converted; anything else is handed to NumPy
    # unchanged so existing indexing behaviour (and errors) are preserved.
    if isinstance(v, (float, np.floating)) and float(v).is_integer():
        v = int(v)
    result[v] = 1.0
    return result
class MyDataset(Dataset):
def __init__(self, path, label_path):
    """Load the label file and set up the image preprocessing pipeline.

    Args:
        path: Directory that the image file names listed in the label
            file are joined onto when a sample is loaded.
        label_path: Text file with one sample per line.
    """
    self.path = path
    # Convert the image to a tensor, then normalise each of the three
    # channels with mean 0.5 and std 0.5.
    self.transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ])
    with open(label_path) as label:
        # Skip blank lines (e.g. a trailing newline at the end of the file):
        # they would otherwise count as samples in __len__ and fail in
        # __getitem__, which treats the first field as an image file name.
        self.data = [line for line in label if line.strip()]
def __len__(self):
return len(self.data)
def __getitem__(self, index):
# key:13, 26, 52
# value:[13, 13, 3, 15] [26, 26, 3, 15] [52, 52, 3, 15]
labels = {
}
# img1 cls1 cx1 cy1 w1 h1 img2 cls2 cx2 cy2 w2 h2
infos = self.data[index].strip().split(" ")
img = Image.open(os.path.join(self.path, infos[0]))
img = self.transform(img)
# cls1 cx1 cy1 w1 h1 cls2 cx2 cy2 w2 h2
boxes = np.array([float(box) for box in infos[1:]]</