准备:
pytorch 模型训练程序下载
基于vgg16,weights下载(放在./weights/vgg16_reducedfc.pth)
----------------开始-----------------
Part-1 数据集准备
我采用coco的数据格式,那么什么是coco的格式呢?
很多是没有用的key,最终我的.json文件是这样的
自己能写的这步跳过!数据集制作程序:程序
Part-2 数据集加载相关代码修改
概述:
1-./data/config.py
2-./data/new_coco.py
3- ./data/__init__.py
4-./data/new_coco/coco_labels.txt
5-./data/new_coco/XXX_coco_trainval.json(来源于数据集准备步骤)
step-1: ./data/config.py
HOME = os.path.expanduser("./") # 【改】成你的项目路径
... ...
voc = { ... ... }
coco = { ... ... }
# 【增】 你的数据集配置
my_coco = {
'num_classes': 12 ,# 你的分类数,如果是11种就写12 因为算上背景,少一个会报错
... ... # 其他项自己看着改,我懒我没改
}
step-2: ./data/new_coco.py 【增】创建一个数据集加载文件,复制coco.py的内容,开始修改
COCO_ROOT = osp.join(HOME, 'data/new_coco/')
IMAGES = r' ... ... \Images'
COCO_API = 'PythonAPI'
INSTANCES_SET = 'XXX_coco_trainval.json'
#类别
My_COCO_CLASSES = ('person','dog', ... ...)
class COCOAnnotationTransform(object):
... ...
def __call__(self):
... ...
# label_idx = self.label_map[obj['category_id']]-1
label_idx = self.label_map[obj['category_id']] # 因为我是从0开始标注的,不改报错
... ...
class COCODetection(data.Dataset):
def __init__(self,image_set='随便写', transform=None,target_transform=COCOAnnotationTransform(),dataset_name='随便写'):
sys.path.append(osp.join(COCO_ROOT, COCO_API))
from pycocotools.coco import COCO
self.root = IMAGES # 【改】
self.coco = COCO(osp.join(COCO_ROOT,INSTANCES_SET)) # 【改】
... ...
如果图片是中文路径,
将所有的cv2.imread()
都换成
# 读图片(支持中文路径)->ndarray
def myImread(path):
return cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1)
step-3: ./data/__init__.py 全部注释掉,换上你新创建的new_coco.py
#from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT
#from .coco import COCODetection, COCOAnnotationTransform, COCO_CLASSES, COCO_ROOT, get_label_map
from .new_coco import COCODetection, COCOAnnotationTransform, My_COCO_CLASSES, COCO_ROOT, get_label_map
step-4: ./data/new_coco/coco_labels.txt 从./data/coco_labels.txt复制,根据自己分类写
0,0,person
1,1,dog
... ...
step-5: ./data/new_coco/XXX_coco_trainval.json 找到刚刚生成json文件粘贴到这里
Part3 训练程序修改
概述:
1- ./train.py
2-./ssd.py
3-./layers/modules/multibox_loss.py
step-1:./train.py
... ...
parser.add_argument('--dataset', default='MYCOCO', choices=['VOC', 'COCO','MYCOCO'],
type=str, help='VOC or MYCOCO') # 【改】
# parser.add_argument('--dataset_root', default=VOC_ROOT, 【删】没有用注释掉就行了
# help='Dataset root directory path')
parser.add_argument('--lr', '--learning-rate', default=1e-5, type=float,
help='initial learning rate') # 1e-5 【改】不改就可能梯度爆炸
... ...
def train():
if args.dataset == 'MYCOCO':
cfg = my_coco
dataset = COCODetection(transform=SSDAugmentation(cfg['min_dim'],MEANS))
... ...
# load train data
# images, targets = next(batch_iterator) 【改】修改成下面两行
try:
images, targets = next(batch_iterator)
except StopIteration:
batch_iterator = iter(data_loader)
images, targets = next(batch_iterator)
... ...
# loc_loss += loss_l.data[0]
# conf_loss += loss_c.data[0]------------------------【改】 所有的data[0]都换成data.item()
loc_loss += loss_l.data.item()
conf_loss += loss_c.data.item()
... ...
if iteration % 10 == 0:
print('timer: %.4f sec.' % (t1 - t0))
#print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data[0]), end=' ')--------------【改】
print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data.item()), end=' ')
if args.visdom:
# update_vis_plot(iteration, loss_l.data[0], loss_c.data[0], --【改】
# iter_plot, epoch_plot, 'append')
update_vis_plot(iteration, loss_l.data.item(), loss_c.data.item(),
iter_plot, epoch_plot, 'append')
... ...
step-2:./ssd.py
from data import voc, coco,my_coco
... ...
self.cfg = (coco, voc,my_coco)[-1]#--------直接选你那个
self.priorbox = PriorBox(self.cfg)
# self.priors = Variable(self.priorbox.forward(), volatile=True) ----【改】改成下面两行
with torch.no_grad():
self.priors = self.priorbox.forward()
step-3:./layers/modules/multibox_loss.py
from data import my_coco as cfg # 【改】【选】改不改都行,啥也不影响
... ...
class MultiBoxLoss(nn.Module):
def forward(self, predictions, targets):
... ...
# Hard Negative Mining
loss_c = loss_c.view(pos.size()[0],pos.size()[1]) # ---【增】
loss_c[pos] = 0 # filter out pos
... ...
#loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)#----【改】
loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
... ...
# N = num_pos.data.sum() #-------------------【改】
N = num_pos.data.sum().double()
loss_l = loss_l.double()
loss_c = loss_c.double()
# -----------------------
loss_l /= N
loss_c /= N
直接运行train.py即可
---------------------------end
其他:
如果你想用别的模型做特征提取(替换vgg16骨干网络),参考链接:
bug汇总
1:报错:IndexError: invalid index of a 0-dim tensor. Use tensor.item() to convert a 0-dim tensor to a Python number 解决:Part-3/step-1/data[0]的问题
2:报错:IndexError: The shape of the mask [32, 23321] at index 0 does not match the shape of the indexed tensor [93280, 1] at index 0 解决:Part-3/step-3
3: 报错:RuntimeError: copy_if failed to synchronize: device-side assert triggered 解决:分类数没有+1 Part-2/step-1
4: 报错:RuntimeError: invalid argument 2: non-empty vector or matrix expected at 解决:标签没有-1 Part-2/step-2
5:报错:nan梯度爆炸 iter 50 || Loss: nan|| timer: 0.3400 sec. 解决:调低学习率 Part-3/step-1