# Video object detection test
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import numpy as np
from scipy.misc import imread
from scipy.misc import imresize
from random import shuffle
class Generator(object):
    """Batch generator for SSD training with on-the-fly data augmentation.

    ``generate`` yields ``(images, encoded_targets)`` batches forever.
    Color jitter, lighting noise, flips and random crops are applied to the
    training split only; the validation split is passed through untouched.
    """

    def __init__(self, gt, bbox_util,
                 batch_size, path_prefix,
                 train_keys, val_keys, image_size,
                 saturation_var=0.5,
                 brightness_var=0.5,
                 contrast_var=0.5,
                 lighting_std=0.5,
                 hflip_prob=0.5,
                 vflip_prob=0.5,
                 do_crop=True,
                 crop_area_range=[0.75, 1.0],
                 asprect_ratio_range=[3. / 4., 4. / 3.]):
        # NOTE(review): 'asprect_ratio_range' is a typo for 'aspect_ratio_range'
        # but is kept so existing keyword callers keep working.
        self.gt = gt                    # dict: image key -> ground-truth boxes
        self.bbox_util = bbox_util      # encodes boxes against the priors
        self.batch_size = batch_size
        self.path_prefix = path_prefix  # directory prefix for image keys
        self.train_keys = train_keys
        self.val_keys = val_keys
        self.train_batches = len(train_keys)
        self.val_batches = len(val_keys)
        self.image_size = image_size    # (H, W) the network expects
        # Always store the jitter strengths (the jitter methods read them
        # unconditionally); only enabled jitters are queued for application.
        self.saturation_var = saturation_var
        self.brightness_var = brightness_var
        self.contrast_var = contrast_var
        self.color_jitter = []
        if saturation_var:
            # BUG FIX: originally appended self.brightness here.
            self.color_jitter.append(self.saturation)
        if brightness_var:
            self.color_jitter.append(self.brightness)
        if contrast_var:
            # BUG FIX: originally appended the float contrast_var, not the method.
            self.color_jitter.append(self.contrast)
        self.lighting_std = lighting_std
        self.hflip_prob = hflip_prob
        self.vflip_prob = vflip_prob
        self.do_crop = do_crop
        self.crop_area_range = crop_area_range
        self.aspect_ratio_range = asprect_ratio_range

    def grayscale(self, rgb):
        """Convert an RGB image (H, W, 3) to grayscale (H, W) via luma weights."""
        return rgb.dot([0.299, 0.587, 0.114])

    def saturation(self, rgb):
        """Randomly scale saturation by blending with the grayscale image."""
        gs = self.grayscale(rgb)
        alpha = 2 * np.random.random() * self.saturation_var
        alpha += 1 - self.saturation_var
        rgb = rgb * alpha + (1 - alpha) * gs[:, :, None]
        return np.clip(rgb, 0, 255)

    def brightness(self, rgb):
        """Randomly scale brightness."""
        alpha = 2 * np.random.random() * self.brightness_var
        # BUG FIX: the offset originally used saturation_var, so the brightness
        # jitter range silently depended on the saturation setting.
        alpha += 1 - self.brightness_var
        rgb = rgb * alpha
        return np.clip(rgb, 0, 255)

    def contrast(self, rgb):
        """Randomly scale contrast by blending with the mean-gray image."""
        gs = self.grayscale(rgb).mean() * np.ones_like(rgb)
        alpha = 2 * np.random.random() * self.contrast_var
        alpha += 1 - self.contrast_var
        rgb = rgb * alpha + (1 - alpha) * gs
        return np.clip(rgb, 0, 255)

    def lighting(self, img):
        """Add PCA-based lighting noise (AlexNet-style "fancy PCA")."""
        cov = np.cov(img.reshape(-1, 3) / 255.0, rowvar=False)
        eigval, eigvec = np.linalg.eigh(cov)
        noise = np.random.randn(3) * self.lighting_std
        noise = eigvec.dot(eigval * noise) * 255
        img += noise
        return np.clip(img, 0, 255)

    def horizontal_flip(self, img, y):
        """With probability hflip_prob, mirror the image and the relative boxes."""
        if np.random.random() < self.hflip_prob:
            img = img[:, ::-1]
            # xmin/xmax swap and reflect around 1 (boxes are relative coords).
            y[:, [0, 2]] = 1 - y[:, [2, 0]]
        return img, y

    def vertical_flip(self, img, y):
        """With probability vflip_prob, flip vertically and adjust the boxes."""
        if np.random.random() < self.vflip_prob:
            img = img[::-1]
            y[:, [1, 3]] = 1 - y[:, [3, 1]]
        return img, y

    def random_sized_crop(self, img, targets):
        """Crop a random window (area and aspect ratio within the configured
        ranges) and remap the relative target boxes into the crop.

        Boxes whose center falls outside the crop are dropped.
        """
        img_w = img.shape[1]
        img_h = img.shape[0]
        img_area = img_h * img_w
        # Crop area as a random fraction of the full image area.
        random_scale = np.random.random()
        random_scale *= (self.crop_area_range[1] -
                         self.crop_area_range[0])
        random_scale += self.crop_area_range[0]
        target_area = random_scale * img_area
        # Random aspect ratio within the configured range.
        random_ratio = np.random.random()
        random_ratio *= (self.aspect_ratio_range[1] -
                         self.aspect_ratio_range[0])
        random_ratio += self.aspect_ratio_range[0]
        w = np.round(np.sqrt(target_area * random_ratio))
        h = np.round(np.sqrt(target_area / random_ratio))
        if np.random.random() < 0.5:
            w, h = h, w
        w = min(w, img_w)
        w_rel = w / img_w
        w = int(w)
        h = min(h, img_h)
        h_rel = h / img_h
        h = int(h)
        x = np.random.random() * (img_w - w)
        x_rel = x / img_w
        x = int(x)
        y = np.random.random() * (img_h - h)
        # BUG FIX: originally 'y / img - h', which divides by the image ARRAY
        # and then subtracts h, producing a garbage array instead of a scalar.
        y_rel = y / img_h
        y = int(y)
        img = img[y:y + h, x:x + w]
        new_targets = []
        for box in targets:
            # Keep a box only if its center lies inside the crop window.
            cx = 0.5 * (box[0] + box[2])
            cy = 0.5 * (box[1] + box[3])
            if (x_rel < cx < x_rel + w_rel and
                    y_rel < cy < y_rel + h_rel):
                xmin = (box[0] - x_rel) / w_rel
                ymin = (box[1] - y_rel) / h_rel
                xmax = (box[2] - x_rel) / w_rel
                ymax = (box[3] - y_rel) / h_rel
                # Clamp to the crop boundaries in relative coordinates.
                xmin = max(0, xmin)
                ymin = max(0, ymin)
                xmax = min(1, xmax)
                ymax = min(1, ymax)
                box[:4] = [xmin, ymin, xmax, ymax]
                new_targets.append(box)
        new_targets = np.asarray(new_targets).reshape(-1, targets.shape[1])
        return img, new_targets

    def generate(self, train=True):
        """Yield ``(preprocessed_images, encoded_targets)`` batches forever.

        :param train: when True, shuffle the training split and apply the
            full augmentation pipeline; otherwise iterate the validation split.
        """
        while True:
            if train:
                shuffle(self.train_keys)
                keys = self.train_keys
            else:
                shuffle(self.val_keys)
                keys = self.val_keys
            inputs = []
            targets = []
            for key in keys:
                img_path = self.path_prefix + key
                img = imread(img_path).astype('float32')
                # BUG FIX: the original try/finally unconditionally collapsed
                # EVERY image to a single channel; only expand genuinely
                # grayscale (2-D) images to 3 channels here.
                if img.ndim == 2:
                    img = np.repeat(img[:, :, np.newaxis], 3, axis=2)
                y = self.gt[key].copy()
                if train and self.do_crop:
                    img, y = self.random_sized_crop(img, y)
                img = imresize(img, self.image_size).astype('float32')
                if train:
                    # Apply enabled color jitters in a random order.
                    shuffle(self.color_jitter)
                    for jitter in self.color_jitter:
                        img = jitter(img)
                    if self.lighting_std:
                        img = self.lighting(img)
                    if self.hflip_prob > 0:
                        img, y = self.horizontal_flip(img, y)
                    if self.vflip_prob > 0:
                        img, y = self.vertical_flip(img, y)
                y = self.bbox_util.assign_boxes(y)
                inputs.append(img)
                targets.append(y)
                if len(targets) == self.batch_size:
                    tmp_inp = np.array(inputs)
                    tmp_targets = np.array(targets)
                    inputs = []
                    targets = []
                    # NOTE(review): preprocess_input is neither defined nor
                    # imported in this file — presumably
                    # keras.applications.imagenet_utils.preprocess_input; confirm.
                    yield preprocess_input(tmp_inp), tmp_targets
# SSD training on the commodity dataset
import numpy as np
import tensorflow as tf
class BBoxUtility(object):
    """Utilities for matching ground-truth boxes to SSD prior boxes and for
    running TensorFlow non-max suppression on detections."""

    def __init__(self, num_classes, priors=None, overlap_threshold=0.5,
                 nms_thresh=0.45, top_k=400):
        # num_classes: number of detection classes handled by the model.
        # priors: optional (num_priors, 4+) array of prior boxes.
        # overlap_threshold: IoU above which a prior matches a ground-truth box.
        # nms_thresh: IoU threshold used by non-max suppression.
        # top_k: maximum number of boxes kept by non-max suppression.
        self.num_classes = num_classes
        self.priors = priors
        self.num_priors = 0 if priors is None else len(priors)
        # NOTE(review): 'onerlap_threshold' is a typo for 'overlap_threshold';
        # kept as-is because code outside this view may read this attribute.
        self.onerlap_threshold = overlap_threshold
        self._nms_thresh = nms_thresh
        self._top_k = top_k
        # TF1.x-style graph: placeholders fed at session.run time, plus a
        # pre-built NMS op parameterized by the current thresholds.
        self.boxes = tf.placeholder(dtype='float32', shape=(None, 4))
        self.scores = tf.placeholder(dtype='float32', shape=(None,))
        self.nms = tf.image.non_max_suppression(self.boxes, self.scores,
                                                self._top_k,
                                                iou_threshold=self._nms_thresh)
        # Force NMS to run on CPU (GPU device count pinned to 0).
        self.sess = tf.Session(config=tf.ConfigProto(device_count={'GPU': 0}))
@property
def nms_thresh(self):
return self._nms_thresh
@nms_thresh.setter
def nms_thresh(self,value):
@property
def top_k(self):
return self._top_k
@top_k.setter
def top_k(self,value):
self._top_k = value
self.nms = tf.image.non_max_suppression(self.boxes,self.scores,
self._top_k,
iou_threshold=self._nms_thresh)
def iou(self,box):
inter_upleft = np.maximum(self.priors[:,:2],box[:2])
inter_botright = np.minimum(self.priors[:,2:4],box[2:])
inter_wh=inter_botright-inter_upleft
inter_wh=np.maximum(inter_wh,0)
inter = inter_wh[:,0]*inter_wh[:,1]
area_pred = (box[2]-box[0])*(box[3]-box[1])
area_gt = (self.priors[:,2]-self.priors[:,0])
area_gt*=(self.priors[:,3]-self.priors[:,1])
union=area_pred+area_gt-inter
iou=inter/union
return iou
def encode_box(self,box,return_iou=True):
# Main program
# GPU-enabled run
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import pickle
from utils.detection_generate import Generator
from utils.ssd_utils import BBoxUtility
from nets.ssd_net import SSD300
from utils.ssd_losses import MuliboxLoss
from tensorflow.python import keras
class SSDTrain(object):
    """Fine-tunes an SSD300 detector on the commodity dataset."""

    def __init__(self, num_classes=9, input_shape=(300, 300, 3), epochs=30):
        """Set up training hyper-parameters, priors and the SSD300 model.

        :param num_classes: number of target classes for the detector.
        :param input_shape: model input size (H, W, C).
        :param epochs: number of training epochs.
        """
        self.num_classes = num_classes
        self.batch_size = 32
        self.input_shape = input_shape
        self.epochs = epochs
        # Paths for annotations, images and pre-trained weights.
        self.gt_path = "./datasets/commodity_gt.pkl"
        self.image_path = "./datasets/commodity/JPEGImages/"
        # BUG FIX: close the pickle file handle (was a bare open() leak).
        with open("./datasets/prior_boxes_ssd300.pkl", "rb") as f:
            prior = pickle.load(f)
        self.bbox_util = BBoxUtility(self.num_classes, prior)
        self.pre_trained = "./ckpt/pre_trained/weights_SSD300.hdf5"
        # Build the SSD300 network.
        self.model = SSD300(self.input_shape, num_classes=self.num_classes)

    def get_detefction_data(self):
        """Build the batch generator over the annotated dataset.

        NOTE(review): the method name typo ('detefction') is kept because
        external callers invoke it by this name.

        :return: Generator yielding (images, encoded targets) batches.
        """
        # 1. Load annotations and split the image keys 80/20 into train/val.
        with open(self.gt_path, 'rb') as f:
            gt = pickle.load(f)
        name_keys = sorted(gt.keys())
        number = int(round(0.8 * len(name_keys)))
        train_keys = name_keys[:number]
        val_keys = name_keys[number:]
        # 2. Batch iterator over the split:
        #    gt: key -> ground-truth boxes; image_path: image directory;
        #    image_size: fixed (H, W) fed to the net; do_crop: cropping off.
        # (Removed: dead local 'bbox_util = 0' and a debug print of the whole
        # annotation dict.)
        gen = Generator(gt, self.bbox_util, self.batch_size, self.image_path,
                        train_keys, val_keys,
                        (self.input_shape[0], self.input_shape[1]),
                        do_crop=False)
        return gen

    def init_model_param(self):
        """Load pre-trained weights and freeze the early VGG layers.

        :return: None
        """
        # 1. Load locally stored pre-trained weights by layer name.
        self.model.load_weights(self.pre_trained, by_name=True)
        # 2. Freeze the VGG front half of SSD (first three conv stages)
        #    so fine-tuning only updates the later layers.
        freeze = ['input_1', 'conv1_1', 'conv1_2', 'pool1',
                  'conv2_1', 'conv2_2', 'pool2',
                  'conv3_1', 'conv3_2', 'conv3_3', 'pool3']
        for layer in self.model.layers:
            if layer.name in freeze:
                layer.trainable = False
        return None

    def compile(self):
        """Compile the model with Adam and the SSD multibox loss.

        MultiboxLoss covers the N target classes plus one background class.
        """
        self.model.compile(optimizer=keras.optimizers.Adam(),
                           loss=MuliboxLoss(self.num_classes).compute_loss)

    def fit_generator(self, gen):
        """Train the model with checkpointing and TensorBoard logging.

        :param gen: Generator returned by get_detefction_data().
        """
        callback = [
            # BUG FIX: ModelCheckpoint was passed 'model=' instead of the
            # 'mode=' keyword, so the intended 'auto' mode never applied.
            # NOTE(review): 'val_acc' may not exist for a loss-only detection
            # model — confirm the monitored metric against the compiled model.
            keras.callbacks.ModelCheckpoint(
                './ckpt/fine_tuning/weight.{epoch:02d}-{val_acc:.2f}.hdf5',
                monitor='val_acc',
                save_best_only=True,
                save_weights_only=True,
                mode='auto',
                period=1),
            keras.callbacks.TensorBoard("./graph", histogram_freq=1,
                                        write_graph=True, write_images=True)
        ]
        # BUG FIX: 'nb_val_samples' is the Keras 1.x keyword; tf.keras's
        # fit_generator expects 'validation_steps'.
        self.model.fit_generator(gen.generate(train=True), gen.train_batches,
                                 self.epochs,
                                 callbacks=callback,
                                 validation_data=gen.generate(train=False),
                                 validation_steps=gen.val_batches)
if __name__ == '__main__':
    # BUG FIX: the guard originally compared __name__ against the literal
    # string '__name__', which is never true, so training never ran.
    ssd = SSDTrain(num_classes=9)
    gen = ssd.get_detefction_data()
    ssd.init_model_param()
    ssd.compile()
    ssd.fit_generator(gen)