这个项目名字叫car_detector,用到的技术包括:方向梯度直方图(HOG)、随机森林分类器、图像金字塔、滑动窗口、非极大值抑制。
第一步模型训练:
1.载入8500张汽车样本以及8500张非汽车样本
2.提取所有样本的HOG特征、颜色特征、spatial binning特征(HOG特征)
3.将80%的汽车和非汽车样本用作训练,剩余的20%的样本用作测试模型的精准度(随机森林分类)
4.将训练出来的模型使用pickle库保存起来,方便侦测程序调用(模型的准确度大概为98%)
train_model.py
#!/usr/local/bin/python
#-*-coding:utf-8-*-
import cv2
import numpy as np
from os import walk
from os.path import join
import pickle
import random
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
# Root folder of the positive (vehicle) sample images; searched recursively.
POS_PATH = './data/vehicles'
# Root folder of the negative (non-vehicle) sample images; searched recursively.
NEG_PATH = './data/non-vehicles'
# Upper bounds on how many positive / negative images train() samples.
POS_SAMPLES = 8500
NEG_SAMPLES = 8500
# Fraction of each class used for training; the remainder is the test split.
TRAIN_TEST_RATIO = 0.8
# Pickle files written by train() and read back by the detector.
# NOTE(review): the file name says "svm" but train() fits a RandomForestClassifier.
MODEL_PATH = './car_detector_svm'
SCALER_PATH = './car_detector_scaler'
def get_files(folder):
    """Return the paths of all files found under ``folder``, recursively.

    The tree is traversed with ``os.walk``; each file name is joined with
    the directory it was found in. A folder that does not exist yields an
    empty list.
    """
    return [
        join(parent, name)
        for parent, _subdirs, names in walk(folder)
        for name in names
    ]
def save_pickle(data, path):
    """Pickle ``data`` into the file at ``path``.

    The file is opened in binary ``'wb+'`` mode, so an existing file is
    truncated and replaced.
    """
    with open(path, 'wb+') as stream:
        pickle.Pickler(stream).dump(data)
def load_pickle(path):
    """Return the object unpickled from the file at ``path``.

    NOTE: unpickling can execute arbitrary code, so only load files this
    project wrote itself (for example with ``save_pickle``).
    """
    with open(path, 'rb') as stream:
        restored = pickle.Unpickler(stream).load()
    return restored
def get_hog_descriptor():
    """Build the ``cv2.HOGDescriptor`` used for feature extraction.

    The positional arguments are listed once, in constructor order, each
    annotated with the parameter name it is passed as.
    """
    hog_params = (
        (64, 64),   # winSize
        (16, 16),   # blockSize
        (8, 8),     # blockStride
        (8, 8),     # cellSize
        20,         # nbins
        1,          # derivAperture
        -1.,        # winSigma
        0,          # histogramNormType
        0.2,        # L2HysThreshold
        True,       # gammaCorrection
        64,         # nlevels
        False,      # signedGradients
    )
    return cv2.HOGDescriptor(*hog_params)
def extract_feature_vector(roi, hog):
    """Build the 1-D feature vector of one image patch.

    The vector is the concatenation, in this order, of:
      1. the HOG descriptor of the patch,
      2. a 16-bin histogram (range 0-255) of every channel, and
      3. the raw pixels of the patch shrunk to 20x20 (spatial binning).

    Args:
        roi: image patch as a NumPy array, shaped (rows, cols) or
            (rows, cols, channels). Patches that are not 64x64 are resized
            to 64x64 first.
        hog: descriptor object exposing ``compute(image)``, e.g. the one
            returned by ``get_hog_descriptor()``.

    Returns:
        A 1-D ``numpy.ndarray`` holding the concatenated features.

    Raises:
        ValueError: if ``roi`` is ``None`` (for example an image file that
            ``cv2.imread`` could not decode).
    """
    if roi is None:
        raise ValueError('roi is None: the image could not be loaded')

    if roi.shape[0] != 64 or roi.shape[1] != 64:
        # cv2 has no ``reshape`` function; ``resize`` is the call that
        # rescales pixel data to the HOG window size.
        roi = cv2.resize(roi, (64, 64), interpolation=cv2.INTER_AREA)

    # ravel() flattens the descriptor whether compute() hands back an
    # (N, 1) column or an already flat (N,) array.
    hog_vector = np.ravel(hog.compute(roi))

    # Treat a single-channel patch as an image with one channel so the
    # per-channel histogram loop works for both cases.
    if roi.ndim < 3:
        roi = roi[:, :, np.newaxis]
    hist_vectors = [
        np.histogram(roi[:, :, channel], bins=16, range=(0, 255))[0]
        for channel in range(roi.shape[2])
    ]

    spatial_roi = cv2.resize(roi, (20, 20), interpolation=cv2.INTER_AREA)
    spatial_vector = spatial_roi.ravel()

    return np.hstack([hog_vector] + hist_vectors + [spatial_vector])
def _extract_features(filepaths, hog):
    """Return the feature vectors of every image in ``filepaths`` that loads."""
    features = []
    skipped = 0
    for fp in filepaths:
        try:
            features.append(extract_feature_vector(cv2.imread(fp), hog))
        except Exception:
            # Best effort: unreadable / non-image files are skipped, but
            # counted so the loss is visible instead of silent.
            skipped += 1
    if skipped:
        print('skipped %d unreadable files' % skipped)
    return features


def _report_hit_rate(title, predictions, expected_label):
    """Print how many ``predictions`` equal ``expected_label``."""
    hits = int((predictions == expected_label).sum())
    rate = float(hits) / len(predictions) * 100
    print('%s: %d/%d %0.2f%%' % (title, hits, len(predictions), rate))


def train():
    """Train the vehicle classifier and pickle it together with its scaler.

    Steps:
      1. Randomly pick up to POS_SAMPLES / NEG_SAMPLES image paths from
         POS_PATH / NEG_PATH.
      2. Split each class into train / test by TRAIN_TEST_RATIO.
      3. Extract one feature vector per image.
      4. Fit a StandardScaler on the TRAINING features only, so that no
         test-set statistics leak into the model being evaluated.
      5. Fit a RandomForestClassifier (labels: 1 = vehicle, -1 = other).
      6. Print the hit rate on both held-out test sets.
      7. Save the classifier to MODEL_PATH and the scaler to SCALER_PATH.

    Raises:
        ValueError: if no usable training image is found for a class.
    """
    print('loading trainning dataset')
    pos_filepaths = get_files(POS_PATH)
    neg_filepaths = get_files(NEG_PATH)
    # random.sample with a capped k == shuffle-then-truncate
    pos_filepaths = random.sample(pos_filepaths, min(POS_SAMPLES, len(pos_filepaths)))
    neg_filepaths = random.sample(neg_filepaths, min(NEG_SAMPLES, len(neg_filepaths)))
    pos_split = int(TRAIN_TEST_RATIO * len(pos_filepaths))
    neg_split = int(TRAIN_TEST_RATIO * len(neg_filepaths))

    print('creating hog descriptor')
    hog = get_hog_descriptor()

    print('extracting features')
    pos_f = _extract_features(pos_filepaths[:pos_split], hog)
    neg_f = _extract_features(neg_filepaths[:neg_split], hog)
    pos_test_f = _extract_features(pos_filepaths[pos_split:], hog)
    neg_test_f = _extract_features(neg_filepaths[neg_split:], hog)
    if not pos_f or not neg_f:
        raise ValueError('no training samples: found %d positive and %d negative'
                         % (len(pos_f), len(neg_f)))
    # Labels follow the row order of the stacked training set below.
    train_labels = [1] * len(pos_f) + [-1] * len(neg_f)

    print('scaling features')
    scaler = StandardScaler().fit(pos_f + neg_f)
    train_set = scaler.transform(pos_f + neg_f)

    print('trainning classifier model')
    clf = RandomForestClassifier(n_estimators=40, min_samples_leaf=5, n_jobs=-1)
    clf.fit(train_set, train_labels)

    print('testing model')
    if pos_test_f:
        _report_hit_rate('true positives', clf.predict(scaler.transform(pos_test_f)), 1)
    else:
        print('no positive test samples, skipping')
    if neg_test_f:
        _report_hit_rate('true negetives', clf.predict(scaler.transform(neg_test_f)), -1)
    else:
        print('no negative test samples, skipping')

    print('saving model')
    save_pickle(clf, MODEL_PATH)
    save_pickle(scaler, SCALER_PATH)
    print('model is saved')


if __name__ == '__main__':
    train()
使用方法:python train_model.py
第二步,编写侦测程序
1. 载入测试文件,可以是图片或视频
2. 利用“图像金字塔+滑动窗口”技术从图像中提取需要被检测的块,这两个技术能确保任何大小和任何位置的目标对象都能被检测到(图像金字塔)
pyramid.py
#!/usr/local/bin/python
#-*-coding:utf-8-*-
import cv2
def resize(img, scaleFactor):
    """Return ``img`` with both sides multiplied by ``1 / scaleFactor``.

    Args:
        img: image array; ``shape[0]`` is read as the height and
            ``shape[1]`` as the width.
        scaleFactor: divisor applied to both dimensions (int or float).

    Returns:
        The resized image produced by ``cv2.resize`` with INTER_AREA.
    """
    # 1.0 forces true division: on Python 2 an integer scaleFactor would
    # make ``1/scaleFactor`` evaluate to 0 and request a 0x0 image.
    factor = 1.0 / scaleFactor
    new_size = (int(img.shape[1] * factor), int(img.shape[0] * factor))
    return cv2.resize(img, new_size, interpolation=cv2.INTER_AREA)
def pyramid(image, scale=1.5, minSize=(192, 128)):
    """Yield ``image`` followed by successively smaller copies of it.

    Each level is the previous one passed through ``resize(image, scale)``.
    Generation stops before the first level whose height is below
    ``minSize[1]`` or whose width is below ``minSize[0]``. The original
    image is always yielded, whatever its size.

    Args:
        image: image array (``shape[0]`` = height, ``shape[1]`` = width).
        scale: shrink factor between two levels; must be greater than 1.
        minSize: ``(min_width, min_height)`` of the smallest level to yield.

    Raises:
        ValueError: if ``scale`` is not greater than 1. Such a scale never
            shrinks the image, so the generator would never terminate.
    """
    if scale <= 1:
        raise ValueError('scale must be greater than 1, got %r' % (scale,))
    min_width, min_height = minSize[0], minSize[1]

    yield image
    level = resize(image, scale)
    while level.shape[0] >= min_height and level.shape[1] >= min_width:
        yield level
        level = resize(level, scale)
sliding_window.py
#!/usr/local/bin/python
#-*-coding:utf-8-*-
def sliding_window(image, stepSize, windowSize):
    """Yield ``(x, y, window)`` for every grid position over ``image``.

    Positions advance by ``stepSize`` pixels, row by row. ``window`` is the
    slice ``image[y:y+height, x:x+width]``; near the right and bottom edges
    it is smaller than ``windowSize``, so callers that need full windows
    must check its shape.

    Args:
        image: array whose ``shape[0]`` is the height and ``shape[1]`` the
            width.
        stepSize: distance in pixels between neighbouring positions.
        windowSize: ``(width, height)`` of the window.
    """
    win_w, win_h = windowSize[0], windowSize[1]
    # range() instead of the Python 2-only xrange() keeps this generator
    # usable on both Python 2 and Python 3.
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            yield (x, y, image[y:y + win_h, x:x + win_w])
3. 使用预先训练好的分类器将所有从图像中提取出来的块进行分类预测,最后使用非极大值抑制技术来消除重叠的正结果的区域
non_maximum.py
#!/usr/local/bin/python
#-*-coding:utf-8-*-
import numpy as np
def non_max_suppression_fast(boxes, overlapThresh):
    """Greedy non-maximum suppression over scored bounding boxes.

    Boxes are visited from the highest score to the lowest. Each visited
    box is kept, and every remaining box whose intersection with it covers
    more than ``overlapThresh`` of that remaining box's own area is
    discarded.

    Args:
        boxes: sequence or array of rows ``[x1, y1, x2, y2, score]``.
            Plain lists are accepted; integer arrays are converted to
            float because the overlap ratio needs true division.
        overlapThresh: maximum allowed overlap ratio for a box to survive.

    Returns:
        ``[]`` when ``boxes`` is empty, otherwise an array containing the
        kept rows (coordinates and score), ordered by descending score.
    """
    if len(boxes) == 0:
        return []

    boxes = np.asarray(boxes)
    if boxes.dtype.kind in 'iu':
        boxes = boxes.astype('float')

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    scores = boxes[:, 4]

    # +1 because the coordinates are treated as inclusive pixel indices
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = np.argsort(scores)  # ascending: best candidate is last

    pick = []
    while len(order) > 0:
        best = order[-1]
        rest = order[:-1]
        pick.append(best)

        # intersection rectangle of the best box with every remaining box
        xx1 = np.maximum(x1[best], x1[rest])
        yy1 = np.maximum(y1[best], y1[rest])
        xx2 = np.minimum(x2[best], x2[rest])
        yy2 = np.minimum(y2[best], y2[rest])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # overlap is measured relative to the remaining box's own area
        overlap = (w * h) / area[rest]

        # keep only the candidates that do not exceed the threshold
        order = rest[~(overlap > overlapThresh)]

    return boxes[pick]
detector.py
#!/usr/local/bin/python
#-*- coding:utf-8 -*-
import cv2
import numpy as np
from non_maximum import non_max_suppression_fast as nms
from pyramid import pyramid
from sliding_window import sliding_window
from train_model import load_pickle, get_hog_descriptor, extract_feature_vector, MODEL_PATH, SCALER_PATH
import sys
import time
_DETECTOR_CACHE = {}


def _load_classifier_and_scaler():
    """Unpickle the classifier and scaler once and reuse them afterwards."""
    if 'clf' not in _DETECTOR_CACHE or 'scaler' not in _DETECTOR_CACHE:
        clf = load_pickle(MODEL_PATH)
        scaler = load_pickle(SCALER_PATH)
        _DETECTOR_CACHE['clf'] = clf
        _DETECTOR_CACHE['scaler'] = scaler
    return _DETECTOR_CACHE['clf'], _DETECTOR_CACHE['scaler']


def detect(img):
    """Detect vehicles in ``img`` and return their boxes.

    The image is shrunk to a width of 500 px when wider, scanned with a
    64x64 sliding window over an image pyramid, and every window is scored
    by the pickled classifier. Windows scoring above 0.7 are mapped back to
    the (possibly shrunk) image and merged with non-maximum suppression.

    Args:
        img: image array (``shape[0]`` = height, ``shape[1]`` = width).

    Returns:
        ``(boxes, img)`` where ``boxes`` is the NMS result, rows of
        ``[x1, y1, x2, y2, score]`` or ``[]`` when nothing is found, and
        ``img`` is the image the box coordinates refer to.
    """
    if img.shape[1] > 500:
        # you can adjust the size of image, the larger size will take longer
        # to process but the result will be more accurate
        ratio = img.shape[1] / 500.
        img = cv2.resize(img, (int(img.shape[1] / ratio), int(img.shape[0] / ratio)))

    hog = get_hog_descriptor()
    # Cached: this function runs once per video frame, and re-reading the
    # model from disk each time would dominate the runtime.
    clf, scaler = _load_classifier_and_scaler()

    w, h = 64, 64
    # you can adjust scale, the smaller scale will take longer to process
    # but the result will be more accurate
    scaleFactor = 1.4
    stepSize = 20

    # time.clock was removed in Python 3.8; only fall back to it on
    # interpreters that lack perf_counter.
    timer = getattr(time, 'perf_counter', None) or time.clock
    tm_begin = timer()

    features = []
    roi_scales = []
    roi_x = []
    roi_y = []
    for resized in pyramid(img, scaleFactor):
        # factor that maps coordinates of this level back onto ``img``
        scale = float(img.shape[1]) / float(resized.shape[1])
        for (x, y, roi) in sliding_window(resized, stepSize, (w, h)):
            # windows clipped by the image border are smaller; skip them
            if roi.shape[1] != w or roi.shape[0] != h:
                continue
            try:
                feature = extract_feature_vector(roi, hog)
            except Exception:
                # best effort: a window that cannot be described is skipped
                continue
            features.append(feature)
            roi_scales.append(scale)
            roi_x.append(x)
            roi_y.append(y)

    if not features:
        # Image smaller than one window: nothing to classify.
        print('roi count: 0')
        return [], img

    features = scaler.transform(features)
    # column 1 is taken as the probability of the positive class
    scores = clf.predict_proba(features)[:, 1]
    tm_end = timer()
    print('roi count: %d' % (len(features)))
    print('prediction time used: %0.2f' % (tm_end - tm_begin))

    rectangles = []
    for x, y, s, score in zip(roi_x, roi_y, roi_scales, scores):
        if score > .7:
            rectangles.append([int(x * s), int(y * s),
                               int((x + w) * s), int((y + h) * s),
                               abs(score)])
    windows = np.array(rectangles)
    boxes = nms(windows, 0.2)
    return boxes, img
def _draw_detections(image, boxes):
    """Draw each box and its score onto ``image`` in place."""
    font = cv2.FONT_HERSHEY_PLAIN
    for (x, y, x2, y2, score) in boxes:
        cv2.rectangle(image, (int(x), int(y)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(image, "%f" % score, (int(x), int(y)), font, 1, (0, 0, 255), 1)


def _run_on_video(path):
    """Run detection on every frame of the video at ``path`` until 'q' or EOF."""
    camera = cv2.VideoCapture(path)
    try:
        while True:
            ret, frame = camera.read()
            if not ret:
                # end of stream or read failure: frame is not usable
                break
            boxes, frame = detect(frame)
            _draw_detections(frame, boxes)
            cv2.imshow('camera', frame)
            if cv2.waitKey(25) & 0xff == ord('q'):
                break
    finally:
        # always free the capture and the windows, even after an error
        camera.release()
        cv2.destroyAllWindows()


def _run_on_image(path):
    """Run detection on the image at ``path`` and show it until a key press."""
    img = cv2.imread(path)
    if img is None:
        print('cannot read image file: %s' % path)
        sys.exit(1)
    boxes, img = detect(img)
    _draw_detections(img, boxes)
    cv2.imshow('img', img)
    cv2.waitKey(0)


if __name__ == '__main__':
    # Usage: python detector.py <path to an .mp4 video or an image file>
    if len(sys.argv) < 2:
        print('please input the path of test file')
        sys.exit(1)
    test = sys.argv[1]
    # Decide by file extension, so '.mp4' elsewhere in the path is ignored.
    if test.lower().endswith('.mp4'):
        _run_on_video(test)
    else:
        _run_on_image(test)
使用方法:python detector.py ./test/test_2.mp4
源码下载地址
链接: https://pan.baidu.com/s/126QlWsGfiZFVKNELiPz63w 提取码: aidv