利用“HOG特征+随机森林分类器”制作汽车检测程序（完整源码下载）

最新推荐文章于 2023-12-19 21:18:11 发布

sumkee911

最新推荐文章于 2023-12-19 21:18:11 发布

阅读量1.3k

点赞数 1

分类专栏：python、opencv、机器学习　文章标签：汽车检测、汽车分析、图形检测、图形分析

本文链接：https://blog.csdn.net/sumkee911/article/details/100299376

版权

python 同时被 3 个专栏收录

2 篇文章 0 订阅

订阅专栏

opencv

2 篇文章 1 订阅

订阅专栏

机器学习

1 篇文章 0 订阅

订阅专栏

这个项目的名字叫 car_detector，其中用到的技术包括：梯度直方图（HOG）、随机森林分类器、图像金字塔、滑动窗口、非极大值抑制。

第一步模型训练：

1.载入8500张汽车样本以及8500张非汽车样本

2.提取所有样本的HOG特征、颜色直方图特征、spatial binning（空间分箱）特征（参考资料：HOG特征）

3.将80%的汽车和非汽车样本用作训练，剩余的20%的样本用作测试模型的精准度（随机森林分类）

4.将训练出来的模型使用pickle库保存起来，方便侦测程序调用（模型的准确度大概为98%）

train_model.py

#!/usr/local/bin/python
#-*-coding:utf-8-*-

import cv2
import numpy as np
from os import walk
from os.path import join
import pickle
import random
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

# Root folders searched recursively (see get_files) for positive (vehicle)
# and negative (non-vehicle) sample images.
POS_PATH = './data/vehicles'
NEG_PATH = './data/non-vehicles'
# Upper bound on how many samples of each class train() draws.
POS_SAMPLES = 8500
NEG_SAMPLES = 8500
# Fraction of each class used for training; the remainder is held out for testing.
TRAIN_TEST_RATIO = 0.8
# Pickle files written by train(). NOTE: despite the "svm" suffix, the object
# saved at MODEL_PATH is a RandomForestClassifier.
MODEL_PATH = './car_detector_svm'
SCALER_PATH = './car_detector_scaler'

def get_files(folder):
    """Return the paths of all files found under *folder*, recursively.

    Every file name reported by os.walk is joined with its directory, so the
    returned paths start with *folder*. No filtering by extension is done.
    A folder that does not exist yields an empty list.
    """
    collected = []
    for dirpath, _subdirs, filenames in walk(folder):
        for filename in filenames:
            collected.append(join(dirpath, filename))
    return collected
    
def save_pickle(data, path):
    """Pickle *data* into the file at *path*.

    The file is created if needed and truncated if it already exists.
    """
    with open(path, 'wb+') as sink:
        pickle.Pickler(sink).dump(data)
        
def load_pickle(path):
    """Read the file at *path* and return the object pickled in it.

    NOTE(security): unpickling can execute arbitrary code, so only load
    files that this project produced itself (e.g. via save_pickle).
    """
    with open(path, 'rb') as source:
        restored = pickle.Unpickler(source).load()
    return restored
        
def get_hog_descriptor():
    """Build the cv2.HOGDescriptor used for both training and detection.

    The descriptor is configured for 64x64 windows, 16x16 blocks moved in
    8-pixel strides, 8x8 cells and 20 histogram bins. The remaining values
    are passed through to OpenCV unchanged.
    """
    # Positional arguments, in the order cv2.HOGDescriptor receives them.
    settings = (
        (64, 64),   # winSize
        (16, 16),   # blockSize
        (8, 8),     # blockStride
        (8, 8),     # cellSize
        20,         # nbins
        1,          # derivAperture
        -1.,        # winSigma
        0,          # histogramNormType
        0.2,        # L2HysThreshold
        True,       # gammaCorrection
        64,         # nlevels
        False,      # signedGradients
    )
    return cv2.HOGDescriptor(*settings)
                            
def extract_feature_vector(roi, hog):
    """Return the 1-D feature vector describing an image patch.

    The vector is the concatenation of, in this order:
      1. the HOG descriptor computed by *hog* on the 64x64 patch,
      2. a 16-bin histogram of every channel of the patch,
      3. the pixels of the patch shrunk to 20x20 ("spatial binning").

    Args:
        roi: image patch as a NumPy array, either (H, W) or (H, W, C).
            Patches that are not 64x64 are resized to 64x64 first.
        hog: a cv2.HOGDescriptor, e.g. the one from get_hog_descriptor().

    Returns:
        A 1-D float64 numpy.ndarray.

    Raises:
        ValueError: if *roi* is None, which is what cv2.imread returns for
            a file it cannot read.
    """
    if roi is None:
        raise ValueError('roi is None (image could not be read)')

    if roi.shape[0] != 64 or roi.shape[1] != 64:
        # cv2 has no reshape(); the original call raised AttributeError for
        # every patch that was not already 64x64. Resizing is what is meant.
        roi = cv2.resize(roi, (64, 64), interpolation=cv2.INTER_AREA)

    # hog: compute() returns a column vector (N, 1) on some OpenCV versions
    # and a flat (N,) array on others; ravel() accepts both.
    hog_vector = np.ravel(hog.compute(roi))

    # color histogram features (a grayscale patch is treated as one channel)
    if len(roi.shape) < 3:
        roi = roi[:, :, np.newaxis]
    hist_vectors = [
        np.histogram(roi[:, :, channel], bins=16, range=(0, 255))[0]
        for channel in range(roi.shape[2])
    ]

    # spatial binning features
    spatial_vector = cv2.resize(roi, (20, 20), interpolation=cv2.INTER_AREA).ravel()

    parts = [hog_vector] + hist_vectors + [spatial_vector]
    # float64 matches what the original accumulate-into-np.array([]) produced
    return np.hstack(parts).astype(np.float64)

def _sample_and_split(folder, limit):
    """Pick at most *limit* random files under *folder*; return (train, test) path lists split by TRAIN_TEST_RATIO."""
    paths = get_files(folder)
    paths = random.sample(paths, min(limit, len(paths)))
    cut = int(TRAIN_TEST_RATIO * len(paths))
    return paths[:cut], paths[cut:]


def _load_features(filepaths, hog):
    """Read every image in *filepaths* and return the list of its feature vectors.

    Loading stays best-effort: files that cannot be read, or whose feature
    extraction fails, are skipped. The number skipped is printed rather than
    swallowed silently.
    """
    features = []
    skipped = 0
    for fp in filepaths:
        img = cv2.imread(fp)
        if img is None:
            # cv2.imread returns None for files it cannot decode
            skipped += 1
            continue
        try:
            features.append(extract_feature_vector(img, hog))
        except Exception:
            skipped += 1
    if skipped:
        print('skipped %d unreadable or invalid samples' % skipped)
    return features


def _report_rate(clf, scaler, features, expected_label, message):
    """Predict on held-out *features* and print how many got *expected_label*.

    *message* is a format string taking (hits, total, percentage). Nothing is
    predicted when there are no held-out samples, which avoids dividing by zero.
    """
    if not features:
        print(message % (0, 0, 0.0))
        return
    predicted = clf.predict(scaler.transform(features))
    hits = int(np.count_nonzero(predicted == expected_label))
    print(message % (hits, len(predicted), float(hits) / len(predicted) * 100))


def train():
    """Train the car / non-car random forest and pickle it with its scaler.

    Steps: sample up to POS_SAMPLES / NEG_SAMPLES image paths, split each
    class by TRAIN_TEST_RATIO, extract features, fit a StandardScaler and a
    RandomForestClassifier on the training part, print the hit rate on the
    held-out part, then write the classifier to MODEL_PATH and the scaler
    to SCALER_PATH. Labels are 1 for vehicles and -1 for non-vehicles.

    Raises:
        ValueError: if no usable training sample exists for either class.
    """
    print('loading trainning dataset')
    pos, pos_test = _sample_and_split(POS_PATH, POS_SAMPLES)
    neg, neg_test = _sample_and_split(NEG_PATH, NEG_SAMPLES)

    print('creating hog descriptor')
    hog = get_hog_descriptor()

    print('extracting features')
    pos_f = _load_features(pos, hog)
    neg_f = _load_features(neg, hog)
    pos_test_f = _load_features(pos_test, hog)
    neg_test_f = _load_features(neg_test, hog)
    if not pos_f or not neg_f:
        raise ValueError('no usable training samples under %s and/or %s'
                         % (POS_PATH, NEG_PATH))
    train_labels = [1] * len(pos_f) + [-1] * len(neg_f)

    print('scaling features')
    train_set = np.vstack((pos_f, neg_f))
    # Fit the scaler on the training samples only; including the held-out
    # samples would leak test statistics into the model being evaluated.
    scaler = StandardScaler().fit(train_set)
    train_set = scaler.transform(train_set)

    print('trainning classifier model')
    clf = RandomForestClassifier(n_estimators=40, min_samples_leaf=5, n_jobs=-1)
    clf.fit(train_set, train_labels)

    print('testing model')
    _report_rate(clf, scaler, pos_test_f, 1, 'true positives: %d/%d %0.2f%%')
    _report_rate(clf, scaler, neg_test_f, -1, 'true negetives: %d/%d %0.2f%%')

    print('saving model')
    save_pickle(clf, MODEL_PATH)
    save_pickle(scaler, SCALER_PATH)
    print('model is saved')


if __name__ == '__main__':
    train()

使用方法：python train_model.py

第二步，编写侦测程序

1. 载入测试文件，可以是图片或视频

2. 利用“图像金字塔+滑动窗口”技术从图像中提取需要被检测的块，这两个技术能确保任何大小和任何位置的目标对象都能被检测到（图像金字塔）

pyramid.py

#!/usr/local/bin/python
#-*-coding:utf-8-*-

import cv2

def resize(img, scaleFactor):
    """Return *img* shrunk by *scaleFactor* using area interpolation.

    Args:
        img: image array; shape[0] is the height and shape[1] the width.
        scaleFactor: divisor applied to both dimensions (e.g. 1.5 or 2).

    Returns:
        The resized image with size (int(width / scaleFactor),
        int(height / scaleFactor)).
    """
    # 1.0 (not 1) keeps this a true division on Python 2 even when
    # scaleFactor is an int; 1/scaleFactor truncated to 0 there and
    # requested a 0x0 image.
    shrink = 1.0 / scaleFactor
    new_size = (int(img.shape[1] * shrink), int(img.shape[0] * shrink))
    return cv2.resize(img, new_size, interpolation=cv2.INTER_AREA)
    
def pyramid(image, scale=1.5, minSize=(192,128)):
    """Yield *image* followed by successively smaller copies of it.

    Args:
        image: image array; shape[0] is the height and shape[1] the width.
        scale: factor by which each level is shrunk relative to the previous
            one; must be greater than 1.
        minSize: (min_width, min_height). Generation stops as soon as a
            shrunk level is smaller than this in either dimension. The
            original image is always yielded, whatever its size.

    Raises:
        ValueError: if *scale* is not greater than 1. Such a scale never
            shrinks the image, so the generator would never terminate.
    """
    if scale <= 1:
        raise ValueError('scale must be greater than 1, got %r' % (scale,))

    yield image

    while True:
        image = resize(image, scale)
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
            break

        yield image

sliding_window.py

#!/usr/local/bin/python
#-*-coding:utf-8-*-

def sliding_window(image, stepSize, windowSize):
    """Yield (x, y, window) for a window slid across *image* in row-major order.

    Args:
        image: array whose shape[0] is the height and shape[1] the width.
        stepSize: number of pixels between consecutive window origins,
            both horizontally and vertically.
        windowSize: (width, height) of the window.

    Yields:
        (x, y, window), where (x, y) is the window's top-left corner and
        window is the slice image[y:y+height, x:x+width]. Windows touching
        the right or bottom edge may be smaller than windowSize; callers
        that need full-size windows must filter them.
    """
    win_w, win_h = windowSize[0], windowSize[1]
    # range() rather than the Python-2-only xrange() so this runs on both
    # Python 2 and Python 3.
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            yield (x, y, image[y:y + win_h, x:x + win_w])

3. 使用预先训练好的分类器对从图像中提取出来的所有块进行分类预测，最后使用非极大值抑制技术消除相互重叠的正检测区域

non_maximum.py

#!/usr/local/bin/python
#-*-coding:utf-8-*-

import numpy as np

def non_max_suppression_fast(boxes, overlapThresh):
    """Greedy non-maximum suppression over scored bounding boxes.

    Boxes are visited from highest to lowest score. Each visited box is
    kept, and every remaining box is discarded when the fraction of its own
    area covered by the kept box exceeds *overlapThresh*.

    Args:
        boxes: array or plain sequence of rows [x1, y1, x2, y2, score].
        overlapThresh: overlap ratio above which a lower-scored box is
            suppressed. The ratio is intersection area divided by the
            lower-scored box's area, not intersection-over-union.

    Returns:
        An empty list when there are no boxes. Otherwise a float array with
        the kept rows, ordered by descending score.
    """
    # Accept plain Python sequences as well as arrays.
    boxes = np.asarray(boxes)

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # The overlap computation divides and subtracts, so work in floats;
    # unsigned integers would wrap around on x2 - x1.
    if boxes.dtype.kind != 'f':
        boxes = boxes.astype('float')

    x1, y1, x2, y2, scores = (boxes[:, col] for col in range(5))

    # Coordinates are treated as inclusive pixel indices, hence the +1.
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    # Ascending by score, so the best remaining candidate is always last.
    order = np.argsort(scores)
    pick = []

    while len(order) > 0:
        best = order[-1]
        rest = order[:-1]
        pick.append(best)

        # Intersection rectangle of the picked box with every remaining box.
        xx1 = np.maximum(x1[best], x1[rest])
        yy1 = np.maximum(y1[best], y1[rest])
        xx2 = np.minimum(x2[best], x2[rest])
        yy2 = np.minimum(y2[best], y2[rest])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        overlap = (w * h) / area[rest]

        # Drop the picked box and everything it overlaps too much.
        order = rest[~(overlap > overlapThresh)]

    # return only the bounding boxes that were picked
    return boxes[pick]

detector.py

#!/usr/local/bin/python
#-*- coding:utf-8 -*-

import cv2
import numpy as np
from non_maximum import non_max_suppression_fast as nms
from pyramid import pyramid
from sliding_window import sliding_window
from train_model import load_pickle, get_hog_descriptor, extract_feature_vector, MODEL_PATH, SCALER_PATH
import sys
import time

# Holds the (hog, clf, scaler) triple once it has been loaded, so that
# repeated detect() calls (one per video frame) do not re-read the pickles.
_DETECTOR_CACHE = {}


def _get_detector_parts():
    """Return (hog, clf, scaler), loading the pickled model files on first use only."""
    if 'parts' not in _DETECTOR_CACHE:
        # NOTE(security): load_pickle unpickles MODEL_PATH / SCALER_PATH;
        # only use files produced by this project's training script.
        _DETECTOR_CACHE['parts'] = (
            get_hog_descriptor(),
            load_pickle(MODEL_PATH),
            load_pickle(SCALER_PATH),
        )
    return _DETECTOR_CACHE['parts']


def detect(img):
    """Find cars in *img* with a sliding-window / image-pyramid search.

    Images wider than 500 pixels are first scaled down to a width of 500.
    Every 64x64 window of every pyramid level is scored by the trained
    classifier; windows scoring above 0.7 are mapped back to the (possibly
    downscaled) image and merged with non-maximum suppression.

    Args:
        img: image array as returned by cv2.imread / VideoCapture.read.

    Returns:
        (boxes, img): *boxes* is what the NMS helper returns, i.e. rows of
        [x1, y1, x2, y2, score] in the coordinates of the returned *img*,
        or an empty list when nothing was detected. *img* is the image the
        search actually ran on.
    """
    if img.shape[1] > 500:
        # you can adjust the size of image, the larger size will take longer to process
        # but the result will be more accurate
        ratio = img.shape[1] / 500.
        img = cv2.resize(img, (int(img.shape[1]/ratio), int(img.shape[0]/ratio)))

    hog, clf, scaler = _get_detector_parts()

    w, h = 64, 64

    # you can adjust scale, the smaller scale will take longer to process
    # but the result will be more accurate
    scaleFactor = 1.4
    stepSize = 20

    # Wall-clock timing; time.clock() no longer exists on Python >= 3.8.
    tm_begin = time.time()

    features = []
    placements = []  # (x, y, scale) of the window behind each feature vector
    skipped = 0
    for resized in pyramid(img, scaleFactor):
        # factor that maps coordinates of this level back onto img
        scale = float(img.shape[1]) / float(resized.shape[1])

        for (x, y, roi) in sliding_window(resized, stepSize, (w, h)):
            # windows cut off by the image border are ignored
            if roi.shape[1] != w or roi.shape[0] != h:
                continue

            try:
                vector = extract_feature_vector(roi, hog)
            except Exception:
                # best effort: a window that cannot be described is skipped
                skipped += 1
                continue
            features.append(vector)
            placements.append((x, y, scale))

    if skipped:
        print('skipped windows: %d' % skipped)

    if not features:
        # e.g. image smaller than one window; the scaler rejects empty input
        print('roi count: %d' % (len(features)))
        return [], img

    features = scaler.transform(features)
    # column 1 of predict_proba is taken as the score of a window
    scores = clf.predict_proba(features)[:, 1]

    tm_end = time.time()
    print('roi count: %d' % (len(features)))
    print('prediction time used: %0.2f' % (tm_end - tm_begin))

    rectangles = []
    for (x, y, scale), score in zip(placements, scores):
        if score > .7:
            rectangles.append([int(x * scale), int(y * scale),
                               int((x + w) * scale), int((y + h) * scale),
                               abs(score)])

    windows = np.array(rectangles)
    boxes = nms(windows, 0.2)

    return boxes, img

def _draw_detections(canvas, boxes):
    """Draw every box on *canvas* as a rectangle, with its score written at the top-left corner."""
    font = cv2.FONT_HERSHEY_PLAIN
    for (x, y, x2, y2, score) in boxes:
        top_left = (int(x), int(y))
        cv2.rectangle(canvas, top_left, (int(x2), int(y2)), (0,255,0), 2)
        cv2.putText(canvas, "%f" % score, top_left, font, 1, (0,0,255), 1)


if __name__ == '__main__':
    # Usage: python detector.py <image-or-.mp4-path>
    if len(sys.argv) < 2:
        print('please input the path of test file')
        sys.exit(1)

    test = sys.argv[1]
    # video file
    if test.find('.mp4') != -1:
        camera = cv2.VideoCapture(test)
        try:
            while True:
                ret, frame = camera.read()
                if not ret:
                    # end of the video (or a read error): frame is not usable
                    break

                # start to detect
                boxes, frame = detect(frame)
                _draw_detections(frame, boxes)

                # show the result; press 'q' to stop
                cv2.imshow('camera', frame)
                if cv2.waitKey(25) & 0xff == ord('q'):
                    break
        finally:
            # release the capture and windows even if detection fails
            camera.release()
            cv2.destroyAllWindows()
    # image file
    else:
        img = cv2.imread(test)
        if img is None:
            # cv2.imread returns None when the file is missing or unreadable
            print('cannot read image: %s' % test)
            sys.exit(1)

        # start to detect
        boxes, img = detect(img)
        _draw_detections(img, boxes)

        cv2.imshow('img', img)
        cv2.waitKey(0)

使用方法：python detector.py ./test/test_2.mp4

源码下载地址

链接: https://pan.baidu.com/s/126QlWsGfiZFVKNELiPz63w 提取码: aidv