机器学习(10)实例:自动驾驶,使用SVM跟踪视频中的汽车

最终视频效果

点击图片查看视频
ScreenShot

  • 本文的目的是如视频所显示的效果,使用SVM算法在视频中找出汽车的位置
  • github

过程

  • 编写提取特征的函数
  • 使用svm和提取到的特征,训练一个二分类器,区分汽车和非汽车
  • 编写滑动窗口函数,并把每一个窗口使用svm判断是否汽车
  • 编写热力图函数,把多个判断为汽车的窗口合并为一个窗口
  • opencv读取视频和图片转换为视频

提取特征

像素特征

像素特征的提取比较简单,直接把图像矩阵转换为向量就好,但是并不需要太多的像素点,所以需要resize小一点

def bin_spatial(img, size=(16, 16)):
    """Return raw-pixel features for ``img``.

    The image is shrunk with ``cv2.resize`` to ``size`` (16x16 by default) so
    the vector stays short, then flattened into a 1-D array.
    """
    shrunk = cv2.resize(img, size)
    return shrunk.ravel()

颜色直方图特征

颜色直方图可以理解为每种颜色取值(0~255)在图像中出现的像素数量。
使用numpy的histogram函数得到颜色直方图,注意这里的输入只能是单通道的,因此需要对三个通道分别计算,再把结果拼接起来。

def color_hist(img, nbins=32, bins_range=(0, 256)):
    """Return the concatenated per-channel colour histograms of ``img``.

    Args:
        img: array indexed as ``img[:, :, channel]``; channels 0, 1 and 2
            are used.
        nbins: number of histogram bins per channel.
        bins_range: ``(low, high)`` value range covered by the bins.

    Returns:
        1-D array of ``3 * nbins`` bin counts (channel 0, then 1, then 2).
    """
    # np.histogram returns (counts, bin_edges); only the counts are features.
    # The counts must not be indexed a second time, otherwise only the first
    # bin of every channel would be kept.
    channel_counts = [
        np.histogram(img[:, :, channel], bins=nbins, range=bins_range)[0]
        for channel in range(3)
    ]
    # Join the three count arrays into one feature vector
    hist_features = np.hstack(channel_counts)
    return hist_features

方向梯度特征(hog)和高效获取方向梯度特征的方法

hog特征实际效果类似于边缘检测
这里写图片描述

# orient: number of gradient orientation bins; pix_per_cell: pixels per cell
# side; cell_per_block: cells per block side
def get_hog_features(img, orient, pix_per_cell, cell_per_block,
                        vis=False, feature_vec=True):
    """Compute HOG features for one single-channel image.

    Returns the feature array, or ``(features, hog_image)`` when ``vis`` is
    True so the gradient picture can be displayed.

    NOTE(review): ``hog`` is presumably ``skimage.feature.hog`` - confirm the
    import. Newer scikit-image releases spell the keyword ``visualize``;
    verify against the installed version.
    """
    cell_shape = (pix_per_cell, pix_per_cell)
    block_shape = (cell_per_block, cell_per_block)
    result = hog(img, orientations=orient,
                 pixels_per_cell=cell_shape,
                 cells_per_block=block_shape,
                 transform_sqrt=True,
                 visualise=vis, feature_vector=feature_vec)
    if vis == True:
        # With visualisation requested the call yields a (features, image) pair
        features, hog_image = result
        return features, hog_image
    return result

合并三种特征

# Collect the feature vectors of one image
def img_features(feature_image, spatial_feat, hist_feat, hog_feat, hist_bins, orient,
                        pix_per_cell, cell_per_block, hog_channel, spatial_size):
    """Return the enabled feature vectors of one image as a list.

    Args:
        feature_image: image already converted to the working colour space.
        spatial_feat, hist_feat, hog_feat: switches for the raw-pixel,
            colour-histogram and HOG features.
        hist_bins: bin count passed to ``color_hist``.
        orient, pix_per_cell, cell_per_block: HOG settings passed to
            ``get_hog_features``.
        hog_channel: ``'ALL'`` to use every channel; any other value selects
            the grayscale path below.
        spatial_size: target size passed to ``bin_spatial``.

    Returns:
        List of arrays in the order spatial, histogram, HOG (only those that
        are enabled); the caller concatenates them.
    """
    file_features = []
    if spatial_feat:
        file_features.append(bin_spatial(feature_image, size=spatial_size))
    if hist_feat:
        file_features.append(color_hist(feature_image, nbins=hist_bins))
    if hog_feat:
        if hog_channel == 'ALL':
            # HOG of every channel. Flatten only after all channels are
            # collected: flattening inside the loop turns the list into an
            # ndarray, which has no .append for the next channel.
            per_channel = [
                get_hog_features(feature_image[:, :, channel],
                                 orient, pix_per_cell, cell_per_block,
                                 vis=False, feature_vec=True)
                for channel in range(feature_image.shape[2])
            ]
            hog_features = np.ravel(per_channel)
        else:
            # NOTE(review): this path treats the input as LUV and ignores the
            # numeric hog_channel, while single_img_features slices
            # feature_image[:, :, hog_channel]; confirm which is intended so
            # that training and inference features agree.
            feature_image = cv2.cvtColor(feature_image, cv2.COLOR_LUV2RGB)
            # Convert to grayscale
            feature_image = cv2.cvtColor(feature_image, cv2.COLOR_RGB2GRAY)
            hog_features = get_hog_features(feature_image[:, :], orient,
                            pix_per_cell, cell_per_block, vis=False, feature_vec=True)
        file_features.append(hog_features)
    return file_features

# Batch feature extraction
def extract_features(imgs, color_space='RGB', spatial_size=(32, 32),
                        hist_bins=32, orient=9,
                        pix_per_cell=8, cell_per_block=2, hog_channel=0,
                        spatial_feat=True, hist_feat=True, hog_feat=True):
    """Extract one feature vector per image file, plus one per mirrored copy.

    Args:
        imgs: iterable of image file paths.
        color_space: 'RGB' (no conversion), 'HSV', 'LUV', 'HLS', 'YUV' or
            'YCrCb'.
        Remaining arguments are forwarded to ``img_features``.

    Returns:
        List with two concatenated feature vectors per input path: the image
        itself, then its horizontal flip.

    Raises:
        IOError: if a path cannot be read as an image.
        ValueError: if ``color_space`` is not one of the supported names.
    """
    features = []
    # Walk over every image path
    for file_p in imgs:
        image = cv2.imread(file_p)  # load the image from its path
        if image is None:
            # cv2.imread signals failure by returning None instead of raising
            raise IOError('Could not read image: %s' % file_p)
        # NOTE(review): cv2.imread yields BGR channel order while the codes
        # below are RGB2*. single_img_features uses the same codes, so the two
        # paths stay consistent - change both together or neither.
        if color_space == 'RGB':
            feature_image = np.copy(image)
        elif color_space == 'HSV':
            feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
        elif color_space == 'LUV':
            feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2LUV)
        elif color_space == 'HLS':
            feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
        elif color_space == 'YUV':
            feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2YUV)
        elif color_space == 'YCrCb':
            feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)
        else:
            raise ValueError('Unsupported color_space: %r' % (color_space,))
        file_features = img_features(feature_image, spatial_feat, hist_feat, hog_feat, hist_bins, orient,
                        pix_per_cell, cell_per_block, hog_channel, spatial_size)
        features.append(np.concatenate(file_features))
        # Augment the dataset with the horizontally flipped image
        feature_image = cv2.flip(feature_image, 1)
        file_features = img_features(feature_image, spatial_feat, hist_feat, hog_feat, hist_bins, orient,
                        pix_per_cell, cell_per_block, hog_channel, spatial_size)
        features.append(np.concatenate(file_features))
    return features

返回一张图片的特征

# Return the feature vector of a single image
def single_img_features(img, color_space='RGB', spatial_size=(32, 32),
                        hist_bins=32, orient=9,
                        pix_per_cell=8, cell_per_block=2, hog_channel=0,
                        spatial_feat=True, hist_feat=True, hog_feat=True):
    """Compute the concatenated feature vector of one in-memory image.

    Args:
        img: image array (for example one sliding-window patch).
        color_space: 'RGB' (no conversion), 'HSV', 'LUV', 'HLS', 'YUV' or
            'YCrCb'.
        spatial_size, hist_bins: settings for the raw-pixel and histogram
            features.
        orient, pix_per_cell, cell_per_block: HOG settings.
        hog_channel: channel index, or 'ALL' for every channel.
        spatial_feat, hist_feat, hog_feat: switches for each feature type.

    Returns:
        1-D array with the enabled features in the order spatial, histogram,
        HOG; an empty array when every feature type is switched off.

    Raises:
        ValueError: if ``color_space`` is not one of the supported names.
    """
    # 1) Apply colour conversion if other than 'RGB'
    if color_space == 'RGB':
        feature_image = np.copy(img)
    elif color_space == 'HSV':
        feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    elif color_space == 'LUV':
        feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2LUV)
    elif color_space == 'HLS':
        feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
    elif color_space == 'YUV':
        feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
    elif color_space == 'YCrCb':
        feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2YCrCb)
    else:
        raise ValueError('Unsupported color_space: %r' % (color_space,))

    # 2) Collect the enabled feature vectors. The list is deliberately not
    # named img_features, which would shadow the function of that name.
    feature_parts = []
    if spatial_feat:
        feature_parts.append(bin_spatial(feature_image, size=spatial_size))
    if hist_feat:
        feature_parts.append(color_hist(feature_image, nbins=hist_bins))
    if hog_feat:
        if hog_channel == 'ALL':
            hog_features = []
            for channel in range(feature_image.shape[2]):
                hog_features.extend(get_hog_features(feature_image[:, :, channel],
                                    orient, pix_per_cell, cell_per_block,
                                    vis=False, feature_vec=True))
        else:
            # NOTE(review): img_features (used for training) converts to
            # grayscale on this path instead of slicing one channel; confirm
            # which is intended so training and inference features agree.
            hog_features = get_hog_features(feature_image[:, :, hog_channel], orient,
                        pix_per_cell, cell_per_block, vis=False, feature_vec=True)
        feature_parts.append(hog_features)

    # 3) Return one flat vector; previously the function fell off the end and
    # returned None, so callers had nothing to classify.
    if not feature_parts:
        return np.array([])
    return np.concatenate(feature_parts)

训练SVM

数据

  • 车的数据:https://s3.amazonaws.com/udacity-sdc/Vehicle_Tracking/vehicles.zip
    这里写图片描述
  • 非车的数据:https://s3.amazonaws.com/udacity-sdc/Vehicle_Tracking/non-vehicles.zip
    非车

使用glob获取所有训练图片的路径

import glob
def GetData():
    """Return ``(cars, notcars)``: two lists of training image paths.

    Every ``.png`` three directory levels below ``data_more`` is collected
    with glob (``*`` matches anything). A path containing ``'non-'`` is
    counted as a non-car image, since those files are kept under a
    ``non-vehicles`` folder; every other path is counted as a car image.
    """
    all_paths = glob.glob('data_more/*/*/*.png')
    cars = [path for path in all_paths if 'non-' not in path]
    notcars = [path for path in all_paths if 'non-' in path]
    return cars, notcars

训练svm


def train_test():
    """Train the car / not-car classifier, save it to disk and return it.

    The feature settings (``color_space``, ``spatial_size``, ``hist_bins``,
    ``orient``, ``pix_per_cell``, ``cell_per_block``, ``hog_channel``,
    ``spatial_feat``, ``hist_feat``, ``hog_feat``) are read from module-level
    names.

    Returns:
        ``(svc, X_scaler)``: the fitted LinearSVC and the fitted
        StandardScaler. Both are also written to the ``save`` directory.
    """
    import os  # local import: only needed to create the output directory

    cars, notcars = GetData()
    # Extract features for both classes in bulk
    car_features = extract_features(cars, color_space=color_space,
                                    spatial_size=spatial_size, hist_bins=hist_bins,
                                    orient=orient, pix_per_cell=pix_per_cell,
                                    cell_per_block=cell_per_block,
                                    hog_channel=hog_channel, spatial_feat=spatial_feat,
                                    hist_feat=hist_feat, hog_feat=hog_feat)
    print('Car samples: ', len(car_features))
    notcar_features = extract_features(notcars, color_space=color_space,
                                       spatial_size=spatial_size, hist_bins=hist_bins,
                                       orient=orient, pix_per_cell=pix_per_cell,
                                       cell_per_block=cell_per_block,
                                       hog_channel=hog_channel, spatial_feat=spatial_feat,
                                       hist_feat=hist_feat, hog_feat=hog_feat)
    print('notCar samples: ', len(notcar_features))
    # Stack car and non-car features into one matrix
    X = np.vstack((car_features, notcar_features)).astype(np.float64)
    # Requires: from sklearn.preprocessing import StandardScaler
    # Standardise the features; the SVM needs comparable feature scales.
    # NOTE(review): the scaler is fitted on all samples before the split, so
    # the score printed below is measured on data the scaler has already seen.
    X_scaler = StandardScaler().fit(X)
    scaled_X = X_scaler.transform(X)

    # Labels: 1 for car, 0 for non-car
    y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))

    # Hold out 20% of the samples as the test set
    X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.2, random_state=22)
    print('Using:', orient, 'orientations', pix_per_cell,
          'pixels per cell and', cell_per_block, 'cells per block')
    print('Feature vector length:', len(X_train[0]))

    # Requires: from sklearn.svm import LinearSVC
    # Linear SVM with hinge loss
    svc = LinearSVC(loss='hinge')
    t = time.time()
    # Run the training
    svc.fit(X_train, y_train)
    t2 = time.time()
    print(round(t2 - t, 2), 'Seconds to train SVC...')
    # Accuracy on the held-out set
    print(svc.score(X_test, y_test))
    # Persist the model and the scaler so later runs can skip training.
    # Create the target directory first: without it joblib.dump fails and the
    # whole training run is lost.
    if not os.path.isdir('save'):
        os.makedirs('save')
    joblib.dump(X_scaler,'save/scaler.m')
    joblib.dump(svc,'save/model.m')
    return svc, X_scaler

ok,现在我们已经得到了一个训练好的分类器,但是我们不需要每次运行都重新训练,那会非常耗时,我们可以先尝试加载已保存的参数,如果没有加载到,再重新训练

# Reuse the saved classifier and scaler; train from scratch only if loading fails.
try:
    svc = joblib.load('save/model.m')
    X_scaler = joblib.load('save/scaler.m')
except Exception:
    # Exception instead of a bare except, so Ctrl-C / SystemExit still abort
    # the program rather than silently starting a long training run.
    svc, X_scaler = train_test()

窗口滑动

当我们拿到一张图片时,需要在图片上进行窗口的滑动,获取图片中的各个小窗口,再使用svm分类器判断这些小窗口是否是汽车。

def slide_window(img, x_start_stop=(None, None), y_start_stop=(None, None),
                 xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
    """List the sliding-window rectangles that cover a region of ``img``.

    Args:
        img: image array; ``img.shape[1]`` / ``img.shape[0]`` give the default
            right / bottom limits.
        x_start_stop: ``(start, stop)`` column limits; ``None`` means the
            image edge. Lists and tuples are both accepted and never modified.
        y_start_stop: ``(start, stop)`` row limits, same convention.
        xy_window: ``(width, height)`` of each window in pixels.
        xy_overlap: fraction of overlap between neighbouring windows in x and
            y.

    Returns:
        List of ``((startx, starty), (endx, endy))`` tuples, row by row.
    """
    # Resolve the search region into locals. Writing the defaults back into
    # the argument would change a shared default list (so the first image's
    # size would stick for every later call) and the caller's own list.
    x_start = 0 if x_start_stop[0] is None else x_start_stop[0]
    x_stop = img.shape[1] if x_start_stop[1] is None else x_start_stop[1]
    y_start = 0 if y_start_stop[0] is None else y_start_stop[0]
    y_stop = img.shape[0] if y_start_stop[1] is None else y_start_stop[1]
    # Span of the region to be searched
    xspan = x_stop - x_start
    yspan = y_stop - y_start
    # Pixels per step in x/y (builtin int: np.int is gone from current NumPy)
    nx_pix_per_step = int(xy_window[0] * (1 - xy_overlap[0]))
    ny_pix_per_step = int(xy_window[1] * (1 - xy_overlap[1]))
    # Number of windows in x/y
    nx_buffer = int(xy_window[0] * (xy_overlap[0]))
    ny_buffer = int(xy_window[1] * (xy_overlap[1]))
    nx_windows = int((xspan - nx_buffer) / nx_pix_per_step)
    ny_windows = int((yspan - ny_buffer) / ny_pix_per_step)
    # Windows are generated one by one because the classifier will also
    # consume them one by one.
    window_list = []
    for ys in range(ny_windows):
        for xs in range(nx_windows):
            # Corner coordinates of this window
            startx = xs * nx_pix_per_step + x_start
            endx = startx + xy_window[0]
            starty = ys * ny_pix_per_step + y_start
            endy = starty + xy_window[1]
            window_list.append(((startx, starty), (endx, endy)))
    return window_list

预测一张图片

目的:传入一张图片,找到图片汽车的位置

遍历所有的窗口并返回有车的窗口

# Find the windows that contain a car
def search_windows(img, windows, clf, scaler, color_space='RGB',
                    spatial_size=(32, 32), hist_bins=32,
                    hist_range=(0, 256), orient=9,
                    pix_per_cell=8, cell_per_block=2,
                    hog_channel=0, spatial_feat=True,
                    hist_feat=True, hog_feat=True):
    """Return the windows from ``windows`` that ``clf`` classifies as a car.

    Each window ``((x1, y1), (x2, y2))`` is cut out of ``img``, resized to
    64x64, converted to features with ``single_img_features``, scaled with
    ``scaler`` and passed to ``clf.predict``; windows predicted as 1 are kept.
    ``hist_range`` is accepted but not used by this function.
    """
    hits = []
    for candidate in windows:
        top_left, bottom_right = candidate[0], candidate[1]
        # Cut the window out of the frame and bring it to the 64x64 patch size
        patch = cv2.resize(
            img[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]],
            (64, 64))
        # Features of this patch
        raw = single_img_features(patch, color_space=color_space,
                                  spatial_size=spatial_size, hist_bins=hist_bins,
                                  orient=orient, pix_per_cell=pix_per_cell,
                                  cell_per_block=cell_per_block,
                                  hog_channel=hog_channel, spatial_feat=spatial_feat,
                                  hist_feat=hist_feat, hog_feat=hog_feat)
        # Standardise as a single-row sample
        scaled = scaler.transform(np.array(raw).reshape(1, -1))
        # Keep the window when the classifier answers "car"
        if clf.predict(scaled) == 1:
            hits.append(candidate)
    return hits

画方框


def draw_boxes(img, bboxes, color=(0, 0, 255), thick=6):
    """Return a copy of ``img`` with one rectangle drawn per box.

    Each box is ``(corner, opposite_corner)``; ``color`` and ``thick`` are
    passed to ``cv2.rectangle``. The input image is left untouched.
    """
    canvas = np.copy(img)
    for box in bboxes:
        first_corner, second_corner = box[0], box[1]
        cv2.rectangle(canvas, first_corner, second_corner, color, thick)
    return canvas

测试一张图

def test_one(image, svc, X_scaler,ifGetOne=True):
    """Return a copy of ``image`` with every car-classified window boxed.

    Windows of 128x128 pixels with 85% overlap are slid over rows 400-600 of
    the frame and classified with ``svc`` / ``X_scaler``. The feature settings
    are read from module-level names. ``ifGetOne`` is accepted but not used in
    this version.
    """
    # All sliding windows inside the search band
    candidate_windows = slide_window(image, x_start_stop=[None, None],
                                     y_start_stop=[400, 600],
                                     xy_window=(128, 128),
                                     xy_overlap=(0.85, 0.85))
    feature_options = dict(color_space=color_space,
                           spatial_size=spatial_size, hist_bins=hist_bins,
                           orient=orient, pix_per_cell=pix_per_cell,
                           cell_per_block=cell_per_block,
                           hog_channel=hog_channel, spatial_feat=spatial_feat,
                           hist_feat=hist_feat, hog_feat=hog_feat)
    # Windows in which a car was detected
    car_windows = search_windows(image, candidate_windows, svc, X_scaler,
                                 **feature_options)
    # Draw a rectangle around every such window
    return draw_boxes(np.copy(image), car_windows, color=(0, 0, 255), thick=6)

测试代码

    # Reuse the saved classifier and scaler; train only if loading fails.
    try:
        svc = joblib.load('save/model.m')
        X_scaler = joblib.load('save/scaler.m')
    except Exception:
        # Not a bare except, so Ctrl-C / SystemExit still stop the program
        svc, X_scaler = train_test()
    # NOTE(review): `str` must be a variable holding the image path that is
    # defined outside this snippet; the builtin type `str` is not a valid
    # argument for cv2.imread - confirm.
    img = cv2.imread(str)
    if img is None:
        # cv2.imread returns None instead of raising when the read fails
        raise IOError('Could not read the test image')
    finalImg = test_one(img, svc, X_scaler)
    cv2.imshow("image",finalImg)
    # imshow only paints once the GUI event loop runs; wait for a key press
    cv2.waitKey(0)

这里写图片描述

热力图

热力图是先定义一个和图像一样大小的全0矩阵,再把每个被分类为汽车的窗口所覆盖的像素值+1。这样,被多个窗口同时圈中的像素点就是汽车所在区域可信度最高的地方;再通过阈值过滤掉只被很少窗口圈中的区域,就可以去除误检的区域。
这里写图片描述

这里写图片描述

# Heat map
# Add 1 to the heat map inside every box that was classified as a car
def add_heat(heatmap, bbox_list):
    """Increment ``heatmap`` in place inside each box and return it.

    Every box has the form ``((x1, y1), (x2, y2))``; rows ``y1:y2`` and
    columns ``x1:x2`` are raised by 1.
    """
    for box in bbox_list:
        x1, y1 = box[0][0], box[0][1]
        x2, y2 = box[1][0], box[1][1]
        heatmap[y1:y2, x1:x2] += 1
    return heatmap

# Keep only the heat values that are strictly greater than the threshold
def apply_threshold(heatmap, threshold):
    """Zero every entry of ``heatmap`` that is ``<= threshold`` and return it.

    The array is modified in place; entries above the threshold are kept.
    """
    too_cold = heatmap <= threshold
    heatmap[too_cold] = 0
    return heatmap

# Draw one bounding box per labelled heat-map region
def draw_labeled_bboxes_old(img, labels):
    """Draw a box around every labelled region directly on ``img``.

    ``labels`` is a pair ``(label_map, count)``: region ``k`` (1..count) is
    the set of positions where ``label_map == k``. The box spans the min/max
    column and row of those positions. ``img`` is modified and returned.
    """
    label_map, car_count = labels[0], labels[1]
    for car_number in range(1, car_count + 1):
        # Positions that belong to this region
        coords = (label_map == car_number).nonzero()
        rows = np.array(coords[0])
        cols = np.array(coords[1])
        # Bounding box from the extreme x (column) and y (row) values
        top_left = (np.min(cols), np.min(rows))
        bottom_right = (np.max(cols), np.max(rows))
        cv2.rectangle(img, top_left, bottom_right, (0,0,255), 6)
    return img


# Merge overlapping detections through a heat map
def GetMultiHeat(image, hot_windows, track_list, Y_MIN, THRES_LEN):
    """Merge car windows via a heat map and draw the merged boxes.

    Args:
        image: frame the windows belong to.
        hot_windows: windows ``((x1, y1), (x2, y2))`` classified as car.
        track_list, Y_MIN, THRES_LEN: passed through to
            ``draw_labeled_bboxes``, which is defined outside this snippet.

    Returns:
        ``(draw_img, track_list)``: a copy of ``image`` with the merged boxes
        drawn, and the track list returned by ``draw_labeled_bboxes``.
    """
    # One heat value per pixel. Builtin float replaces np.float, which is no
    # longer available in current NumPy releases.
    heat = np.zeros_like(image[:, :, 0]).astype(float)
    heat = add_heat(heat, hot_windows)
    # Drop regions hit by at most one window (likely false detections)
    heat = apply_threshold(heat, 1)
    # Clamp to the displayable range 0..255
    heatmap = np.clip(heat, 0, 255)
    # NOTE(review): label is presumably scipy.ndimage's label - confirm the import
    labels = label(heatmap)
    boxes, track_list = draw_labeled_bboxes(labels, track_list, Y_MIN, THRES_LEN)
    draw_img = np.copy(image)
    for bbox in boxes:
        cv2.rectangle(draw_img, bbox[0], bbox[1], (0, 0, 255), 6)
    return draw_img, track_list

修改test_one函数


def test_one(img, svc, X_scaler,ifGetOne=True):
    """Detect cars in ``img`` and return an annotated copy.

    Windows of 128x128 pixels with 85% overlap are slid over rows 400-600 and
    classified with ``svc`` / ``X_scaler``. With ``ifGetOne`` set, the hits
    are merged through ``GetMultiHeat`` (which also updates the module-level
    ``track_list``); otherwise every hit window is drawn as is.
    """
    global track_list, Y_MIN, THRES_LEN
    candidate_windows = slide_window(img, x_start_stop=[None, None],
                                     y_start_stop=[400, 600],
                                     xy_window=(128, 128),
                                     xy_overlap=(0.85, 0.85))
    feature_options = dict(color_space=color_space,
                           spatial_size=spatial_size, hist_bins=hist_bins,
                           orient=orient, pix_per_cell=pix_per_cell,
                           cell_per_block=cell_per_block,
                           hog_channel=hog_channel, spatial_feat=spatial_feat,
                           hist_feat=hist_feat, hog_feat=hog_feat)
    car_windows = search_windows(img, candidate_windows, svc, X_scaler,
                                 **feature_options)
    if not ifGetOne:
        # Raw mode: one rectangle per positive window
        return draw_boxes(np.copy(img), car_windows, color=(0, 0, 255), thick=6)
    # Merged mode: combine overlapping windows via the heat map
    merged_img, track_list = GetMultiHeat(img, car_windows, track_list, Y_MIN, THRES_LEN)
    return merged_img

测试代码

# Reuse the saved classifier and scaler; train only if loading fails.
try:
    svc = joblib.load('save/model.m')
    X_scaler = joblib.load('save/scaler.m')
except Exception:
    # Not a bare except, so Ctrl-C / SystemExit still stop the program
    svc, X_scaler = train_test()
# NOTE(review): `str` must be a variable holding the image path that is
# defined outside this snippet; the builtin type `str` is not a valid
# argument for cv2.imread - confirm.
img = cv2.imread(str)
if img is None:
    # cv2.imread returns None instead of raising when the read fails
    raise IOError('Could not read the test image')
finalImg = test_one(img, svc, X_scaler)
cv2.imshow("image",finalImg)
# imshow only paints once the GUI event loop runs; wait for a key press
cv2.waitKey(0)

这里写图片描述

读取视频文件和定位视频文件中汽车

# Run the detector on every frame of the video and show the result live.
cap = cv2.VideoCapture('project_video.mp4')
try:
    svc = joblib.load('save/model.m')
    X_scaler = joblib.load('save/scaler.m')
except Exception:
    # Not a bare except, so Ctrl-C / SystemExit still stop the program
    svc, X_scaler = train_test()
try:
    while cap.isOpened():
        ret, img = cap.read()
        # A failed read (for example end of the video) ends the loop
        if not ret or img is None:
            break
        finalImg = test_one(img, svc, X_scaler)
        cv2.imshow('Contours', finalImg)
        # waitKey also drives the GUI event loop; 27 is the Esc key
        k = cv2.waitKey(10)
        if k == 27:
            break
finally:
    # Free the video handle and close the window even if a frame fails
    cap.release()
    cv2.destroyAllWindows()
  • 1
    点赞
  • 17
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值