ROI + CamShift + Kalman Pedestrian Tracking

 

import cv2
import numpy as np
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-a", "--algorithm", help = "m (or nothing) for meanShift and c for camshift")
args = vars(parser.parse_args())


def center(points):
    x = (points[0][0] + points[1][0] + points[2][0] + points[3][0]) / 4
    y = (points[0][1] + points[1][1] + points[2][1] + points[3][1]) / 4
    return np.array([np.float32(x), np.float32(y)], np.float32)

font = cv2.FONT_HERSHEY_PLAIN


class Pedestrian():
    def __init__(self, id, frame, track_window):
        self.id = int(id)
        x, y, w, h = track_window
        self.track_window = track_window
        self.roi = cv2.cvtColor(frame[y:y+h, x:x+w], cv2.COLOR_BGR2HSV)
        roi_hist = cv2.calcHist([self.roi], [0], None, [16], [0, 180])
        self.roi_hist = cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

        self.kalman = cv2.KalmanFilter(4, 2)
        self.kalman.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32)
        self.kalman.transitionMatrix = np.array([[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]], np.float32)
        self.kalman.processNoiseCov = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]], np.float32) * 0.03
        self.measurement = np.zeros((2, 1), np.float32)
        self.prediction = np.zeros((2, 1), np.float32)
        self.term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)
        self.center = None
        self.update(frame)

    def __del__(self):
        print "Pedestrian %d destroyed" % self.id

    def update(self, frame):
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        back_project = cv2.calcBackProject([hsv], [0], self.roi_hist, [0, 180], 1)

        if args.get("algorithm") == "c":
            ret, self.track_window = cv2.CamShift(back_project, self.track_window, self.term_crit)
            pts = cv2.boxPoints(ret)
            pts = np.int0(pts)
            self.center = center(pts)
            cv2.polylines(frame, [pts], True, 255, 1)

        if not args.get("algorithm") or args.get("algorithm") == "m":
            ret, self.track_window = cv2.meanShift(back_project, self.track_window, self.term_crit)
            x, y, w, h = self.track_window
            self.center = center([[x, y], [x+w, y], [x, y+h], [x+w, y+h]])
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 0), 2)

        self.kalman.correct(self.center)
        prediction = self.kalman.predict()
        cv2.circle(frame, (int(prediction[0]), int(prediction[1])), 4, (255, 0, 0), -1)
        cv2.putText(frame, "ID:%d -> %s" % (self.id, self.center), (11, (self.id +1)*25+1), font, 0.6, (0, 0, 0), 1, cv2.LINE_AA)
        cv2.putText(frame, "ID:%d -> %s" % (self.id, self.center), (10, (self.id +1)*25), font, 0.6, (0, 255, 0), 1, cv2.LINE_AA)


def main():
    camera = cv2.VideoCapture("/home/utryjc/Pictures/vtest.avi")
    history = 20
    bs = cv2.createBackgroundSubtractorKNN()
    cv2.namedWindow("surveillance")
    pedestrians = {}
    firstFrame = True
    frames = 0
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640, 480))
    while True:
        print "----------------------FRAME %d------------------" % frames
        grabbed, frame = camera.read()
        if (grabbed is False):
            print "fail to grab frame"
            break

        fgmask = bs.apply(frame)

        if frames < history:
            frames += 1
            continue

        th = cv2.threshold(fgmask.copy(), 127, 255, cv2.THRESH_BINARY)[1]
        th = cv2.erode(th, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)), iterations = 2)
        dilated = cv2.dilate(th, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (8, 3)), iterations = 2)
        image, contours, hier = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)  # OpenCV 3.x signature; OpenCV 4.x returns only (contours, hierarchy)

        counter = 0
        for c in contours:
            if cv2.contourArea(c) > 500:
                (x, y, w, h) = cv2.boundingRect(c)
                cv2.rectangle(frame, (x, y), (x+w, y+h),(0, 255, 0), 1)
                if firstFrame is True:
                    pedestrians[counter] = Pedestrian(counter, frame, (x, y, w, h))
                    counter += 1

        for i, p in pedestrians.iteritems():
            p.update(frame)

        firstFrame = False
        frames += 1

        cv2.imshow("surveillance", frame)
        out.write(frame)

        if cv2.waitKey(110) & 0xff == 27:
            break

    out.release()
    camera.release()


if __name__ == '__main__':
    main()

Program Explanation

def main():
    camera = cv2.VideoCapture("/home/utryjc/Pictures/vtest.avi")
Initialize the background subtractor, using the first 20 frames as the history that shapes the background model:
    history = 20
    bs = cv2.createBackgroundSubtractorKNN()
    cv2.namedWindow("surveillance")
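
Incidentally, the history length can also be passed to the subtractor directly instead of being enforced with the manual frame counter below. A minimal sketch, not from the original program, with illustrative parameter values:

import cv2

# history=20 mirrors the manual counter; dist2Threshold and detectShadows
# are shown at their usual defaults purely for illustration
bs = cv2.createBackgroundSubtractorKNN(history=20, dist2Threshold=400.0, detectShadows=True)

cap = cv2.VideoCapture("vtest.avi")
while True:
    grabbed, frame = cap.read()
    if not grabbed:
        break
    # threshold at 127 also discards shadow pixels, which KNN marks as 127
    fgmask = cv2.threshold(bs.apply(frame), 127, 255, cv2.THRESH_BINARY)[1]
cap.release()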
Set up a dictionary of pedestrians and a firstFrame flag (the flag lets the background subtractor use the early frames to build its history):
    pedestrains = {}
    firstFrame = True
Set up a frame counter to better identify the moving objects:
    frames = 0
    
Define the video writer. A word about FourCC: FourCC stands for Four-Character Code, a 32-bit identifier (in C, typedef unsigned int FOURCC) that labels the format of a video data stream. cv2.VideoWriter_fourcc() turns four characters into the corresponding codec identifier; here the XVID codec is used. The cv2.VideoWriter arguments are, in order: output filename, codec, frame rate, and frame size.

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640, 480))
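
One pitfall worth noting: VideoWriter silently writes nothing if the frames passed to write() do not match the size given to the constructor. A minimal sketch, not from the original program, that reads the real frame size from the capture instead of hard-coding (640, 480):

import cv2

camera = cv2.VideoCapture("vtest.avi")
# query the capture's actual frame size so write() and the writer agree
w = int(camera.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', fourcc, 20.0, (w, h))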
Read the video frame by frame:
    while True:
        print "----------------------FRAME %d------------------" % frames
        grabbed, frame = camera.read()
        if (grabbed is False):
            print "fail to grab frame"
            break

        fgmask = bs.apply(frame)

        if frames < history:
            frames += 1
            continue

cv2.threshold(src, thresh, maxval, type[, dst]) → retval, dst

src: the source image
thresh: the threshold value
maxval: the value assigned to pixels that pass the threshold
type: the thresholding algorithm to use; a common value is 0 (cv2.THRESH_BINARY)

th = cv2.threshold(fgmask.copy(), 127, 255, cv2.THRESH_BINARY)[1]  # binarize the foreground mask
th = cv2.erode(th, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)), iterations = 2)  # erode the image with the given structuring element

dilated = cv2.dilate(th, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (8, 3)), iterations = 2)  # dilate the image with the given structuring element
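
To make the effect of this threshold → erode → dilate chain concrete, here is a tiny self-contained sketch on a synthetic mask (the input values are made up):

import cv2
import numpy as np

mask = np.zeros((100, 100), np.uint8)
cv2.rectangle(mask, (30, 30), (70, 70), 200, -1)   # a large bright blob
mask[10, 10] = 200                                 # one isolated noise pixel

th = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)[1]        # both become 255
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
th = cv2.erode(th, kernel, iterations = 2)         # the lone pixel disappears
dilated = cv2.dilate(th, kernel, iterations = 2)   # the blob grows back to size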

image, contours, hier = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)  # OpenCV 3.x signature; OpenCV 4.x returns only (contours, hierarchy)

Once the contours have been identified, a pedestrian object is instantiated only for each contour found in the first frame; a minimum contour area is enforced so that the detections are denoised:
counter = 0
for c in contours:  # one pedestrian object per contour
    if cv2.contourArea(c) > 500:
        (x, y, w, h) = cv2.boundingRect(c)
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 1)
        if firstFrame is True:
            pedestrians[counter] = Pedestrian(counter, frame, (x, y, w, h))
            counter += 1
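
The same area filter, isolated on a synthetic mask (shapes and thresholds are illustrative, not from the post): only the contour larger than 500 pixels survives.

import cv2
import numpy as np

mask = np.zeros((200, 200), np.uint8)
cv2.rectangle(mask, (50, 50), (120, 150), 255, -1)   # area ~7000, kept
cv2.rectangle(mask, (5, 5), (10, 10), 255, -1)       # area ~25, rejected
# [-2] picks the contour list under both the OpenCV 3.x and 4.x return signatures
contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
boxes = [cv2.boundingRect(c) for c in contours if cv2.contourArea(c) > 500]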

For every detected pedestrian, update() is called with the current frame. This has to happen in the frame's original color space, because each pedestrian object is responsible for drawing its own information:

  for i, p in pedestrians.iteritems():

During the update, the Kalman filter is also invoked to predict the pedestrian's position:

  p.update(frame)


Setting firstFrame to False means that no additional pedestrians will be picked up; only the existing ones keep being tracked:
  firstFrame = False
  frames += 1

  cv2.imshow("surveillance", frame)
  out.write(frame)

  if cv2.waitKey(110) & 0xff == 27:
      break

out.release()
camera.release()

 

import cv2
import numpy as np

argparse is mainly used to handle command-line arguments:
import argparse

parser = argparse.ArgumentParser()

If we pass m or c on the command line, the algorithm argument becomes "m" or "c"; the help string documents that the option selects between meanShift and CamShift:
parser.add_argument("-a", "--algorithm", help = "m (or nothing) for meanShift and c for camshift")
Get the argument values as a dictionary:
args = vars(parser.parse_args())
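
To illustrate (the argument lists and the script name tracker.py in the comments are made up): vars() turns the parsed namespace into a plain dictionary, so args.get("algorithm") is "c", "m", or None.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-a", "--algorithm", help = "m (or nothing) for meanShift and c for camshift")

args = vars(parser.parse_args(["-a", "c"]))   # simulates: python tracker.py -a c
assert args.get("algorithm") == "c"           # the CamShift branch is taken

args = vars(parser.parse_args([]))            # simulates: python tracker.py
assert args.get("algorithm") is None          # falls back to meanShift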


def center(points):
    x = (points[0][0] + points[1][0] + points[2][0] + points[3][0]) / 4
    y = (points[0][1] + points[1][1] + points[2][1] + points[3][1]) / 4
    return np.array([np.float32(x), np.float32(y)], np.float32)
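
A quick worked example of the center() function defined above (the coordinates are made up): averaging the four corners of a rectangle yields its geometric midpoint.

pts = [[0, 0], [4, 0], [0, 2], [4, 2]]   # corners of a 4x2 rectangle
c = center(pts)                          # -> array([2., 1.], dtype=float32)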

font = cv2.FONT_HERSHEY_PLAIN

The main reason for creating the Pedestrian class is the nature of the Kalman filter. A Kalman filter predicts an object's position from past observations and then corrects that prediction against the actual measurement, but it can only do this for one object at a time. Consequently, every tracked object needs its own Kalman filter. The Pedestrian class therefore holds the Kalman filter, the color histogram, and the region-of-interest information used by the CAMShift algorithm; in addition, each pedestrian's ID is stored so that some live information can be displayed.
The heart of the program is the background subtractor, which identifies the regions of interest and the moving objects that correspond to them.
When the program starts, each such region is extracted and a Pedestrian is instantiated, passing the ID, the frame, and the coordinates of the tracking window (from which the region of interest, ROI, and in turn the ROI's HSV histogram are extracted).
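
The predict/correct cycle that each Pedestrian runs can be seen in isolation in the following minimal sketch (the measurements are made-up points, not tracker output): a constant-velocity model with a 4-dimensional state (x, y, vx, vy) and 2-dimensional measurements (x, y).

import cv2
import numpy as np

kalman = cv2.KalmanFilter(4, 2)                       # 4D state, 2D measurement
kalman.measurementMatrix = np.array([[1, 0, 0, 0],
                                     [0, 1, 0, 0]], np.float32)
kalman.transitionMatrix = np.array([[1, 0, 1, 0],     # x' = x + vx
                                    [0, 1, 0, 1],     # y' = y + vy
                                    [0, 0, 1, 0],     # vx' = vx
                                    [0, 0, 0, 1]], np.float32)
kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 0.03

for mx, my in [(10, 10), (12, 11), (14, 12)]:           # fake observed centers
    prediction = kalman.predict()                       # a-priori estimate
    kalman.correct(np.array([[mx], [my]], np.float32))  # a-posteriori update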
class Pedestrian():
    def __init__(self, id, frame, track_window):
        # compute the histogram of the given ROI, set up the Kalman filter,
        # and bind both to the object's attributes
        self.id = int(id)
        x, y, w, h = track_window
        self.track_window = track_window
        self.roi = cv2.cvtColor(frame[y:y+h, x:x+w], cv2.COLOR_BGR2HSV)
        roi_hist = cv2.calcHist([self.roi], [0], None, [16], [0, 180])
        self.roi_hist = cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

        self.kalman = cv2.KalmanFilter(4, 2)  # cv2.KalmanFilter(4, 2): the state has 4 dimensions, the measurement has 2
        self.kalman.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32)  # measurement matrix
        self.kalman.transitionMatrix = np.array([[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]], np.float32)  # state transition matrix
        self.kalman.processNoiseCov = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]], np.float32) * 0.03  # process noise covariance matrix
        self.measurement = np.zeros((2, 1), np.float32)
        self.prediction = np.zeros((2, 1), np.float32)
        self.term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)
        self.center = None
        self.update(frame)

    def __del__(self):
        print "Pedestrian %d destroyed" % self.id

    def update(self, frame):
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        back_project = cv2.calcBackProject([hsv], [0], self.roi_hist, [0, 180], 1)  # compute the back projection of the HSV histogram

        # use CAMShift to track the pedestrian's motion and correct the Kalman filter with the pedestrian's actual position
        if args.get("algorithm") == "c":
            ret, self.track_window = cv2.CamShift(back_project, self.track_window, self.term_crit)
            pts = cv2.boxPoints(ret)
            pts = np.int0(pts)
            self.center = center(pts)
            cv2.polylines(frame, [pts], True, 255, 1)
Use meanShift to track the pedestrian's motion, and correct the Kalman filter with the pedestrian's actual position:

        if not args.get("algorithm") or args.get("algorithm") == "m":
            ret, self.track_window = cv2.meanShift(back_project, self.track_window, self.term_crit)
            x, y, w, h = self.track_window
            self.center = center([[x, y], [x+w, y], [x, y+h], [x+w, y+h]])
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 0), 2)

        self.kalman.correct(self.center)
        prediction = self.kalman.predict()
        cv2.circle(frame, (int(prediction[0]), int(prediction[1])), 4, (255, 0, 0), -1)
Print the pedestrian's information in the top-left corner of the frame (drawn twice, offset by one pixel, so the black text acts as a drop shadow behind the green text):
        cv2.putText(frame, "ID:%d -> %s" % (self.id, self.center), (11, (self.id +1)*25+1), font, 0.6, (0, 0, 0), 1, cv2.LINE_AA)
        cv2.putText(frame, "ID:%d -> %s" % (self.id, self.center), (10, (self.id +1)*25), font, 0.6, (0, 255, 0), 1, cv2.LINE_AA)
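
Condensed into a standalone sketch (the initial track window coordinates are made up, and vtest.avi is assumed to be in the working directory), the tracking idea inside update() looks like this: compute the ROI's hue histogram once, then back-project it onto every new frame and let meanShift follow the peak.

import cv2

cap = cv2.VideoCapture("vtest.avi")
ok, frame = cap.read()

track_window = (300, 200, 50, 100)                 # x, y, w, h -- illustrative
x, y, w, h = track_window
roi = cv2.cvtColor(frame[y:y+h, x:x+w], cv2.COLOR_BGR2HSV)
roi_hist = cv2.calcHist([roi], [0], None, [16], [0, 180])
cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

while True:
    ok, frame = cap.read()
    if not ok:
        break
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    back_project = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)
    ret, track_window = cv2.meanShift(back_project, track_window, term_crit)
cap.release()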



if __name__ == '__main__':
    main()

Experimental Results

References

http://sugar918.com/index.php/2018/03/28/opencv_3_ji_suan_ji_shi_jue_8_yi_ge_ji_yu_xing_ren_gen_zong_de_li_zi/

https://blog.csdn.net/sinat_21258931/article/details/61418681
