准备工具:
python3.6.2
pycharm-professional-2017.2.7
cuda_9.0.176_windows
cudnn-9.0-windows7-x64-v7.1
tensorflow-gpu v1.9.0
protoc-3.4.0-win32
ssd_mobilenet_v2_coco_2018_03_29.tar
Git-2.22.0-64-bit
下载tensorflow models https://github.com/tensorflow/models
在C:\Program Files\Python36\Lib\site-packages路径目录下,创建文件 tensorflow_model.pth
F:\tensorflow\models\research\slim
F:\tensorflow\models\research\object_detection
F:\tensorflow\models\research
下载protoc-3.4.0-win32.zip 编译tensorflow models支持python
https://github.com/protocolbuffers/protobuf/releases/tag/v3.4.0
准备数据:
视频数据 >> http://www.robots.ox.ac.uk/ActiveVision/Research/Projects/2009bbenfold_headpose/Datasets/TownCentreXVID.avi
标注信息 >> http://www.robots.ox.ac.uk/ActiveVision/Research/Projects/2009bbenfold_headpose/Datasets/TownCentre-groundtruth.top
将视频数据提取图片保存为jpg
import tensorflow as tf
import cv2 as cv
import os
def video2ims(src, train_path="images", test_path="test_images", factor=2, train_frames=3600):
    """Split a video into down-scaled JPEG frames for training and testing.

    Frames are numbered from 0 in read order and written as "<frame>.jpg".
    The first ``train_frames`` frames are saved under ``train_path``; every
    later frame is saved under ``test_path``.

    Args:
        src: Path of the video file to read.
        train_path: Directory that receives the leading (training) frames.
        test_path: Directory that receives the remaining frames.
        factor: Integer divisor applied to the frame width and height.
        train_frames: Number of leading frames written to ``train_path``.

    Raises:
        IOError: If the video cannot be opened.
    """
    cap = cv.VideoCapture(src)
    try:
        # Fail loudly instead of silently producing two empty folders.
        if not cap.isOpened():
            raise IOError("cannot open video: %s" % src)

        # exist_ok lets the extraction be re-run without crashing on
        # directories left over from a previous run.
        os.makedirs(train_path, exist_ok=True)
        os.makedirs(test_path, exist_ok=True)

        counts = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
        w = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
        print("number of frames : %d" % counts)

        # The target size is the same for every frame, so compute it once.
        size = (w // factor, h // factor)
        frame = 0
        while True:
            ret, im = cap.read()
            if not ret:
                break
            path = train_path if frame < train_frames else test_path
            im = cv.resize(im, size)
            cv.imwrite(os.path.join(path, str(frame) + ".jpg"), im)
            frame += 1
    finally:
        # Release the capture handle even if reading or writing fails.
        cap.release()
if __name__ == "__main__":
    # Extract the frames of the TownCentre video using the default output
    # folders and scale factor.
    video_file = "F:/opencv/python/video/TownCentreXVID.avi"
    video2ims(video_file)
生成图片的xml描述文件,需要把生成的jpg图片拷贝到JPEGImages文件夹下,执行代码后,将生成的xml文件拷贝到Annotations文件夹下
import os
import pandas as pd
if __name__ == '__main__':
    # Generate one Pascal VOC style XML annotation file per training frame
    # from the ground-truth CSV, which is read without a header row.
    GT = pd.read_csv('F:/opencv/python/video/TownCentre-groundtruth.top', header=None)

    def indent(x, y):
        """Return *x* prefixed with *y* spaces."""
        return ' ' * y + x

    def clamp(value, upper):
        """Limit *value* to the inclusive range [0, upper]."""
        return max(0, min(value, upper))

    # NOTE(review): presumably this must equal the factor used when the
    # frames were extracted to JPEG -- confirm against that script.
    factor = 2
    train_size = 3600
    # exist_ok lets the script be re-run without crashing on an existing folder.
    os.makedirs('xmls', exist_ok=True)
    name = 'pedestrian'
    # NOTE(review): assumes the source video is 1920x1080 -- confirm.
    width, height = 1920 // factor, 1080 // factor

    for frame_number in range(train_size):
        # Column 1 holds the frame number each ground-truth row belongs to.
        Frame = GT.loc[GT[1] == frame_number]

        image_name = str(frame_number) + '.jpg'
        lines = [
            '<annotation>\n',
            indent('<folder>voc2012</folder>\n', 1),
            indent('<filename>' + image_name + '</filename>\n', 1),
            indent('<path>F:/pedestrian_data/VOC2012/JPEGImages/' + image_name + '</path>\n', 1),
            indent('<size>\n', 1),
            indent('<width>' + str(width) + '</width>\n', 2),
            indent('<height>' + str(height) + '</height>\n', 2),
            indent('<depth>3</depth>\n', 2),
            indent('</size>\n', 1),
        ]

        # Columns 8 and 10 supply the x coordinates, columns 11 and 9 the y
        # coordinates, of two opposite corners of each box.
        for x1, y1, x2, y2 in zip(Frame[8], Frame[11], Frame[10], Frame[9]):
            # int() guarantees integer pixel values in the XML regardless of
            # the scalar type pandas yields.
            x1, y1, x2, y2 = (int(round(v)) for v in (x1, y1, x2, y2))

            # Order the corners with min/max, scale to the resized image, and
            # clamp so the box never extends past the image borders.
            xmin = clamp(min(x1, x2) // factor, width)
            xmax = clamp(max(x1, x2) // factor, width)
            ymin = clamp(min(y1, y2) // factor, height)
            ymax = clamp(max(y1, y2) // factor, height)

            # A box with no area (e.g. one lying fully outside the frame
            # before clamping) carries no usable annotation; skip it.
            if xmin >= xmax or ymin >= ymax:
                continue

            lines.extend([
                indent('<object>\n', 1),
                indent('<name>' + name + '</name>\n', 2),
                indent('<pose>Unspecified</pose>\n', 2),
                indent('<truncated>' + str(0) + '</truncated>\n', 2),
                indent('<difficult>' + str(0) + '</difficult>\n', 2),
                indent('<bndbox>\n', 2),
                indent('<xmin>' + str(xmin) + '</xmin>\n', 3),
                indent('<ymin>' + str(ymin) + '</ymin>\n', 3),
                indent('<xmax>' + str(xmax) + '</xmax>\n', 3),
                indent('<ymax>' + str(ymax) + '</ymax>\n', 3),
                indent('</bndbox>\n', 2),
                indent('</object>\n', 1),
            ])
        lines.append('</annotation>\n')

        # One write per file instead of many small writes.
        with open(os.path.join('xmls', str(frame_number) + '.xml'), 'w') as file:
            file.write(''.join(lines))
        # '\r' keeps the progress counter on a single console line.
        print('File:', frame_number, end='\r')
在ImageSets文件夹下创建文件夹Main
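在Main文件夹下创建图片列表文件。注意:如果一开始把文件命名为pedestrian_train.txt和pedestrian_val.txt,后续执行create_pascal_tf_record.py时会直接提示错误:不能打开aeroplane_train.txt和aeroplane_val.txt文件(该脚本固定读取aeroplane_<set>.txt),因此这两个文件应命名为aeroplane_train.txt和aeroplane_val.txt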
表示第n张图是否存在行人,1表示存在,0表示不存在,val同理
训练数据目录
在data目录下创建 label_map.pbtxt,这里只有一个对象,所以只需要写一个
参考pascal_label_map.pbtxt
打开cmd,生成 tf record
python object_detection/dataset_tools/create_pascal_tf_record.py --label_map_path=F:/tensorflow/pedestrian_train/data/label_map.pbtxt --data_dir=F:\pedestrian_data --year=VOC2012 --set=train --output_path=F:/tensorflow/pedestrian_train/data/pascal_train.record
python object_detection/dataset_tools/create_pascal_tf_record.py --label_map_path=F:/tensorflow/pedestrian_train/data/label_map.pbtxt --data_dir=F:\pedestrian_data --year=VOC2012 --set=val --output_path=F:/tensorflow/pedestrian_train/data/pascal_val.record