本文为复现B站上面视频,利用YOLO3官方权重进行识别,方便自己温习。
B站视频链接:搭建yolov3+tensorflow2.0开发环境 模型训练 小白篇 机器视觉 神经网络学习
步骤如下:
1·查看自己的显卡
2·搭建tensorflow2.0-cpu/gpu 开发环境
参考博客:
安装tensorflow2.0
3·win+R 输入cmd,使用cd命令进入到目标文件夹
4.输入 python convert.py 生成tf可用的模型
代码convert.py:
from absl import app, flags, logging
from absl.flags import FLAGS
import numpy as np
from yolov3_tf2.models import YoloV3, YoloV3Tiny
from yolov3_tf2.utils import load_darknet_weights
flags.DEFINE_string('weights', './data/yolov3.weights', 'path to weights file')#官方权重文件所在位置
flags.DEFINE_string('output', './checkpoints/yolov3.tf', 'path to output')#输出在该文件夹下
flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')#降低了一些识别效果获得更高的帧率
flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')#使用coco数据集为80分类
def main(_argv):
if FLAGS.tiny:
yolo = YoloV3Tiny(classes=FLAGS.num_classes)
else:
yolo = YoloV3(classes=FLAGS.num_classes)
yolo.summary()
logging.info('model created')
load_darknet_weights(yolo, FLAGS.weights, FLAGS.tiny)
logging.info('weights loaded')
img = np.random.random((1, 320, 320, 3)).astype(np.float32)
output = yolo(img)
logging.info('sanity check passed')
yolo.save_weights(FLAGS.output)
logging.info('weights saved')
if __name__ == '__main__':
try:
app.run(main)
except SystemExit:
pass
运行convert.py代码之前:
运行convert.py代码之后:
生成了适合tensorflow的模型文件。
5.识别
5.1 对图片(./data/people.jpg)进行预测,并且输出:
在cmd终端中输入:
python detect.py --image ./data/people.jpg
代码detect.py:
import time
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import numpy as np
import tensorflow as tf
from yolov3_tf2.models import (
YoloV3, YoloV3Tiny)
from yolov3_tf2.dataset import transform_images, load_tfrecord_dataset
from yolov3_tf2.utils import draw_outputs
#传入参数 ./data/coco.names下存放着coco的80分类的名称
flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
#./checkpoints/yolov3.tf下是官方权重文件模型
flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
'path to weights file')
flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
flags.DEFINE_integer('size', 416, 'resize images to')
flags.DEFINE_string('image', './data/test.jpg', 'path to input image')
flags.DEFINE_string('tfrecord', None, 'tfrecord instead of image')
#./output.jpg为预测图片后的输出结果
flags.DEFINE_string('output', './output.jpg', 'path to output image')
# 80分类 无需修改
flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
def main(_argv):
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
tf.config.experimental.set_memory_growth(physical_devices[0], True)
if FLAGS.tiny:
yolo = YoloV3Tiny(classes=FLAGS.num_classes)
else:
yolo = YoloV3(classes=FLAGS.num_classes)
yolo.load_weights(FLAGS.weights).expect_partial()
logging.info('weights loaded')
class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
logging.info('classes loaded')
if FLAGS.tfrecord:
dataset = load_tfrecord_dataset(
FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
dataset = dataset.shuffle(512)
img_raw, _label = next(iter(dataset.take(1)))
else:
img_raw = tf.image.decode_image(
open(FLAGS.image, 'rb').read(), channels=3)
img = tf.expand_dims(img_raw, 0)
img = transform_images(img, FLAGS.size)
t1 = time.time()
boxes, scores, classes, nums = yolo(img)
t2 = time.time()
logging.info('time: {}'.format(t2 - t1))
logging.info('detections:')
for i in range(nums[0]):
logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
np.array(scores[0][i]),
np.array(boxes[0][i])))
img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
cv2.imwrite(FLAGS.output, img)
logging.info('output saved to: {}'.format(FLAGS.output))
if __name__ == '__main__':
try:
app.run(main)
except SystemExit:
pass
运行detect.py之前:
运行:
运行之后多了一个:
打开之后是这样的:
5.2 打开摄像头进行预测
在终端中输入:python detect_video.py --video 0
其中0表示调用电脑的本地摄像头,如果写1,表示为外接的摄像头
代码detect_video.py:
import time
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import tensorflow as tf
from yolov3_tf2.models import (
YoloV3, YoloV3Tiny)
from yolov3_tf2.dataset import transform_images
from yolov3_tf2.utils import draw_outputs
flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
'path to weights file')
flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
flags.DEFINE_integer('size', 416, 'resize images to')
#
flags.DEFINE_string('video', './data/video.mp4',
'path to video file or number for webcam)')
flags.DEFINE_string('output', None, 'path to output video')
flags.DEFINE_string('output_format', 'XVID', 'codec used in VideoWriter when saving video to file')
flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
def main(_argv):
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
tf.config.experimental.set_memory_growth(physical_devices[0], True)
if FLAGS.tiny:
yolo = YoloV3Tiny(classes=FLAGS.num_classes)
else:
yolo = YoloV3(classes=FLAGS.num_classes)
yolo.load_weights(FLAGS.weights)
logging.info('weights loaded')
class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
logging.info('classes loaded')
times = []
try:
vid = cv2.VideoCapture(int(FLAGS.video))
except:
vid = cv2.VideoCapture(FLAGS.video)
out = None
if FLAGS.output:
# by default VideoCapture returns float instead of int
width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(vid.get(cv2.CAP_PROP_FPS))
codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
while True:
_, img = vid.read()
if img is None:
logging.warning("Empty Frame")
time.sleep(0.1)
continue
img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_in = tf.expand_dims(img_in, 0)
img_in = transform_images(img_in, FLAGS.size)
t1 = time.time()
boxes, scores, classes, nums = yolo.predict(img_in)
t2 = time.time()
times.append(t2-t1)
times = times[-20:]
img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30),
cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
if FLAGS.output:
out.write(img)
cv2.imshow('output', img)
if cv2.waitKey(1) == ord('q'):
break
cv2.destroyAllWindows()
if __name__ == '__main__':
try:
app.run(main)
except SystemExit:
pass
效果:
按Q退出。
5.3 对视频流进行预测,并且输出
在cmd终端中输入:python detect_video.py --video test.mp4 --output ./test_output.avi
test.mp4在当前路径下:
test.mp4内容截图:
在终端中输入:
代码运行之后会打开原始test.mp4,这时画面可能会加载得比较慢,可以按Q退出。
代码运行之后会生成一个:
打开该视频截图:
代码detect_video.py:
import time
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import tensorflow as tf
from yolov3_tf2.models import (
YoloV3, YoloV3Tiny
)
from yolov3_tf2.dataset import transform_images
from yolov3_tf2.utils import draw_outputs
flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
'path to weights file')
flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
flags.DEFINE_integer('size', 416, 'resize images to')
flags.DEFINE_string('video', './data/video.mp4',
'path to video file or number for webcam)')
flags.DEFINE_string('output', None, 'path to output video')
flags.DEFINE_string('output_format', 'XVID', 'codec used in VideoWriter when saving video to file')
flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
def main(_argv):
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
tf.config.experimental.set_memory_growth(physical_devices[0], True)
if FLAGS.tiny:
yolo = YoloV3Tiny(classes=FLAGS.num_classes)
else:
yolo = YoloV3(classes=FLAGS.num_classes)
yolo.load_weights(FLAGS.weights)
logging.info('weights loaded')
class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
logging.info('classes loaded')
times = []
try:
vid = cv2.VideoCapture(int(FLAGS.video))
except:
vid = cv2.VideoCapture(FLAGS.video)
out = None
if FLAGS.output:
# by default VideoCapture returns float instead of int
width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(vid.get(cv2.CAP_PROP_FPS))
codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
while True:
_, img = vid.read()
if img is None:
logging.warning("Empty Frame")
time.sleep(0.1)
continue
img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_in = tf.expand_dims(img_in, 0)
img_in = transform_images(img_in, FLAGS.size)
t1 = time.time()
boxes, scores, classes, nums = yolo.predict(img_in)
t2 = time.time()
times.append(t2-t1)
times = times[-20:]
img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30),
cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
if FLAGS.output:
out.write(img)
cv2.imshow('output', img)
if cv2.waitKey(1) == ord('q'):
break
cv2.destroyAllWindows()
if __name__ == '__main__':
try:
app.run(main)
except SystemExit:
pass