YOLO_tensorflow-master代码下载:
https://github.com/gliese581gg/YOLO_tensorflow
1.模型介绍
YOLO_tensorflow
(Version 0.2, Last updated :2016.02.16)
1.Introduction
This is tensorflow implementation of the YOLO:Real-Time Object Detection
It can only do predictions using pretrained YOLO_small & YOLO_tiny network for now.
I'm gonna support training later.
I extracted weight values from darknet's (.weight) files.
Original code(C implementation) & paper : http://pjreddie.com/darknet/yolo/
2.Install
(1) Download code
(2) Download YOLO weight file from
YOLO_small : https://drive.google.com/file/d/0B2JbaJSrWLpza08yS2FSUnV2dlE/view?usp=sharing
YOLO_tiny : https://drive.google.com/file/d/0B2JbaJSrWLpza0FtQlc3ejhMTTA/view?usp=sharing
(3) Put the 'YOLO_(version).ckpt' in the 'weight' folder of downloaded code
3.Usage
(1) direct usage with default settings (display on console, show output image, no output file writing)
python YOLO_(small or tiny)_tf.py -fromfile (input image filename)
(2) direct usage with custom settings
python YOLO_(small or tiny)_tf.py argvs
where argvs are
-fromfile (input image filename) : input image file
-disp_console (0 or 1) : whether display results on terminal or not
-imshow (0 or 1) : whether display result image or not
-tofile_img (output image filename) : output image file
-tofile_txt (output txt filename) : output text file (contains class, x, y, w, h, probability)
(3) import on other scripts
import YOLO_(small or tiny)_tf
yolo = YOLO_(small or tiny)_tf.YOLO_TF()
yolo.disp_console = (True or False, default = True)
yolo.imshow = (True or False, default = True)
yolo.tofile_img = (output image filename)
yolo.tofile_txt = (output txt filename)
yolo.filewrite_img = (True or False, default = False)
yolo.filewrite_txt = (True or False, default = False)
yolo.detect_from_file(filename)
yolo.detect_from_cvmat(cvmat)
4.Requirements
- Tensorflow
- Opencv2
5.Copyright
According to the LICENSE file of the original code,
- Me and original author hold no liability for any damages
- Do not use this for commercial purposes!
6.Changelog
2016/02/15 : First upload!
2016/02/16 : Added YOLO_tiny, Fixed bug that ignores one of the boxes in grid when both boxes detected valid objects
2016/08/26 : Uploaded weight file converter! (darknet weight -> tensorflow ckpt)
2.模型使用
我使用YOLO时
出现
cv2.imread('./test/person.jpg')
读取到的图片为None.
解决办法 在代码最前面加
import cv2
如果把 import cv2 添加到 import YOLO_tiny_tf 后面,cv2.imread 读取的结果仍然是 None,获取不到图片。
下面是我调用 yolo 的所有代码。
# encoding: utf-8
# Minimal driver: run YOLO_tiny on one image, show the result and also write
# the annotated image and the detection list to disk.
#
# NOTE: cv2 is imported before YOLO_tiny_tf on purpose. With the order
# reversed, cv2.imread('./test/person.jpg') returned None in the author's
# environment.
import cv2
import YOLO_tiny_tf

yolo = YOLO_tiny_tf.YOLO_TF()
yolo.disp_console = True            # print detections on the console
yolo.imshow = True                  # display the annotated image
yolo.tofile_img = './test/ttt.jpg'  # output image file
yolo.tofile_txt = './test/ttt.txt'  # output text file
yolo.filewrite_img = True
yolo.filewrite_txt = True

filename = './test/person.jpg'
yolo.detect_from_file(filename)

# Alternative entry point: read the image yourself and pass the matrix in.
# im = cv2.imread('./test/person.jpg')
# yolo.detect_from_cvmat(im)

# Result of the run:
3.模型保存与运行
(1).将YOLO_tiny_tf.py中的输入添加名字input,代码如下:
def build_networks(self): if self.disp_console : print "Building YOLO_tiny graph..." self.x = tf.placeholder('float32',[None,448,448,3],name="input")
(2).运行save_graph模型和权重一起保存
import os

import cv2
import tensorflow as tf
import numpy as np
from tensorflow.python.framework import test_util
import freeze_graph

from YOLO_tiny_tf import YOLO_TF


def save_graph(sess, output_path, checkpoint, checkpoint_state_name,
               input_graph_name, output_graph_name):
    """Save the session's weights and graph, then freeze them into one file.

    Three artefacts are written under ``output_path``: a checkpoint of all
    variables, the graph definition, and the output of
    ``freeze_graph.freeze_graph`` built from those two.

    Args:
        sess: session holding the built network and its loaded weights.
        output_path: directory that receives every output file.
        checkpoint: base file name of the checkpoint.
        checkpoint_state_name: file name passed to the saver as
            ``latest_filename``.
        input_graph_name: file name for the graph definition.
        output_graph_name: file name for the frozen graph.
    """
    checkpoint_prefix = os.path.join(output_path, checkpoint)
    saver = tf.train.Saver(tf.all_variables())
    saver.save(sess, checkpoint_prefix, global_step=0,
               latest_filename=checkpoint_state_name)
    tf.train.write_graph(sess.graph.as_graph_def(), output_path,
                         input_graph_name)

    # We save out the graph to disk, and then call the const conversion
    # routine.
    input_graph_path = os.path.join(output_path, input_graph_name)
    input_saver_def_path = ""
    input_binary = False
    # The "-0" suffix pairs with global_step=0 in the saver.save() call above.
    input_checkpoint_path = checkpoint_prefix + "-0"
    # "19_fc" is the node the detection script later fetches as "19_fc:0".
    output_node_names = "19_fc"
    restore_op_name = "save/restore_all"
    filename_tensor_name = "save/Const:0"
    output_graph_path = os.path.join(output_path, output_graph_name)
    clear_devices = False

    freeze_graph.freeze_graph(input_graph_path, input_saver_def_path,
                              input_binary, input_checkpoint_path,
                              output_node_names, restore_op_name,
                              filename_tensor_name, output_graph_path,
                              clear_devices, "")


yolo = YOLO_TF()
# An earlier variant wrote yolo.sess.graph.as_graph_def() straight to
# "weights/small_model.pb" instead of going through save_graph().
save_graph(yolo.sess, "/home/acer/pbMake/yolo", "saved_checkpoint",
           "checkpoint_state", "yoloting_input_graph.pb",
           "yoloting_output_graph.pb")

# (3) Load the yoloting_output_graph.pb saved above and use it for testing
# and detection.
"""Run the frozen YOLO_tiny graph on one image and post-process its output.

cv2 and tensorflow are imported inside the functions that need them, so the
numpy-only helpers (``iou`` and ``interpret_output``) can be imported without
either library. ``main`` imports cv2 first and tensorflow second, the same
order the original script used.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

GRID_SIZE = 7
NUM_BOXES = 2
NUM_CLASSES = 20
CLASSES = ("aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car",
           "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
           "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor")

# Layout of the flat network output: class scores, then box scales, then boxes.
_CLASS_END = GRID_SIZE * GRID_SIZE * NUM_CLASSES                 # 980
_SCALE_END = _CLASS_END + GRID_SIZE * GRID_SIZE * NUM_BOXES      # 1078
_OUTPUT_SIZE = _SCALE_END + GRID_SIZE * GRID_SIZE * NUM_BOXES * 4  # 1470


def iou(box1, box2):
    """Return the intersection-over-union of two boxes.

    Each box is ``(x_center, y_center, width, height)``. Returns 0.0 when the
    union has no area, instead of dividing by zero.
    """
    overlap_w = (min(box1[0] + 0.5 * box1[2], box2[0] + 0.5 * box2[2])
                 - max(box1[0] - 0.5 * box1[2], box2[0] - 0.5 * box2[2]))
    overlap_h = (min(box1[1] + 0.5 * box1[3], box2[1] + 0.5 * box2[3])
                 - max(box1[1] - 0.5 * box1[3], box2[1] - 0.5 * box2[3]))
    if overlap_w < 0 or overlap_h < 0:
        intersection = 0
    else:
        intersection = overlap_w * overlap_h
    union = box1[2] * box1[3] + box2[2] * box2[3] - intersection
    if union <= 0:
        return 0.0
    return intersection / union


def interpret_output(output, w_img=640, h_img=480, threshold=0.2,
                     iou_threshold=0.5):
    """Turn one raw network output vector into a list of detections.

    Args:
        output: 1470 values laid out as 7*7*20 class scores, 7*7*2 box
            scales and 7*7*2*4 box parameters. It is not modified.
        w_img: width in pixels used to scale box x and width.
        h_img: height in pixels used to scale box y and height.
        threshold: minimum class score times box scale for a detection.
        iou_threshold: a box overlapping a higher-scoring box by more than
            this is suppressed.

    Returns:
        A list of ``[class_name, x, y, w, h, probability]`` ordered by
        decreasing probability; x and y are the box centre.

    Raises:
        ValueError: if ``output`` does not hold exactly 1470 values.
    """
    flat = np.asarray(output).reshape(-1)
    if flat.size != _OUTPUT_SIZE:
        raise ValueError("expected {} output values, got {}".format(
            _OUTPUT_SIZE, flat.size))
    if not np.issubdtype(flat.dtype, np.floating):
        flat = flat.astype(np.float64)

    class_probs = flat[:_CLASS_END].reshape(GRID_SIZE, GRID_SIZE, NUM_CLASSES)
    scales = flat[_CLASS_END:_SCALE_END].reshape(
        GRID_SIZE, GRID_SIZE, NUM_BOXES)
    # np.array() copies, so the in-place arithmetic below cannot write
    # through to the caller's array.
    boxes = np.array(flat[_SCALE_END:]).reshape(
        GRID_SIZE, GRID_SIZE, NUM_BOXES, 4)

    # Box centres are relative to their grid cell: add the column index to x
    # and the row index to y, then divide by the grid size.
    cell = np.arange(GRID_SIZE, dtype=boxes.dtype)
    boxes[..., 0] += cell.reshape(1, GRID_SIZE, 1)
    boxes[..., 1] += cell.reshape(GRID_SIZE, 1, 1)
    boxes[..., 0:2] /= float(GRID_SIZE)
    # Width and height are obtained by squaring the predicted values.
    boxes[..., 2:4] = boxes[..., 2:4] ** 2
    boxes[..., 0] *= w_img
    boxes[..., 1] *= h_img
    boxes[..., 2] *= w_img
    boxes[..., 3] *= h_img

    # probs[row, col, box, cls] = class score * box scale.
    probs = (class_probs[:, :, np.newaxis, :]
             * scales[:, :, :, np.newaxis]).astype(np.float64)

    keep = probs >= threshold
    rows, cols, box_ids, class_ids = np.nonzero(keep)
    boxes_kept = boxes[rows, cols, box_ids]
    probs_kept = probs[keep]
    # class_ids is aligned entry-for-entry with probs_kept. Taking argmax of
    # the boolean mask instead would label every entry of a box with the
    # lowest class index that passed the threshold.

    order = np.argsort(probs_kept)[::-1]
    boxes_kept = boxes_kept[order]
    probs_kept = probs_kept[order]
    class_ids = class_ids[order]

    # Non-maximum suppression: zero the score of any box that overlaps a
    # higher-scoring surviving box too much.
    for i in range(len(boxes_kept)):
        if probs_kept[i] == 0:
            continue
        for j in range(i + 1, len(boxes_kept)):
            if iou(boxes_kept[i], boxes_kept[j]) > iou_threshold:
                probs_kept[j] = 0.0

    survivors = probs_kept > 0.0
    boxes_kept = boxes_kept[survivors]
    probs_kept = probs_kept[survivors]
    class_ids = class_ids[survivors]

    return [[CLASSES[class_ids[i]], box[0], box[1], box[2], box[3],
             probs_kept[i]]
            for i, box in enumerate(boxes_kept)]


def show_results(img, results, txt_path='./test/xsss.txt',
                 img_path='./test/xlsld.jpg'):
    """Draw detections on a copy of ``img`` and write both outputs to disk.

    Args:
        img: image to annotate; it is copied, not modified.
        results: detections as returned by ``interpret_output``.
        txt_path: text file receiving one line per detection.
        img_path: file receiving the annotated image.
    """
    import cv2

    img_cp = img.copy()
    with open(txt_path, 'w') as ftxt:
        for label, cx, cy, bw, bh, prob in results:
            x = int(cx)
            y = int(cy)
            # w and h are half-extents from here on; these halved values are
            # also what gets written to the text file.
            w = int(bw) // 2
            h = int(bh) // 2
            cv2.rectangle(img_cp, (x - w, y - h), (x + w, y + h),
                          (0, 255, 0), 2)
            cv2.rectangle(img_cp, (x - w, y - h - 20), (x + w, y - h),
                          (125, 125, 125), -1)
            cv2.putText(img_cp, label + ' : %.2f' % prob,
                        (x - w + 5, y - h - 7), cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, (0, 0, 0), 1)
            ftxt.write(label + ',' + str(x) + ',' + str(y) + ',' + str(w)
                       + ',' + str(h) + ',' + str(prob) + '\n')
    cv2.imwrite(img_path, img_cp)


def main(graph_path='/home/acer/pbMake/yolo/yoloting_output_graph.pb',
         image_path='./test/person.jpg'):
    """Load the frozen graph, run it on one image and save the detections.

    Raises:
        IOError: if the image cannot be read.
    """
    # Keep this order: cv2 first, tensorflow second.
    import cv2
    import tensorflow as tf

    with tf.Graph().as_default():
        graph_def = tf.GraphDef()
        with open(graph_path, "rb") as f:
            graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name="")

        # No variable initialisation here: this file is the output of
        # freeze_graph, which stores the weights as constants.
        with tf.Session() as sess:
            input_x = sess.graph.get_tensor_by_name("input:0")
            print(input_x)
            output = sess.graph.get_tensor_by_name("19_fc:0")
            print(output)

            img = cv2.imread(image_path)
            if img is None:
                raise IOError("could not read image: {}".format(image_path))
            h_img, w_img = img.shape[:2]

            img_resized = cv2.resize(img, (448, 448))
            img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
            inputs = np.zeros((1, 448, 448, 3), dtype='float32')
            # Map pixel values from [0, 255] to [-1, 1].
            inputs[0] = (np.asarray(img_rgb) / 255.0) * 2.0 - 1.0

            net_output = sess.run(output, {input_x: inputs})
            print("net_output", net_output)

            # Scale boxes to the real image size, not the 640x480 default.
            result = interpret_output(net_output[0], w_img=w_img, h_img=h_img)
            show_results(img, result)


if __name__ == "__main__":
    main()
运行结果,与直接运行模型的结果一样。