在OpenCV3.3版本发布中把DNN模块从扩展模块移到了OpenCV正式发布模块中,DNN模块最早来自Tiny-dnn,可以加载预先训练好的Caffe模型数据,后来OpenCV进一步扩展支持主流的深度学习框架模型数据的加载,常见的有如下:Caffe,TensorFlow,Torch/PyTorch。OpenCV中DNN模块已经支持了下面这些经典的神经网络模型:
AlexNet
GoogLeNet v1 (also referred to as Inception-5h)
ResNet-34/50/...
SqueezeNet v1.1
VGG-based FCN (semantical segmentation network)
ENet (lightweight semantical segmentation network)
VGG-based SSD (object detection network)
MobileNet-based SSD (light-weight object detection network)
本文将通过一个简单的例子来演示如何通过opencv调用自己构建并训练好的tensorflow模型。首先通过tensorflow自己构建一个简单的网络结构并进行训练,然后将训练好的网络结构保存为.pb文件,供opencv调用。
第一步: 构建一个简单的卷积网络结构。
#导入包
import tensorflow as tf
import numpy as np
#import cv2
from matplotlib import pyplot as plt
from tensorflow.python.framework import graph_util
import time
# Directory the frozen graph (.pb) is written to once training has finished.
logdir = './output/'

# Load the training, validation and testing data stored in .npy format.
INPUT_DATA = 'F:\\py\\solder_processed_data2.npy'
BATCH = 5

# The file is indexed as six separately shaped arrays (images and labels for
# three splits), i.e. it is a pickled object array.  NumPy >= 1.16.3 refuses
# to load such files unless allow_pickle=True is passed explicitly.
# NOTE(security): unpickling can execute arbitrary code -- only load .npy
# files that you produced yourself.
processed_data = np.load(INPUT_DATA, allow_pickle=True)

inputsize = 224
n_training_example = len(processed_data[0])
n_tr = n_training_example
n_v = len(processed_data[2])
n_t = len(processed_data[4])

# Force every split into the layout the network expects:
# images -> [N, inputsize, inputsize, 3], labels -> [N, 2].
training_images = np.reshape(processed_data[0], [n_tr, inputsize, inputsize, 3])
training_labels = np.reshape(processed_data[1], [n_tr, 2])
validation_images = np.reshape(processed_data[2], [n_v, inputsize, inputsize, 3])
validation_labels = np.reshape(processed_data[3], [n_v, 2])
testing_images = np.reshape(processed_data[4], [n_t, inputsize, inputsize, 3])
testing_labels = np.reshape(processed_data[5], [n_t, 2])
print("%d training examples, %d validation examples and %d testing examples." % (
    n_training_example, len(validation_labels), len(testing_labels)))

# Create the session; the layers are defined further below.  The later
# `.run()` / `.eval()` calls are made without passing a session explicitly.
sess = tf.InteractiveSession()
def weight_variable(shape):
    """Create a weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1, stored as float32.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1, dtype=tf.float32))
def bias__variable(shape):
    """Create a bias variable of the given shape with every entry set to 0.1."""
    start_value = tf.constant(0.1, shape=shape, dtype=None)
    return tf.Variable(start_value)
def conv2d(x, w):
    """Convolve `x` with the filter `w` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
# Graph inputs.  The node names "input" and "output" are the ones used later
# when the graph is frozen and when it is fed from OpenCV.
x = tf.placeholder(tf.float32, [None, inputsize, inputsize, 3], name="input")
y_ = tf.placeholder(tf.float32, [None, 2], name="labels")

# Convolution block 1: 3x3 kernels, 3 -> 32 channels, then 2x2 max pooling.
w_conv1 = weight_variable([3, 3, 3, 32])
b_conv1 = bias__variable([32])
h_conv1 = tf.nn.relu(conv2d(x, w_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Convolution block 2: 3x3 kernels, 32 -> 64 channels, then 2x2 max pooling.
w_conv2 = weight_variable([3, 3, 32, 64])
b_conv2 = bias__variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layer 1.
# Take the flattened feature size from the pooled tensor's static shape
# instead of hard-coding 56*56*64, so the graph stays consistent if
# `inputsize` is changed (for inputsize == 224 this is still 56*56*64).
flat_dim = int(np.prod(h_pool2.get_shape().as_list()[1:]))
w_fc1 = weight_variable([flat_dim, 1024])
b_fc1 = bias__variable([1024])
h_pool2_flag = tf.reshape(h_pool2, [-1, flat_dim])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flag, w_fc1) + b_fc1)

# Fully connected layer 2.
w_fc2 = weight_variable([1024, 512])
b_fc2 = bias__variable([512])
h_fc2 = tf.nn.relu(tf.matmul(h_fc1, w_fc2) + b_fc2)

# Output layer: softmax over the two classes.
w_fc3 = weight_variable([512, 2])
b_fc3 = bias__variable([2])
y_conv = tf.nn.softmax(tf.matmul(h_fc2, w_fc3) + b_fc3, name="output")

# Cross-entropy loss; the probabilities are clipped to [1e-10, 1.0] so that
# log() never receives 0.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)), axis=1))
train_step = tf.train.AdamOptimizer(0.00001).minimize(cross_entropy)

# Fraction of samples whose arg-max prediction matches the arg-max label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accurace = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initialise all variables (run without an explicit session argument).
tf.global_variables_initializer().run()
# [start, end) is the index range of the current mini-batch.
start = 0
# Clamp the first batch to the data set size; with an unclamped `end` a data
# set smaller than BATCH produced a batch/label size mismatch.
end = min(BATCH, n_training_example)
duaration = 0

# Start training.
for i in range(500):
    if i % 50 == 0:
        # Every 50 steps report the accuracy on the whole training set and
        # the wall-clock duration of the most recent training step.
        train_accurace = accurace.eval(
            feed_dict={x: training_images, y_: training_labels})
        print("step %d,train accurace %g" % (i, train_accurace))
        print("step %d,one step/second %g" % (i, duaration))

    start_time = time.time()
    train_step.run(feed_dict={x: training_images[start:end],
                              y_: training_labels[start:end]})
    duaration = time.time() - start_time

    # Advance to the next mini-batch, wrapping around at the end of the
    # training data; the last batch of a pass may be shorter than BATCH.
    start = end
    if start >= n_training_example:
        start = 0
    end = min(start + BATCH, n_training_example)

# Final accuracy on the held-out test split.
print("test accurace %g" % accurace.eval(
    feed_dict={x: testing_images, y_: testing_labels}))
# Save the trained model as a .pb file: the variables reachable from the
# "output" node are converted to constants so the graph is self-contained.
constant_graph = graph_util.convert_variables_to_constants(
    sess, sess.graph_def, ["output"])
# Make sure the target directory exists; opening the file for writing fails
# otherwise.  MakeDirs succeeds when the directory is already present.
tf.gfile.MakeDirs(logdir)
with tf.gfile.GFile(logdir + 'expert-graph2.pb', mode='wb') as f:
    f.write(constant_graph.SerializeToString())
第二步: 本文采用opencv3.4中的DNN模块调用训练好的模型,输出预测结果。
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <Windows.h>
#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace cv;
using namespace cv::dnn;
using namespace std;
// Create this text file yourself and write the class labels into it, one
// label per line (e.g. for two classes: "good" on line 1, "bad" on line 2).
String labels_txt_file("F:\\py\\实现卷积神经网络\\output\\expert-graph.txt");
// Frozen TensorFlow graph that is loaded by the DNN module.
// NOTE(review): the training script in this article writes
// './output/expert-graph2.pb' -- confirm that this path points at the file
// that was actually exported.
String tf_pb_file("F:\\py\\实现卷积神经网络\\output\\expert-graph.pb");
// Defined after main(); returns the labels read from labels_txt_file.
vector<String> readClassNames();
void main()
{
Mat src = imread("F:\\py\\solder\\bad\\20.jpg");
if (src.empty())
{
cout << "error:no img" << endl;
}
vector <String> labels=readClassNames();
Mat rgb;
int w = 224;
int h = 224;
resize(src, src, Size(w, h));
cvtColor(src, rgb, COLOR_BGR2RGB);
Net net = readNetFromTensorflow(tf_pb_file);
DWORD timestart = GetTickCount();
if (net.empty())
{
cout << "error:no model" << endl;
}
Mat inputBlob = blobFromImage(src, 0.00390625f, Size(w, h), Scalar(), true, false);
//inputBlob -= 117.0;
//执行图像分类
Mat prob;
net.setInput(inputBlob, "input");
prob = net.forward("output");
cout << prob << endl;
//prob=net.forward("softmax2");
//得到最大分类概率
Mat probMat= prob.reshape(1,1);
Point classNumber;
double classProb;
minMaxLoc(probMat, NULL, &classProb, NULL, &classNumber);
DWORD timeend = GetTickCount();
int classidx = classNumber.x;
printf("\n current image classification : %s, possible : %.2f\n" ,labels.at( classidx).c_str(),classProb);
cout << "用时(毫秒):" << timeend - timestart << endl;
// 显示文本
putText (src,labels.at( classidx),Point(20,20),FONT_HERSHEY_SIMPLEX,1.0,Scalar(0,0,255),2,8);
imshow("Image Classfication", src);
waitKey(0);
}
// Reads the class labels from labels_txt_file, one label per line, skipping
// empty lines.  Prints a message and terminates the program with exit(-1)
// when the file cannot be opened.
vector<String> readClassNames()
{
	vector<String> classNames;
	// Open read-only: a plain fstream requests read+write access and fails on
	// files that are not writable.
	ifstream fp(labels_txt_file.c_str());
	if (!fp.is_open())
	{
		cout << "does not open" << endl;
		exit(-1);
	}
	string name;
	// Loop on the result of getline() itself so that any read failure ends
	// the loop, not only end-of-file.
	while (getline(fp, name))
	{
		if (!name.empty())
			classNames.push_back(name);
	}
	return classNames;
}
本文只给出了程序,不提供图片数据。