原文:https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/multibox_detector
这接上面一个例子,用于做图像检测,首先还是下载模型:
wget https://storage.googleapis.com/download.tensorflow.org/models/mobile_multibox_v1a.zip
然后解压得到模型和location文件。上面url中的data目录里有一张几个人游泳的照片,把照片下载到与模型相同的目录即可。CMakeLists.txt文件和上一篇博客相同,下面直接给出main.cpp文件:
#include <setjmp.h>
#include <stdio.h>
#include <string.h>
#include <fstream>
#include <vector>
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/image_ops.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/graph/default_device.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/util/command_line_flags.h"
// These are all common classes it's handy to reference with no namespace.
using tensorflow::Flag;
using tensorflow::Tensor;
using tensorflow::Status;
using tensorflow::string;
using tensorflow::int32;
using tensorflow::uint8;
// Loads comma-separated box priors from a text file, one group of values per
// line, and flattens them into a single vector in file order.
//
// Args:
//   file_name: path of the priors file to read.
//   result: cleared, then filled with every value parsed from the file.
//   found_label_count: set to the total number of values stored in `result`
//     when the whole file was read successfully.
//
// Returns NotFound if the file cannot be opened, and InvalidArgument if a line
// cannot be parsed as a list of floats (in that case `result` holds only the
// values read so far and `found_label_count` is left untouched).
Status ReadLocationsFile(const string& file_name, std::vector<float>* result,
                         size_t* found_label_count) {
  std::ifstream file(file_name);
  if (!file) {
    return tensorflow::errors::NotFound("Labels file ", file_name,
                                        " not found.");
  }
  result->clear();
  string line;
  while (std::getline(file, line)) {
    std::vector<float> tokens;
    // Report malformed input through the Status return value instead of
    // aborting the whole process; every caller already checks the status.
    if (!tensorflow::str_util::SplitAndParseAsFloats(line, ',', &tokens)) {
      return tensorflow::errors::InvalidArgument(
          "Could not parse line '", line, "' of ", file_name,
          " as comma-separated floats.");
    }
    result->insert(result->end(), tokens.begin(), tokens.end());
  }
  *found_label_count = result->size();
  return Status::OK();
}
// Reads an image file, decodes it, and produces both a normalized float batch
// for the model and the untouched decoded image.
//
// Args:
//   file_name: path of the image; the decoder is picked from the extension
//     (".png", ".gif", anything else is treated as JPEG).
//   input_height, input_width: size the image is bilinearly resized to.
//   input_mean, input_std: the resized pixels are transformed as
//     (value - input_mean) / input_std.
//   out_tensors: receives two tensors, in this order:
//     [0] the resized, normalized float image with a leading batch dimension,
//     [1] the decoded image before the float conversion, so that callers can
//         read the original dimensions and draw on it.
//
// Returns the first error raised while building or running the helper graph,
// or Internal if no session could be created.
Status ReadTensorFromImageFile(const string& file_name, const int input_height,
                               const int input_width, const float input_mean,
                               const float input_std,
                               std::vector<Tensor>* out_tensors) {
  auto root = tensorflow::Scope::NewRootScope();
  using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)

  string input_name = "file_reader";
  string original_name = "identity";
  string output_name = "normalized";
  auto file_reader =
      tensorflow::ops::ReadFile(root.WithOpName(input_name), file_name);

  // Now try to figure out what kind of file it is and decode it.
  const int wanted_channels = 3;
  tensorflow::Output image_reader;
  if (tensorflow::str_util::EndsWith(file_name, ".png")) {
    image_reader = DecodePng(root.WithOpName("png_reader"), file_reader,
                             DecodePng::Channels(wanted_channels));
  } else if (tensorflow::str_util::EndsWith(file_name, ".gif")) {
    // The GIF decoder returns a 4-D [frames, height, width, channels] tensor,
    // unlike the PNG/JPEG decoders. Drop the frame dimension so the rest of
    // the pipeline (ExpandDims + ResizeBilinear, and callers reading
    // height/width from the original image) sees a 3-D image.
    // NOTE(review): this only collapses single-frame GIFs; a multi-frame GIF
    // stays 4-D and is presumably rejected later by the resize op - confirm.
    image_reader =
        Squeeze(root.WithOpName("squeeze_first_dim"),
                DecodeGif(root.WithOpName("gif_reader"), file_reader));
  } else {
    // Assume if it's neither a PNG nor a GIF then it must be a JPEG.
    image_reader = DecodeJpeg(root.WithOpName("jpeg_reader"), file_reader,
                              DecodeJpeg::Channels(wanted_channels));
  }

  // Also return identity so that we can know the original dimensions and
  // optionally save the image out with bounding boxes overlaid.
  auto original_image = Identity(root.WithOpName(original_name), image_reader);

  // Now cast the image data to float so we can do normal math on it.
  auto float_caster = Cast(root.WithOpName("float_caster"), original_image,
                           tensorflow::DT_FLOAT);

  // The convention for image ops in TensorFlow is that all images are expected
  // to be in batches, so that they're four-dimensional arrays with indices of
  // [batch, height, width, channel]. Because we only have a single image, we
  // have to add a batch dimension of 1 to the start with ExpandDims().
  auto dims_expander = ExpandDims(root, float_caster, 0);

  // Bilinearly resize the image to fit the required dimensions.
  auto resized = ResizeBilinear(
      root, dims_expander,
      Const(root.WithOpName("size"), {input_height, input_width}));

  // Subtract the mean and divide by the scale.
  Div(root.WithOpName(output_name), Sub(root, resized, {input_mean}),
      {input_std});

  // This runs the GraphDef network definition that we've just constructed, and
  // returns the results in the output tensor.
  tensorflow::GraphDef graph;
  TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));

  std::unique_ptr<tensorflow::Session> session(
      tensorflow::NewSession(tensorflow::SessionOptions()));
  // NewSession() signals failure with a null pointer; do not dereference it.
  if (!session) {
    return tensorflow::errors::Internal(
        "Could not create a TensorFlow session to read '", file_name, "'");
  }
  TF_RETURN_IF_ERROR(session->Create(graph));
  TF_RETURN_IF_ERROR(
      session->Run({}, {output_name, original_name}, {}, out_tensors));
  return Status::OK();
}
// Encodes `tensor` as a PNG and writes it to `file_path`.
//
// Args:
//   tensor: image data handed unchanged to the EncodePng op.
//   file_path: destination file; must end in ".png".
//
// Returns InvalidArgument for a non-".png" path, Internal if no session could
// be created, or the first error raised while building or running the graph.
Status SaveImage(const Tensor& tensor, const string& file_path) {
  LOG(INFO) << "Saving image to " << file_path;
  // The path comes from a command-line flag, so a wrong extension is a user
  // error to report, not a reason to abort the process.
  if (!tensorflow::str_util::EndsWith(file_path, ".png")) {
    return tensorflow::errors::InvalidArgument(
        "Only saving of png files is supported.");
  }

  auto root = tensorflow::Scope::NewRootScope();
  using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)

  string encoder_name = "encode";
  string output_name = "file_writer";

  tensorflow::Output image_encoder =
      EncodePng(root.WithOpName(encoder_name), tensor);
  tensorflow::ops::WriteFile file_saver = tensorflow::ops::WriteFile(
      root.WithOpName(output_name), file_path, image_encoder);

  tensorflow::GraphDef graph;
  TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));

  std::unique_ptr<tensorflow::Session> session(
      tensorflow::NewSession(tensorflow::SessionOptions()));
  // NewSession() signals failure with a null pointer; do not dereference it.
  if (!session) {
    return tensorflow::errors::Internal(
        "Could not create a TensorFlow session to save '", file_path, "'");
  }
  TF_RETURN_IF_ERROR(session->Create(graph));

  // The writer is run as a target node: it has a side effect but no output
  // to fetch, so `outputs` stays empty.
  std::vector<Tensor> outputs;
  TF_RETURN_IF_ERROR(session->Run({}, {}, {output_name}, &outputs));
  return Status::OK();
}
// Reads a binary GraphDef from disk and creates a session that can run it.
//
// Args:
//   graph_file_name: path of the serialized graph definition.
//   session: receives the newly created session holding the loaded graph.
//
// Returns NotFound (with the underlying cause appended) if the graph cannot
// be read, Internal if no session could be created, or the error reported by
// Session::Create().
Status LoadGraph(const string& graph_file_name,
                 std::unique_ptr<tensorflow::Session>* session) {
  tensorflow::GraphDef graph_def;
  Status load_graph_status =
      ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def);
  if (!load_graph_status.ok()) {
    // Keep the historical NotFound code, but do not throw away the reason
    // (missing file vs. unparsable proto) reported by ReadBinaryProto.
    return tensorflow::errors::NotFound("Failed to load compute graph at '",
                                        graph_file_name, "': ",
                                        load_graph_status.ToString());
  }
  session->reset(tensorflow::NewSession(tensorflow::SessionOptions()));
  // NewSession() signals failure with a null pointer; do not dereference it.
  if (!*session) {
    return tensorflow::errors::Internal(
        "Could not create a TensorFlow session for graph '", graph_file_name,
        "'");
  }
  return (*session)->Create(graph_def);
}
// Analyzes the output of the MultiBox graph to retrieve the highest scores and
// their positions in the tensor, which correspond to individual box detections.
//
// Args:
//   outputs: model outputs; only outputs[0] (the scores) is read here.
//   how_many_labels: number of top entries to retrieve (the `k` of TopK).
//   indices: receives the positions of the selected scores.
//   scores: receives the selected (still encoded) scores.
//
// Returns InvalidArgument if `outputs` is empty, Internal if no session could
// be created, or the first error raised while building or running the graph.
Status GetTopDetections(const std::vector<Tensor>& outputs, int how_many_labels,
                        Tensor* indices, Tensor* scores) {
  if (outputs.empty()) {
    return tensorflow::errors::InvalidArgument(
        "No score tensor available to select top detections from.");
  }

  auto root = tensorflow::Scope::NewRootScope();
  using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)

  string output_name = "top_k";
  TopK(root.WithOpName(output_name), outputs[0], how_many_labels);

  // This runs the GraphDef network definition that we've just constructed, and
  // returns the results in the output tensors.
  tensorflow::GraphDef graph;
  TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));

  std::unique_ptr<tensorflow::Session> session(
      tensorflow::NewSession(tensorflow::SessionOptions()));
  // NewSession() signals failure with a null pointer; do not dereference it.
  if (!session) {
    return tensorflow::errors::Internal(
        "Could not create a TensorFlow session to select top detections.");
  }
  TF_RETURN_IF_ERROR(session->Create(graph));

  // The TopK node returns two outputs, the scores and their original indices,
  // so we have to append :0 and :1 to specify them both.
  std::vector<Tensor> out_tensors;
  TF_RETURN_IF_ERROR(session->Run({}, {output_name + ":0", output_name + ":1"},
                                  {}, &out_tensors));
  *scores = out_tensors[0];
  *indices = out_tensors[1];
  return Status::OK();
}
// Turns one encoded box (four values) into four coordinates, each clamped to
// the range [0, 1].
//
// `box_priors` holds eight values laid out as (mean, std_dev) pairs, one pair
// per coordinate; each coordinate is decoded as encoding * std_dev + mean.
// A warning is logged when all four encodings are exactly zero.
void DecodeLocation(const float* encoded_location, const float* box_priors,
                    float* decoded_location) {
  bool saw_non_zero = false;
  for (int coord = 0; coord < 4; ++coord) {
    const float raw = encoded_location[coord];
    if (raw != 0.0f) {
      saw_non_zero = true;
    }

    // prior[0] is the mean and prior[1] the standard deviation.
    const float* prior = box_priors + coord * 2;
    const float unclamped = raw * prior[1] + prior[0];
    decoded_location[coord] = std::min(std::max(unclamped, 0.0f), 1.0f);
  }

  if (saw_non_zero) {
    return;
  }
  LOG(WARNING) << "No non-zero encodings; check log for inference errors.";
}
// Squashes a raw model score with the logistic function 1 / (1 + e^-x).
float DecodeScore(float encoded_score) {
  const auto decay = exp(-encoded_score);
  return 1 / (1 + decay);
}
void DrawBox(const int image_width, const int image_height, int left, int top,
int right, int bottom, tensorflow::TTypes<uint8>::Flat* image) {
tensorflow::TTypes<uint8>::Flat image_ref = *image;
top = std::max(0, std::min(image_height - 1, top));
bottom = std::max(0, std::min(image_height - 1, bottom));
left = std::max(0, std::min(image_width - 1, left));
right = std::max(0, std::min(image_width - 1, right));
for (int i = 0; i < 3; ++i) {
uint8 val = i == 2 ? 255 : 0;
for (int x = left; x <= right; ++x) {
image_ref((top * image_width + x) * 3 + i) = val;
image_ref((bottom * image_width + x) * 3 + i) = val;
}
for (int y = top; y <= bottom; ++y) {
image_ref((y * image_width + left) * 3 + i) = val;
image_ref((y * image_width + right) * 3 + i) = val;
}
}
}
// Given the output of a model run and the name of a file containing the box
// priors, logs the highest-scoring detections, draws their boxes onto the
// original image and optionally saves that image.
//
// Args:
//   outputs: model outputs; outputs[0] holds the scores and outputs[1] the
//     encoded locations (four values per box).
//   labels_file_name: path of the box priors file (eight values per box).
//   num_boxes: number of boxes the priors file is expected to describe.
//   num_detections: how many detections to report; clamped to num_boxes.
//   image_file_name: where to save the annotated image; nothing is saved when
//     this is empty.
//   original_tensor: decoded image, read as uint8 [height, width, 3]; the
//     boxes are drawn into it in place.
//
// Returns InvalidArgument when the inputs are inconsistent with each other,
// or any error raised while reading the priors, selecting the top detections
// or saving the image.
Status PrintTopDetections(const std::vector<Tensor>& outputs,
                          const string& labels_file_name, const int num_boxes,
                          const int num_detections,
                          const string& image_file_name,
                          Tensor* original_tensor) {
  if (outputs.size() < 2) {
    return tensorflow::errors::InvalidArgument(
        "Expected score and location outputs, but got ", outputs.size(),
        " tensor(s).");
  }
  if (num_boxes < 0) {
    return tensorflow::errors::InvalidArgument(
        "num_boxes must not be negative, but is ", num_boxes);
  }

  std::vector<float> locations;
  size_t label_count;
  Status read_labels_status =
      ReadLocationsFile(labels_file_name, &locations, &label_count);
  if (!read_labels_status.ok()) {
    LOG(ERROR) << read_labels_status;
    return read_labels_status;
  }
  // Each box is described by eight prior values (a mean/std_dev pair for each
  // of its four coordinates). Report a mismatch instead of aborting.
  const size_t expected_prior_count = static_cast<size_t>(num_boxes) * 8;
  if (label_count != expected_prior_count) {
    return tensorflow::errors::InvalidArgument(
        "Expected ", expected_prior_count, " box prior values in ",
        labels_file_name, " (8 per box), but found ", label_count);
  }

  // There cannot be more detections than boxes, so clamp to the box count
  // rather than to the number of prior values.
  const int how_many_labels = std::min(num_detections, num_boxes);

  Tensor indices;
  Tensor scores;
  TF_RETURN_IF_ERROR(
      GetTopDetections(outputs, how_many_labels, &indices, &scores));

  tensorflow::TTypes<float>::Flat scores_flat = scores.flat<float>();
  tensorflow::TTypes<int32>::Flat indices_flat = indices.flat<int32>();

  const Tensor& encoded_locations = outputs[1];
  auto locations_encoded = encoded_locations.flat<float>();
  if (locations_encoded.size() < static_cast<tensorflow::int64>(num_boxes) * 4) {
    return tensorflow::errors::InvalidArgument(
        "Location output holds ", locations_encoded.size(),
        " values, which is fewer than 4 per box for ", num_boxes, " boxes.");
  }

  LOG(INFO) << original_tensor->DebugString();
  // DrawBox() indexes the image as three interleaved uint8 channels, so make
  // sure that is really what we were given before writing into it.
  if (original_tensor->dtype() != tensorflow::DT_UINT8 ||
      original_tensor->dims() != 3 || original_tensor->dim_size(2) != 3 ||
      original_tensor->NumElements() == 0) {
    return tensorflow::errors::InvalidArgument(
        "Expected a non-empty uint8 image of shape [height, width, 3], got ",
        original_tensor->DebugString());
  }
  const int image_width = static_cast<int>(original_tensor->dim_size(1));
  const int image_height = static_cast<int>(original_tensor->dim_size(0));

  tensorflow::TTypes<uint8>::Flat image_flat = original_tensor->flat<uint8>();

  LOG(INFO) << "===== Top " << how_many_labels << " Detections ======";
  for (int pos = 0; pos < how_many_labels; ++pos) {
    const int label_index = indices_flat(pos);
    const float score = scores_flat(pos);

    // The index comes from the score tensor; make sure it also addresses a
    // valid box in the priors and in the encoded locations.
    if (label_index < 0 || label_index >= num_boxes) {
      return tensorflow::errors::InvalidArgument(
          "Detection index ", label_index, " is outside the ", num_boxes,
          " known boxes.");
    }

    float decoded_location[4];
    DecodeLocation(&locations_encoded(label_index * 4),
                   &locations[label_index * 8], decoded_location);

    // Decoded coordinates are fractions of the image size.
    const float left = decoded_location[0] * image_width;
    const float top = decoded_location[1] * image_height;
    const float right = decoded_location[2] * image_width;
    const float bottom = decoded_location[3] * image_height;

    LOG(INFO) << "Detection " << pos << ": "
              << "L:" << left << " "
              << "T:" << top << " "
              << "R:" << right << " "
              << "B:" << bottom << " "
              << "(" << label_index << ") score: " << DecodeScore(score);

    DrawBox(image_width, image_height, static_cast<int>(left),
            static_cast<int>(top), static_cast<int>(right),
            static_cast<int>(bottom), &image_flat);
  }

  if (!image_file_name.empty()) {
    return SaveImage(*original_tensor, image_file_name);
  }
  return Status::OK();
}
int main(int argc, char* argv[]) {
// These are the command-line flags the program can understand.
// They define where the graph and input data is located, and what kind of
// input the model expects. If you train your own model, or use something
// other than multibox_model you'll need to update these.
string image = "/Users/zhoumeixu/Downloads/mobile_multibox/surfers.jpg";
string graph = "/Users/zhoumeixu/Downloads/mobile_multibox/multibox_model.pb";
string box_priors = "/Users/zhoumeixu/Downloads/mobile_multibox/multibox_location_priors.txt";
int32 input_width = 224;
int32 input_height = 224;
int32 input_mean = 128;
int32 input_std = 128;
int32 num_detections = 5;
int32 num_boxes = 784;
string input_layer = "ResizeBilinear";
string output_location_layer = "output_locations/Reshape";
string output_score_layer = "output_scores/Reshape";
string root_dir = "";
string image_out = "";
std::vector<Flag> flag_list = {
Flag("image", &image, "image to be processed"),
Flag("image_out", &image_out,
"location to save output image, if desired"),
Flag("graph", &graph, "graph to be executed"),
Flag("box_priors", &box_priors, "name of file containing box priors"),
Flag("input_width", &input_width, "resize image to this width in pixels"),
Flag("input_height", &input_height,
"resize image to this height in pixels"),
Flag("input_mean", &input_mean, "scale pixel values to this mean"),
Flag("input_std", &input_std, "scale pixel values to this std deviation"),
Flag("num_detections", &num_detections,
"number of top detections to return"),
Flag("num_boxes", &num_boxes,
"number of boxes defined by the location file"),
Flag("input_layer", &input_layer, "name of input layer"),
Flag("output_location_layer", &output_location_layer,
"name of location output layer"),
Flag("output_score_layer", &output_score_layer,
"name of score output layer"),
Flag("root_dir", &root_dir,
"interpret image and graph file names relative to this directory"),
};
string usage = tensorflow::Flags::Usage(argv[0], flag_list);
const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
if (!parse_result) {
LOG(ERROR) << usage;
return -1;
}
// We need to call this to set up global state for TensorFlow.
tensorflow::port::InitMain(argv[0], &argc, &argv);
if (argc > 1) {
LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
return -1;
}
// First we load and initialize the model.
std::unique_ptr<tensorflow::Session> session;
string graph_path = tensorflow::io::JoinPath(root_dir, graph);
Status load_graph_status = LoadGraph(graph_path, &session);
if (!load_graph_status.ok()) {
LOG(ERROR) << load_graph_status;
return -1;
}
// Get the image from disk as a float array of numbers, resized and normalized
// to the specifications the main graph expects.
std::vector<Tensor> image_tensors;
string image_path = tensorflow::io::JoinPath(root_dir, image);
Status read_tensor_status =
ReadTensorFromImageFile(image_path, input_height, input_width, input_mean,
input_std, &image_tensors);
if (!read_tensor_status.ok()) {
LOG(ERROR) << read_tensor_status;
return -1;
}
const Tensor& resized_tensor = image_tensors[0];
// Actually run the image through the model.
std::vector<Tensor> outputs;
Status run_status =
session->Run({{input_layer, resized_tensor}},
{output_score_layer, output_location_layer}, {}, &outputs);
if (!run_status.ok()) {
LOG(ERROR) << "Running model failed: " << run_status;
return -1;
}
Status print_status = PrintTopDetections(outputs, box_priors, num_boxes,
num_detections, image_out,
&image_tensors[1]);
if (!print_status.ok()) {
LOG(ERROR) << "Running print failed: " << print_status;
return -1;
}
return 0;
}
结果:
2018-08-21 13:37:36.993811: :273] Tensor<type: uint8 shape: [228,480,3] values: [[158 141 147]]...>
2018-08-21 13:37:36.994527: :279] ===== Top 5 Detections ======
2018-08-21 13:37:36.994554: Detection 0: L:324.542 T:76.5764 R:373.26 B:214.957 (635) score: 0.267425
2018-08-21 13:37:36.994582: Detection 1: L:332.896 T:76.2751 R:372.116 B:204.614 (523) score: 0.245335
2018-08-21 13:37:36.994605: Detection 2: L:306.605 T:76.2228 R:371.356 B:217.32 (634) score: 0.21612
2018-08-21 13:37:36.994629: Detection 3: L:143.918 T:86.0909 R:187.333 B:195.885 (387) score: 0.171367
2018-08-21 13:37:36.994651: Detection 4: L:144.915 T:86.2675 R:185.243 B:165.246 (219) score: 0.169244