前言:KNIFT 模板识别的官方 demo 只在图像上绘制了识别框,并没有把识别结果(标签)作为数据流输出。本文在其基础上新增一条标签输出流,方便接入业务逻辑。
1.修改模型
文件:mediapipe\graphs\template_matching\template_matching_mobile_cpu.pbtxt
修改后文件,见标签新增1,新增2
# MediaPipe graph that performs template matching with TensorFlow Lite on CPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/templatematchingcpu
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Added (1): graph-level output stream that exposes the matched template's
# label to the application side (consumed via a packet callback in Java).
output_stream: "TEMPLATELABLE:template_lable" # Added (1)
# Throttles the images flowing downstream for flow control.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:detections"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Transfers the input image from GPU to CPU memory.
node: {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "throttled_input_video"
output_stream: "input_video_cpu"
}
# Scale the image's longer side to 640, keeping aspect ratio.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:input_video_cpu"
output_stream: "IMAGE:transformed_input_video_cpu"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 640
output_height: 640
scale_mode: FILL_AND_CROP
}
}
}
# Reports the size of the transformed frame for downstream consumers.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:transformed_input_video_cpu"
output_stream: "SIZE:input_video_size"
}
# Detects keypoints and emits features, landmarks, and image patches
# (the patches feed the KNIFT feature-extraction model below).
node {
calculator: "FeatureDetectorCalculator"
input_stream: "IMAGE:transformed_input_video_cpu"
output_stream: "FEATURES:features"
output_stream: "LANDMARKS:landmarks"
output_stream: "PATCHES:patches"
}
# input tensors: 200*32*32*1 float
# output tensors: 200*40 float, only first keypoint.size()*40 is knift features,
# rest is padded by zero.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:patches"
output_stream: "TENSORS:knift_feature_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/knift_float.tflite"
delegate { xnnpack {} }
}
}
}
# Converts the raw TFLite output tensors into a float vector.
node {
calculator: "TfLiteTensorsToFloatsCalculator"
input_stream: "TENSORS:knift_feature_tensors"
output_stream: "FLOATS:knift_feature_floats"
}
# Matches the KNIFT descriptors against the prebuilt template index and
# emits detected boxes (ids are resolved to labels in the next node).
node {
calculator: "BoxDetectorCalculator"
input_stream: "FEATURES:features"
input_stream: "IMAGE_SIZE:input_video_size"
input_stream: "DESCRIPTORS:knift_feature_floats"
output_stream: "BOXES:detections"
node_options: {
[type.googleapis.com/mediapipe.BoxDetectorCalculatorOptions] {
detector_options {
index_type: OPENCV_BF
detect_every_n_frame: 1
}
index_proto_filename: "mediapipe/models/knift_index.pb"
}
}
}
# Maps box ids to human-readable labels using the label map file.
node {
calculator: "TimedBoxListIdToLabelCalculator"
input_stream: "detections"
output_stream: "labeled_detections"
# Added (2): optional stream carrying the matched label as a string;
# routed to the graph output declared above.
output_stream: "CLASSIFICATIONS:template_lable" # Added (2)
node_options: {
[type.googleapis.com/mediapipe.TimedBoxListIdToLabelCalculatorOptions] {
label_map_path: "mediapipe/models/knift_labelmap.txt"
}
}
}
# Converts detected boxes into render data for the overlay.
node {
calculator: "TimedBoxListToRenderDataCalculator"
input_stream: "BOX_LIST:labeled_detections"
output_stream: "RENDER_DATA:box_render_data"
node_options: {
[type.googleapis.com/mediapipe.TimedBoxListToRenderDataCalculatorOptions] {
box_color { r: 255 g: 0 b: 0 }
thickness: 5.0
}
}
}
# Converts detected landmarks into render data for the overlay.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:landmarks"
output_stream: "RENDER_DATA:landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 0 g: 255 b: 0 }
thickness: 2.0
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
input_stream: "box_render_data"
input_stream: "landmarks_render_data"
output_stream: "IMAGE_GPU:output_video"
}
可以把上面的配置粘贴到 MediaPipe Visualizer 工具中,直观查看整个图的结构。
2.根据TimedBoxListIdToLabelCalculator的calculator名称找到对应的源文件
文件:mediapipe\calculators\util\timed_box_list_id_to_label_calculator.cc
修改后文件,见标签新增1,新增2,新增3,新增4
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/container/node_hash_map.h"
#include "mediapipe/calculators/util/timed_box_list_id_to_label_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/resource_util.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
#if defined(MEDIAPIPE_MOBILE)
#include "mediapipe/util/android/file/base/file.h"
#include "mediapipe/util/android/file/base/helpers.h"
#else
#include "mediapipe/framework/port/file_helpers.h"
#endif
namespace mediapipe {
// Added (1): tag for the optional output stream that carries the matched
// template's label as a std::string.
constexpr char kTopKLabelsTag[] = "CLASSIFICATIONS"; // Added (1)
using mediapipe::TimedBoxProto;
using mediapipe::TimedBoxProtoList;
// Takes a label map (from label IDs to names), and populates the label field
// in each TimedBoxProto according to its ID.
//
// Example usage:
// node {
// calculator: "TimedBoxListIdToLabelCalculator"
// input_stream: "input_timed_box_list"
// output_stream: "output_timed_box_list"
// node_options: {
// [mediapipe.TimedBoxListIdToLabelCalculatorOptions] {
// label_map_path: "labelmap.txt"
// }
// }
// }
class TimedBoxListIdToLabelCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc);
absl::Status Open(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
private:
// Maps label id (0-based line number in the label map file) to label text.
absl::node_hash_map<int, std::string> label_map_;
};
REGISTER_CALCULATOR(TimedBoxListIdToLabelCalculator);
// Declares one TimedBoxProtoList in, one TimedBoxProtoList out, plus an
// optional CLASSIFICATIONS string output that is only typed when the graph
// actually binds a stream to that tag.
absl::Status TimedBoxListIdToLabelCalculator::GetContract(
    CalculatorContract* cc) {
  auto& inputs = cc->Inputs();
  auto& outputs = cc->Outputs();
  inputs.Index(0).Set<TimedBoxProtoList>();
  outputs.Index(0).Set<TimedBoxProtoList>();
  // Added (2): declare the payload type of the optional label output stream.
  if (outputs.HasTag(kTopKLabelsTag)) {
    outputs.Tag(kTopKLabelsTag).Set<std::string>();
  }
  return absl::OkStatus();
}
// Loads the label map file configured in the calculator options.
// Each line of the file becomes one entry; the 0-based line number is the id.
absl::Status TimedBoxListIdToLabelCalculator::Open(CalculatorContext* cc) {
  // No timestamp offset: outputs are emitted at the input timestamp.
  cc->SetOffset(TimestampDiff(0));
  const auto& options =
      cc->Options<::mediapipe::TimedBoxListIdToLabelCalculatorOptions>();
  // Resolve the configured path to a readable resource and slurp the file.
  std::string resolved_path;
  ASSIGN_OR_RETURN(resolved_path,
                   PathToResourceAsFile(options.label_map_path()));
  std::string file_contents;
  MP_RETURN_IF_ERROR(file::GetContents(resolved_path, &file_contents));
  // One label per line; line number doubles as the label id.
  std::istringstream line_stream(file_contents);
  std::string label;
  for (int id = 0; std::getline(line_stream, label); ++id) {
    label_map_[id] = label;
  }
  return absl::OkStatus();
}
// Copies the incoming box list, filling in each box's label from label_map_,
// and (Added 3/4) publishes the label of the last matched box on the optional
// CLASSIFICATIONS stream ("-" when no box matched in this packet).
//
// Fix vs. the original snippet: the original allocated `new std::string` for
// every matched box, leaking the previous allocation on each reassignment and
// leaking the final one whenever the CLASSIFICATIONS stream was not bound.
// A plain value string plus a single owned allocation at the Add() call site
// removes both leaks. The duplicate map lookup (find + operator[]) is also
// collapsed into one find.
absl::Status TimedBoxListIdToLabelCalculator::Process(CalculatorContext* cc) {
  const auto& input_list = cc->Inputs().Index(0).Get<TimedBoxProtoList>();
  auto output_list = absl::make_unique<TimedBoxProtoList>();
  // Added (3): label of the most recent matched box; "-" means "no match".
  std::string match_text = "-";
  for (const auto& input_box : input_list.box()) {
    TimedBoxProto* box_ptr = output_list->add_box();
    *box_ptr = input_box;
    const auto iter = label_map_.find(input_box.id());
    if (iter != label_map_.end()) {
      box_ptr->set_label(iter->second);
      match_text = iter->second;
    }
  }
  cc->Outputs().Index(0).Add(output_list.release(), cc->InputTimestamp());
  // Added (4): emit the label only when the graph bound the optional stream.
  // Add() takes ownership of the heap-allocated string.
  if (cc->Outputs().HasTag(kTopKLabelsTag)) {
    cc->Outputs()
        .Tag(kTopKLabelsTag)
        .Add(absl::make_unique<std::string>(match_text).release(),
             cc->InputTimestamp());
  }
  return absl::OkStatus();
}
} // namespace mediapipe
3.生成aar文件
bazel build -c opt --strip=ALWAYS \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--fat_apk_cpu=arm64-v8a,armeabi-v7a \
mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example:my_template_match_aar
4.android项目引入aar文件
修改识别模块的activity文件
/**
 * Main activity of MediaPipe example apps.
 *
 * Sets up the camera preview, loads the template-matching binary graph, and
 * registers a packet callback on the extra "template_lable" output stream so
 * the recognized template label can drive app-side business logic.
 */
public class TemplateMatchActivity extends FragmentActivity implements View.OnClickListener {
private static final String TAG = "TemplateMatchActivity";
// Binary graph compiled from template_matching_mobile_cpu.pbtxt.
private static final String BINARY_GRAPH_NAME = "mobile_cpu.binarypb";
private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
private static final int CONVERTER_NUM_BUFFERS = 1;
private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.BACK;
// Graph output stream carrying the recognized template label (Added 1/2 in
// the graph). Note: spelling "lable" must match the pbtxt exactly.
private static final String OUTPUT_TEMPLATE_STREAM_NAME = "template_lable";
@Override
protected void onCreate(Bundle savedInstanceState) {
Log.d(TAG, "onCreate: ");
super.onCreate(savedInstanceState);
setContentView(getContentViewLayoutResId());
// NOTE(review): backLayout, applicationInfo, previewDisplayView, eglManager,
// processor and FLIP_FRAMES_VERTICALLY are fields declared elsewhere in this
// class (not shown in this snippet).
backLayout = (LinearLayout) findViewById(R.id.back_layout);
backLayout.setOnClickListener(this);
try {
applicationInfo = getPackageManager().getApplicationInfo(getPackageName(), PackageManager.GET_META_DATA);
} catch (NameNotFoundException e) {
Log.e(TAG, "Cannot find application info: " + e);
}
previewDisplayView = new SurfaceView(this);
setupPreviewDisplayView();
// Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g.,
// binary graphs.
AndroidAssetUtil.initializeNativeAssetManager(this);
eglManager = new EglManager(null);
processor = new FrameProcessor(
this,
eglManager.getNativeContext(),
BINARY_GRAPH_NAME,
INPUT_VIDEO_STREAM_NAME,
OUTPUT_VIDEO_STREAM_NAME);
processor.getVideoSurfaceOutput()
.setFlipY(FLIP_FRAMES_VERTICALLY);
PermissionHelper.checkAndRequestCameraPermissions(this);
// Key part of receiving recognition results: each packet on the label
// stream is a string produced by TimedBoxListIdToLabelCalculator; map the
// known labels onto app events via EventBus. Runs on a MediaPipe thread,
// not the UI thread.
processor.addPacketCallback(
OUTPUT_TEMPLATE_STREAM_NAME,
(packet) -> {
String result = PacketGetter.getString(packet);
Log.v(TAG, "识别后返回的结果:"+ result +"_"+ packet.getTimestamp());
switch (result) {
case "right":
EventBus.getDefault().post(new MessageEvent("setRoll", "1"));
break;
case "left":
EventBus.getDefault().post(new MessageEvent("setRoll", "-1"));
break;
case "stop":
EventBus.getDefault().post(new MessageEvent("setStop", "0"));
break;
default:
// Unknown or "-" (no match): ignore.
break;
}
});
}
}