转成caffe2需要安装以下依赖
pip install graphviz
sudo apt-get install graphviz
pip install pydot
较新的onnx.optimizer删除了 , 改为了onnxoptimizer。
export_model.py修改如下
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates.
import argparse
import os
from typing import Dict, List, Tuple
import torch
from torch import Tensor, nn
import detectron2.data.transforms as T
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader, detection_utils
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format
from detectron2.export import (
Caffe2Tracer,
TracingAdapter,
add_export_config,
dump_torchscript_IR,
scripting_with_instances,
)
from detectron2.modeling import GeneralizedRCNN, RetinaNet, build_model
from detectron2.modeling.postprocessing import detector_postprocess
from detectron2.projects.point_rend import add_pointrend_config
from detectron2.structures import Boxes
from detectron2.utils.env import TORCH_VERSION
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import setup_logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
# import some common libraries
import numpy as np
import os, json, cv2, random
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import BoxMode
from detectron2.utils.visualizer import ColorMode
def setup_cfg(args):
cfg = get_cfg()
# cuda context is initialized before creating dataloader, so we don't fork anymore
cfg.DATALOADER.NUM_WORKERS = 0
cfg = add_export_config(cfg)
add_pointrend_config(cfg)
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
# cfg.freeze()
return cfg
def export_caffe2_tracing(cfg, torch_model, inputs):
tracer = Caffe2Tracer(cfg, torch_model, inputs)
if args.format == "caffe2":
caffe2_model = tracer.export_caffe2()
caffe2_model.save_protobuf(args.output)
# draw the caffe2 graph
caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=inputs)
return caffe2_model
elif args.format == "onnx":
import onnx
onnx_model = tracer.export_onnx()
onnx.save(onnx_model, os.path.join(args.output, "model.onnx"))
elif args.format == "torchscript":
ts_model = tracer.export_torchscript()
with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f:
torch.jit.save(ts_model, f)
dump_torchscript_IR(ts_model, args.output)
# experimental. API not yet final
def export_scripting(torch_model):
assert TORCH_VERSION >= (1, 8)
fields = {
"proposal_boxes": Boxes,
"objectness_logits": Tensor,
"pred_boxes": Boxes,
"scores": Tensor,
"pred_classes": Tensor,
"pred_masks": Tensor,
"pred_keypoints": torch.Tensor,
"pred_keypoint_heatmaps": torch.Tensor,
}
assert args.format == "torchscript", "Scripting only supports torchscript format."
class ScriptableAdapterBase(nn.Module):
# Use this adapter to workaround https://github.com/pytorch/pytorch/issues/46944
# by not retuning instances but dicts. Otherwise the exported model is not deployable
def __init__(self):
super().__init__()
self.model = torch_model
self.eval()
if isinstance(torch_model, GeneralizedRCNN):
class ScriptableAdapter(ScriptableAdapterBase):
def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]:
instances = self.model.inference(inputs, do_postprocess=False)
return [i.get_fields() for i in instances]
else:
class ScriptableAdapter(ScriptableAdapterBase):
def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]:
instances = self.model(inputs)
return [i.get_fields() for i in instances]
ts_model = scripting_with_instances(ScriptableAdapter(), fields)
with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f:
torch.jit.save(ts_model, f)
dump_torchscript_IR(ts_model, args.output)
# TODO inference in Python now missing postprocessing glue code
return None
# experimental. API not yet final
def export_tracing(torch_model, inputs):
assert TORCH_VERSION >= (1, 8)
image = inputs[0]["image"]
inputs = [{"image": image}] # remove other unused keys
if isinstance(torch_model, GeneralizedRCNN):
def inference(model, inputs):
# use do_postprocess=False so it returns ROI mask
inst = model.inference(inputs, do_postprocess=False)[0]
return [{"instances": inst}]
else:
inference = None # assume that we just call the model directly
traceable_model = TracingAdapter(torch_model, inputs, inference)
if args.format == "torchscript":
ts_model = torch.jit.trace(traceable_model, (image,))
with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f:
torch.jit.save(ts_model, f)
dump_torchscript_IR(ts_model, args.output)
elif args.format == "onnx":
with PathManager.open(os.path.join(args.output, "model.onnx"), "wb") as f:
torch.onnx.export(traceable_model, (image,), f, opset_version=11)
logger.info("Inputs schema: " + str(traceable_model.inputs_schema))
logger.info("Outputs schema: " + str(traceable_model.outputs_schema))
if args.format != "torchscript":
return None
if not isinstance(torch_model, (GeneralizedRCNN, RetinaNet)):
return None
def eval_wrapper(inputs):
"""
The exported model does not contain the final resize step, which is typically
unused in deployment but needed for evaluation. We add it manually here.
"""
input = inputs[0]
instances = traceable_model.outputs_schema(ts_model(input["image"]))[0]["instances"]
postprocessed = detector_postprocess(instances, input["height"], input["width"])
return [{"instances": postprocessed}]
return eval_wrapper
def get_sample_inputs(args):
if args.sample_image is None:
# get a first batch from dataset
data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
first_batch = next(iter(data_loader))
return first_batch
else:
# get a sample data
original_image = detection_utils.read_image(args.sample_image, format=cfg.INPUT.FORMAT)
# Do same preprocessing as DefaultPredictor
aug = T.ResizeShortestEdge(
[cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
)
height, width = original_image.shape[:2]
image = aug.get_transform(original_image).apply_image(original_image)
image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
inputs = {"image": image, "height": height, "width": width}
# Sample ready
sample_inputs = [inputs]
return sample_inputs
def get_balloon_dicts(img_dir):
json_file = os.path.join(img_dir, "via_region_data.json")
with open(json_file) as f:
imgs_anns = json.load(f)
dataset_dicts = []
for idx, v in enumerate(imgs_anns.values()):
record = {}
filename = os.path.join(img_dir, v["filename"])
height, width = cv2.imread(filename).shape[:2]
record["file_name"] = filename
record["image_id"] = idx
record["height"] = height
record["width"] = width
annos = v["regions"]
objs = []
for _, anno in annos.items():
assert not anno["region_attributes"]
anno = anno["shape_attributes"]
px = anno["all_points_x"]
py = anno["all_points_y"]
poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
poly = [p for x in poly for p in x]
obj = {
"bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
"bbox_mode": BoxMode.XYXY_ABS,
"segmentation": [poly],
"category_id": 0,
}
objs.append(obj)
record["annotations"] = objs
dataset_dicts.append(record)
return dataset_dicts
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Export a model for deployment.")
parser.add_argument(
"--format",
choices=["caffe2", "onnx", "torchscript"],
help="output format",
default="caffe2",
)
parser.add_argument(
"--export-method",
choices=["caffe2_tracing", "tracing", "scripting"],
help="Method to export models",
default="caffe2_tracing",
)
parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
parser.add_argument("--sample-image", default=None, type=str, help="sample image for input")
parser.add_argument("--run-eval", action="store_true")
parser.add_argument("--output", help="output directory for the converted model")
parser.add_argument(
"opts",
help="Modify config options using the command-line",
default=None,
nargs=argparse.REMAINDER,
)
args = parser.parse_args()
logger = setup_logger()
logger.info("Command line arguments: " + str(args))
PathManager.mkdirs(args.output)
# Disable respecialization on new shapes. Otherwise --run-eval will be slow
torch._C._jit_set_bailout_depth(1)
cfg = setup_cfg(args)
for d in ["train", "val"]:
DatasetCatalog.register("balloon_" + d, lambda d=d: get_balloon_dicts("/home/wjd1/balloon/" + d))
MetadataCatalog.get("balloon_" + d).set(thing_classes=["balloon"])
cfg.DATASETS.TRAIN = ("balloon_train",)
cfg.DATASETS.TEST = ("balloon_val",)
cfg.DATALOADER.NUM_WORKERS = 0
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025 # pick a good LR
cfg.SOLVER.MAX_ITER = 1000 # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = [] # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.
# create a torch model
torch_model = build_model(cfg)
DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS)
torch_model.eval()
# get sample data
sample_inputs = get_sample_inputs(args)
# convert and save model
if args.export_method == "caffe2_tracing":
exported_model = export_caffe2_tracing(cfg, torch_model, sample_inputs)
elif args.export_method == "scripting":
exported_model = export_scripting(torch_model)
elif args.export_method == "tracing":
exported_model = export_tracing(torch_model, sample_inputs)
# run evaluation with the converted model
if args.run_eval:
assert exported_model is not None, (
"Python inference is not yet implemented for "
f"export_method={args.export_method}, format={args.format}."
)
logger.info("Running evaluation ... this takes a long time if you export to CPU.")
dataset = cfg.DATASETS.TEST[0]
data_loader = build_detection_test_loader(cfg, dataset)
# NOTE: hard-coded evaluator. change to the evaluator for your dataset
evaluator = COCOEvaluator(dataset, output_dir=args.output)
metrics = inference_on_dataset(exported_model, data_loader, evaluator)
print_csv_format(metrics)
setup_cfg里的cfg.freeze()注释掉
我这没有准备coco相关的数据集,用了官网的balloon,要修改下cfg文件,否则默认用的是coco的cfg,类别也对不上。
训练好的模型转onnx
./export_model.py --config-file ../../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml --output ./output --export-method caffe2_tracing --format onnx MODEL.WEIGHTS ../output/model_final.pth MODEL.DEVICE cpu
转caffe2
./export_model.py --config-file ../../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml --output ./output --export-method caffe2_tracing --format caffe2 MODEL.WEIGHTS ../output/model_final.pth MODEL.DEVICE cpu
转成caffe和onnx后就可以用推理框架了,tensorrt,ncnn等。
最后附上训练和推理的代码:
训练
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
# import some common libraries
import numpy as np
import os, json, cv2, random
from detectron2.structures import BoxMode
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
def get_balloon_dicts(img_dir):
json_file = os.path.join(img_dir, "via_region_data.json")
with open(json_file) as f:
imgs_anns = json.load(f)
dataset_dicts = []
for idx, v in enumerate(imgs_anns.values()):
record = {}
filename = os.path.join(img_dir, v["filename"])
height, width = cv2.imread(filename).shape[:2]
record["file_name"] = filename
record["image_id"] = idx
record["height"] = height
record["width"] = width
annos = v["regions"]
objs = []
for _, anno in annos.items():
assert not anno["region_attributes"]
anno = anno["shape_attributes"]
px = anno["all_points_x"]
py = anno["all_points_y"]
poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
poly = [p for x in poly for p in x]
obj = {
"bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
"bbox_mode": BoxMode.XYXY_ABS,
"segmentation": [poly],
"category_id": 0,
}
objs.append(obj)
record["annotations"] = objs
dataset_dicts.append(record)
return dataset_dicts
for d in ["train", "val"]:
DatasetCatalog.register("balloon_" + d, lambda d=d: get_balloon_dicts("/home/wjd1/balloon/" + d))
MetadataCatalog.get("balloon_" + d).set(thing_classes=["balloon"])
from detectron2.engine import DefaultTrainer
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("balloon_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 0
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025 # pick a good LR
cfg.SOLVER.MAX_ITER = 1000 # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = [] # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
推理
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
# import some common libraries
import numpy as np
import os, json, cv2, random
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import BoxMode
from detectron2.utils.visualizer import ColorMode
def get_balloon_dicts(img_dir):
json_file = os.path.join(img_dir, "via_region_data.json")
with open(json_file) as f:
imgs_anns = json.load(f)
dataset_dicts = []
for idx, v in enumerate(imgs_anns.values()):
record = {}
filename = os.path.join(img_dir, v["filename"])
height, width = cv2.imread(filename).shape[:2]
record["file_name"] = filename
record["image_id"] = idx
record["height"] = height
record["width"] = width
annos = v["regions"]
objs = []
for _, anno in annos.items():
assert not anno["region_attributes"]
anno = anno["shape_attributes"]
px = anno["all_points_x"]
py = anno["all_points_y"]
poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
poly = [p for x in poly for p in x]
obj = {
"bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
"bbox_mode": BoxMode.XYXY_ABS,
"segmentation": [poly],
"category_id": 0,
}
objs.append(obj)
record["annotations"] = objs
dataset_dicts.append(record)
return dataset_dicts
balloon_metadata = MetadataCatalog.get("balloon_train")
for d in ["train", "val"]:
DatasetCatalog.register("balloon_" + d, lambda d=d: get_balloon_dicts("/home/wjd1/balloon/" + d))
MetadataCatalog.get("balloon_" + d).set(thing_classes=["balloon"])
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("balloon_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 0
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025 # pick a good LR
cfg.SOLVER.MAX_ITER = 1000 # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = [] # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth") # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 # set a custom testing threshold
predictor = DefaultPredictor(cfg)
dataset_dicts = get_balloon_dicts("/home/wjd1/balloon/val")
for d in random.sample(dataset_dicts, 3):
im = cv2.imread(d["file_name"])
outputs = predictor(im) # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
v = Visualizer(im[:, :, ::-1],
metadata=balloon_metadata,
scale=0.5,
instance_mode=ColorMode.IMAGE_BW # remove the colors of unsegmented pixels. This option is only available for segmentation models
)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
print(outputs)
cv2.imshow("ggg",out.get_image()[:, :, ::-1])
cv2.waitKey(0)