一、数据标注(Labelme)
windows环境:
首先安装Anaconda,Python3+版本。
安装成功后,打开Anaconda Prompt,然后依次输入以下命令。
# python3
conda create --name=labelme python=3.6 #创建虚拟环境
conda activate labelme #激活虚拟环境
pip install pyqt5 # pyqt5 can be installed via pip on python3
pip install labelme
labelme
ubuntu环境:
conda create --name=labelme python=3.6 #创建虚拟环境
conda activate labelme #激活虚拟环境
sudo apt-get install python3-pyqt5 #若尚未安装 pyqt5,先通过 apt 安装
pip install pillow #若尚未安装 pillow,先通过 pip 安装
pip install labelme
labelme
二、转化为Yolo格式的txt标注文件
json标注数据转换为txt标注数据
import json
import os
import os.path as osp
import PIL.Image
import yaml
from labelme import utils
import numpy as np
def main():
    """Convert Labelme JSON annotations into YOLO-seg txt label files.

    For every ``*.json`` file in ``json_path`` this writes four artifacts
    under ``train_dataset/``: the decoded image (``img/``), a colorized
    class mask (``mask/``), the label-name list (``yaml/``) and a YOLO
    segmentation txt file (``txt/``) with one polygon per line:
    ``<class-id> x1 y1 x2 y2 ...`` (coordinates normalized to [0, 1]).
    """
    json_path = "D:/深度学习/datasets/dataset"
    # Map Labelme label names to YOLO class ids.
    # NOTE(review): a shape whose label is missing from this dict raises
    # KeyError below -- extend this mapping to match your dataset.
    labels = {'label1': 0, 'label2': 1}
    for entry in os.listdir(json_path):
        path = os.path.join(json_path, entry)
        # Require the '.json' extension (the original matched any name
        # merely *ending* in the letters 'json').
        if not (os.path.isfile(path) and path.endswith('.json')):
            continue
        # Context manager closes the handle promptly (the original leaked
        # it via json.load(open(...))).
        with open(path, 'rb') as f:
            data = json.load(f)
        height = data['imageHeight']
        width = data['imageWidth']
        path_name = data['imagePath']
        # Keep only the bare file stem (strip Windows-style dirs and extension).
        path_name = path_name.split('\\')[-1].split('.')[0]
        img = utils.img_b64_to_arr(data['imageData'])
        label_name_to_value = {'_background': 0}
        segments = []  # one [class_id, x1, y1, x2, y2, ...] row per polygon
        for shape in data['shapes']:
            label_name = shape['label']
            if label_name not in label_name_to_value:
                label_name_to_value[label_name] = len(label_name_to_value)
            points = np.array(shape['points'], dtype=float)
            # Normalize polygon coordinates to [0, 1] as YOLO expects.
            points[:, 0] /= width
            points[:, 1] /= height
            row = list(np.round(points.reshape(-1), 6))
            row.insert(0, labels[label_name])
            segments.append(row)
        # label_values must be dense (0..n-1) for shapes_to_label.
        label_values, label_names = [], []
        for ln, lv in sorted(label_name_to_value.items(), key=lambda x: x[1]):
            label_values.append(lv)
            label_names.append(ln)
        assert label_values == list(range(len(label_values)))
        lbl = utils.shapes_to_label(img.shape, data['shapes'], label_name_to_value)
        # 生成的四个文件存在train_dataset文件夹里面 -- the four output folders
        # all live under train_dataset/; makedirs(exist_ok=True) replaces the
        # original exists()/mkdir() pairs.
        label_path = "train_dataset/mask"
        img_path = "train_dataset/img"
        yaml_path = "train_dataset/yaml"
        txt_path = "train_dataset/txt"
        for folder in (label_path, img_path, yaml_path, txt_path):
            os.makedirs(folder, exist_ok=True)
        # Save decoded image and class mask. NOTE(review): lbl[0] assumes
        # shapes_to_label returns a (cls, ins) tuple -- true for recent
        # labelme versions; confirm against the installed one.
        PIL.Image.fromarray(img).save(osp.join(img_path, path_name + '.bmp'))
        utils.lblsave(osp.join(label_path, path_name + '.png'), lbl[0])
        # Save the label-name list alongside the mask.
        info = dict(label_names=label_names)
        with open(osp.join(yaml_path, path_name + '.yaml'), 'w') as f:
            yaml.safe_dump(info, f, default_flow_style=False)
        # Save the YOLO-seg txt: each value followed by one space, polygons
        # separated by newlines, no trailing newline (matches original output
        # byte-for-byte). 'w' replaces the pointless 'w+' read/write mode.
        txt_path_name = os.path.join(txt_path, entry.replace('.json', '.txt'))
        with open(txt_path_name, 'w') as f:
            f.write('\n'.join(''.join('%s ' % v for v in row) for row in segments))
        print('Saved : %s' % path_name)
if __name__ == '__main__':
    main()
三、部署模型(opencv)
使用opencv C++部署yolov8实例分割模型。
#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
// Reads one class name per line from a plain-text file.
// Returns an empty list when the file cannot be opened or is empty.
// The path is taken by const reference to avoid a needless string copy.
std::vector<std::string> load_class_list(const std::string& class_names_path)
{
    std::vector<std::string> class_list;
    std::ifstream ifs(class_names_path);
    std::string line;
    while (std::getline(ifs, line))
    {
        class_list.push_back(line);
    }
    return class_list;
}
void load_net(cv::dnn::Net& net, bool is_cuda, std::string onnx_model_path)
{
auto result = cv::dnn::readNet(onnx_model_path);
if (is_cuda)
{
std::cout << "Attempty to use CUDA\n";
result.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
result.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA_FP16);
}
else
{
std::cout << "Running on CPU\n";
result.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
result.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
}
net = result;
}
// Per-class drawing colors; indexed modulo the list size in main().
const std::vector<cv::Scalar> colors = { cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 255), cv::Scalar(255, 0, 0) };
// Square input resolution expected by the exported YOLOv8 ONNX model.
const float INPUT_WIDTH = 640.0;
const float INPUT_HEIGHT = 640.0;
// Minimum class score for a candidate to be kept before NMS.
const float SCORE_THRESHOLD = 0.45;
// IoU threshold passed to cv::dnn::NMSBoxes.
const float NMS_THRESHOLD = 0.5;
// One detected instance produced by detect().
struct Detection
{
int class_id; // index into the class-name list
float confidence; // class score surviving NMS
cv::Rect box; // bounding box in original-image pixel coordinates
cv::Mat mask; // CV_8UC1 full-image binary mask, non-zero inside the instance
};
// Pads `source` into a square canvas (side = max(width, height)), anchored
// at the top-left with black fill, so resizing to the network's square
// input preserves the original aspect ratio.
cv::Mat format_yolo(const cv::Mat& source) {
    const int col = source.cols;
    const int row = source.rows;
    // std::max instead of OpenCV's MAX macro (type-safe, no double eval).
    const int side = std::max(col, row);
    cv::Mat result = cv::Mat::zeros(side, side, CV_8UC3);
    source.copyTo(result(cv::Rect(0, 0, col, row)));
    return result;
}
// Runs YOLOv8-seg inference on `image` and appends one Detection per object
// that survives score filtering and NMS. Per the printed shapes below, the
// model has two outputs: outputs[0] = 1 x (4+numClasses+32) x 8400
// (box + class scores + mask coefficients per candidate) and
// outputs[1] = 1 x 32 x 160 x 160 (mask prototypes).
void detect(cv::Mat& image, cv::dnn::Net& net, std::vector<Detection>& output, const std::vector<std::string>& className) {
cv::Mat blob;
// Pad to a square first so the 640x640 resize keeps the aspect ratio.
auto input_image = format_yolo(image);
cv::dnn::blobFromImage(input_image, blob, 1. / 255., cv::Size(INPUT_WIDTH, INPUT_HEIGHT), cv::Scalar(), true, false);
net.setInput(blob);
std::vector<cv::Mat> outputs;
net.forward(outputs, net.getUnconnectedOutLayersNames());
// Scale factors from network-input space (640x640) back to the padded image.
float x_factor = input_image.cols / INPUT_WIDTH;
float y_factor = input_image.rows / INPUT_HEIGHT;
// Ratio between the 160x160 mask-prototype grid and the 640x640 input.
float sx = 160.0f / 640.0f;
std::cout << "outputs[0]: " << outputs[0].size << std::endl; // 1*116*8400 116=4+80+32
std::cout << "outputs[1]: " << outputs[1].size << std::endl; // 1*32*160*160
int dimensions = outputs[0].size[1]; // 116
int candidates = outputs[0].size[2]; // 8400
outputs[0] = outputs[0].reshape(1, dimensions); // 116*8400
// Transpose so each row is one candidate: 8400 rows of 116 values.
cv::transpose(outputs[0], outputs[0]); // 8400*116
float* data = (float*)outputs[0].data;
float* data1 = (float*)outputs[1].data;
// Prototypes flattened to 32 x 25600 so a 1x32 coefficient row can be
// matrix-multiplied against them. `proto` aliases outputs[1]'s buffer.
cv::Mat proto(outputs[1].size[1], outputs[1].size[2] * outputs[1].size[3], CV_32FC1, data1);
std::vector<int> class_ids; // class
std::vector<float> confidences; // conf
std::vector<cv::Rect> boxes; // box
std::vector<cv::Mat> masks; // mask
for (int i = 0; i < candidates; ++i)
{
// Row layout: [cx, cy, w, h, classScores..., 32 mask coefficients].
float* classes_scores = data + 4;
cv::Mat scores(1, className.size(), CV_32FC1, classes_scores);
cv::Point class_id;
double maxClassScore;
minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); // maxLoc
if (maxClassScore > SCORE_THRESHOLD)
{
// Center/size box in input space; convert to top-left corner and scale
// back to padded-image pixels.
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
float* mask32 = data + 4 + className.size();
int left = int((x - 0.5 * w) * x_factor);
int top = int((y - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
boxes.push_back(cv::Rect(left, top, width, height)); // box
// NOTE(review): this Mat header aliases outputs[0]'s buffer -- it stays
// valid only while `outputs` is alive (true within this function).
masks.push_back(cv::Mat(1, 32, CV_32FC1, mask32)); // mask
confidences.push_back(maxClassScore); // conf
class_ids.push_back(class_id.x); // class
}
data += dimensions;
}
std::vector<int> nms_result;
cv::dnn::NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
for (int i = 0; i < nms_result.size(); i++)
{
int idx = nms_result[i];
Detection result;
result.class_id = class_ids[idx];
result.confidence = confidences[idx];
result.box = boxes[idx];
cv::Mat mask = masks[idx];
// Linear combination of the 32 prototypes yields the instance mask logits.
cv::Mat res = mask * proto; // 1*32 32*25600 = 1*25600
res = res.reshape(1, 160); // 160*160
int bx1 = boxes[idx].x;
int by1 = boxes[idx].y;
int bx2 = boxes[idx].x + boxes[idx].width;
int by2 = boxes[idx].y + boxes[idx].height;
std::cout << bx1 << " " << by1 << " " << bx2 << " " << by2 << std::endl; // 32 32 623 351
// Map the box from padded-image pixels into the 160x160 prototype grid.
int mx1 = bx1 * sx / x_factor;
int my1 = by1 * sx / y_factor;
int mx2 = std::min(159, int(bx2 * sx / x_factor));
int my2 = std::min(159, int(by2 * sx / y_factor));
std::cout << mx1 << " " << my1 << " " << mx2 << " " << my2 << std::endl; // 8 8 155 87
// NOTE(review): box coordinates are never clamped to >= 0 or to the image
// size; a detection at the border can make these Range/copyTo calls throw
// -- confirm against real model output.
cv::Mat res_roi = res(cv::Range(my1, my2), cv::Range(mx1, mx2));
cv::resize(res_roi, res_roi, cv::Size(boxes[idx].width, boxes[idx].height)); // bw bh
cv::exp(-res_roi, res_roi); // sigmoid
res_roi = 1 / (1 + res_roi); // sigmoid
cv::Mat dst;
// Threshold sigmoid output at 0.5 to obtain a binary mask.
cv::compare(res_roi, 0.5, dst, cv::CMP_GT);
// Paste the box-sized mask into a full-image canvas at the box position.
cv::Mat maskt = cv::Mat::zeros(cv::Size(image.cols, image.rows), CV_8UC1);
dst(cv::Range(0, by2 - by1), cv::Range(0, bx2 - bx1)).copyTo(maskt(cv::Range(by1, by2), cv::Range(bx1, bx2)));
result.mask = maskt;
output.push_back(result);
}
}
// Entry point: loads class names, the ONNX model and a test image, runs
// instance segmentation, then draws boxes/labels and a translucent colored
// mask overlay, writing the result to output.bmp.
int main()
{
    //std::string class_names_path = "classes.txt";
    //std::string onnx_model_path = "yolov8s-seg.onnx";
    //std::string input_image_path = "test_bus.jpg";
    std::string class_names_path = "classes_two.txt";
    std::string onnx_model_path = "yolov8_seg_two_class.onnx";
    std::string input_image_path = "test2.bmp";
    bool is_cuda = false;
    std::vector<std::string> class_list = load_class_list(class_names_path);
    cv::Mat frame = cv::imread(input_image_path);
    // Guard against a missing/unreadable image: the original crashed later
    // inside detect() when imread returned an empty Mat.
    if (frame.empty())
    {
        std::cerr << "Failed to read image: " << input_image_path << std::endl;
        return -1;
    }
    cv::RNG rng;
    cv::dnn::Net net;
    load_net(net, is_cuda, onnx_model_path);
    std::vector<Detection> output;
    detect(frame, net, output, class_list);
    // Accumulate one random color per instance into a full-frame overlay.
    cv::Mat rgb_mask = cv::Mat::zeros(frame.size(), frame.type());
    for (int i = 0; i < output.size(); ++i)
    {
        auto detection = output[i];
        auto box = detection.box;
        auto classId = detection.class_id;
        auto mask = detection.mask;
        const auto color = colors[classId % colors.size()];
        cv::rectangle(frame, box, color, 3);
        // Filled strip above the box as a label background.
        cv::rectangle(frame, cv::Point(box.x, box.y - 20), cv::Point(box.x + box.width, box.y), color, cv::FILLED);
        cv::putText(frame, class_list[classId].c_str(), cv::Point(box.x, box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
        cv::add(rgb_mask, cv::Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255)), rgb_mask, mask);
    }
    // Blend annotations (60%) with the colored masks (40%).
    cv::Mat combineRes;
    cv::addWeighted(frame, 0.6, rgb_mask, 0.4, 0, combineRes);
    cv::imwrite("output.bmp", combineRes);
    return 0;
}