For installing esp-idf, see my earlier article: esp32cam and esp32-s3 flashing human_face_detect for face recognition.
1. Download the Projects
- Training and model conversion: ModelAssistant (main branch)
- Model deployment: sscma-example-esp32 (1.0.0)
- Documentation: sscma-model-zoo
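A minimal clone sketch for the three repositories (assuming they live under the Seeed-Studio GitHub organization; the 1.0.0 tag name is an assumption based on the release used here):
git clone https://github.com/Seeed-Studio/ModelAssistant.git
git clone -b 1.0.0 https://github.com/Seeed-Studio/sscma-example-esp32.git
git clone https://github.com/Seeed-Studio/sscma-model-zoo.git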
2. Environment
Python 3.8 + CUDA 11.7 + esp-idf 5.0
# Mainly follow ModelAssistant/requirements_cuda.txt; an install sketch is given after the version list below. If you hit library-compatibility problems during training, align with these versions:
torch 2.0.0+cu117
torchaudio 2.0.1+cu117
torchvision 0.15.1+cu117
yapf 0.40.2
typing_extensions 4.5.0
tensorboard 2.13.0
tensorboard-data-server 0.7.2
tensorflow 2.13.0
keras 2.13.1
tensorflow-estimator 2.13.0
tensorflow-intel 2.13.0
tensorflow-io-gcs-filesystem 0.31.0
sscma 2.0.0rc3
setuptools 60.2.0
rich 13.4.2
Pillow 9.4.0
mmcls 1.0.0rc6
mmcv 2.0.0
mmdet 3.0.0
mmengine 0.10.1
mmpose 1.2.0
mmyolo 0.5.0
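A minimal install sketch matching the versions above (assuming a fresh conda environment and that ModelAssistant has been cloned; the cu117 index URL is PyTorch's official wheel index):
conda create -n sscma python=3.8 -y
conda activate sscma
pip install torch==2.0.0 torchvision==0.15.1 torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cu117
cd ModelAssistant
pip install -r requirements_cuda.txt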
Errors
mmcv
If installing mmcv directly with pip fails, see the article "mmcv最正确安装方式,亲测有效". If the Windows installation stops with a "Microsoft Visual C++ 14.0 or greater is required" error, see "三步解决error: Microsoft Visual C++ 14.0 or greater is required. Get it with Microsoft C++ Build Tools".
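If compiling mmcv locally keeps failing, one common workaround (not from the original articles) is to let OpenMIM pick a prebuilt wheel that matches the installed torch/CUDA build, which skips the Visual C++ compilation step whenever such a wheel exists:
pip install -U openmim
mim install mmcv==2.0.0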
3. Train and Export the Model
- step 1: Convert the VOC-format annotations to the EdgeLab/COCO training format with the script below, splitting the data 8:2 into training and validation sets (the resulting directory layout is shown after the script).
import os
import json
import pandas as pd
from xml.etree import ElementTree as ET
from PIL import Image
import shutil
import random
from tqdm import tqdm
# Set paths
voc_path = 'F:/datasets/VOCdevkit/VOC2007'
train_path = 'F:/edgelab/ModelAssistant/datasets/myself/train'
valid_path = 'F:/edgelab/ModelAssistant/datasets/myself/valid'
# Only keep objects that belong to the classes we want to train on
classes = ["face"]
# Create directories if not exist
if not os.path.exists(train_path):
os.makedirs(train_path)
if not os.path.exists(valid_path):
os.makedirs(valid_path)
# Get list of image files
image_files = os.listdir(os.path.join(voc_path, 'JPEGImages'))
random.seed(0)
random.shuffle(image_files)
# Split data into train and valid
train_files = image_files[:int(len(image_files)*0.8)]
valid_files = image_files[int(len(image_files)*0.8):]
# Convert train data to COCO format
train_data = {'categories': [], 'images': [], 'annotations': []}
train_ann_id = 0
train_cat_id = 0
img_id = 0
train_categories = {}
for file in tqdm(train_files):
# Add annotations
xml_file = os.path.join(voc_path, 'Annotations', file[:-4] + '.xml')
tree = ET.parse(xml_file)
root = tree.getroot()
for obj in root.findall('object'):
category = obj.find('name').text
if category not in classes:
continue
if category not in train_categories:
train_categories[category] = train_cat_id
train_cat_id += 1
category_id = train_categories[category]
bbox = obj.find('bndbox')
x1 = int(bbox.find('xmin').text)
y1 = int(bbox.find('ymin').text)
x2 = int(bbox.find('xmax').text)
y2 = int(bbox.find('ymax').text)
width = x2 - x1
height = y2 - y1
ann_info = {'id': train_ann_id, 'image_id': img_id, 'category_id': category_id, 'bbox': [x1, y1, width, height],
'area': width*height, 'iscrowd': 0}
train_data['annotations'].append(ann_info)
train_ann_id += 1
if len(root.findall('object')):
        # Only images that actually contain objects are added
image_id = img_id
img_id += 1
image_file = os.path.join(voc_path, 'JPEGImages', file)
shutil.copy(image_file, os.path.join(train_path, file))
img = Image.open(image_file)
image_info = {'id': image_id, 'file_name': file, 'width': img.size[0], 'height': img.size[1]}
train_data['images'].append(image_info)
# Add categories
for category, category_id in train_categories.items():
train_data['categories'].append({'id': category_id, 'name': category})
# Save train data to file
with open(os.path.join(train_path, '_annotations.coco.json'), 'w') as f:
json.dump(train_data, f, indent=4)
# Convert valid data to COCO format
valid_data = {'categories': [], 'images': [], 'annotations': []}
valid_ann_id = 0
img_id = 0
for file in tqdm(valid_files):
# Add annotations
xml_file = os.path.join(voc_path, 'Annotations', file[:-4] + '.xml')
tree = ET.parse(xml_file)
root = tree.getroot()
for obj in root.findall('object'):
category = obj.find('name').text
if category not in classes:
continue
category_id = train_categories[category]
bbox = obj.find('bndbox')
x1 = int(bbox.find('xmin').text)
y1 = int(bbox.find('ymin').text)
x2 = int(bbox.find('xmax').text)
y2 = int(bbox.find('ymax').text)
width = x2 - x1
height = y2 - y1
ann_info = {'id': valid_ann_id, 'image_id': img_id, 'category_id': category_id, 'bbox': [x1, y1, width, height],
'area': width*height, 'iscrowd': 0}
valid_data['annotations'].append(ann_info)
valid_ann_id += 1
if len(root.findall('object')):
# Add image
image_id = img_id
img_id += 1
image_file = os.path.join(voc_path, 'JPEGImages', file)
shutil.copy(image_file, os.path.join(valid_path, file))
img = Image.open(image_file)
image_info = {'id': image_id, 'file_name': file, 'width': img.size[0], 'height': img.size[1]}
valid_data['images'].append(image_info)
# Add categories
valid_data['categories'] = train_data['categories']
# Save valid data to file
with open(os.path.join(valid_path, '_annotations.coco.json'), 'w') as f:
json.dump(valid_data, f, indent=4)
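Note that the script above writes to datasets/myself/ while the training command in step 2 uses data_root=datasets/face/, so either rename the folder or adjust data_root. Whatever the name, the layout the config expects (including the pretrained weights referenced by load_from) is:
datasets/face/
├── pretrain.pth
├── train/
│   ├── _annotations.coco.json
│   └── <images>.jpg
└── valid/
    ├── _annotations.coco.json
    └── <images>.jpg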
- step 2: Following Face Detection - Swift-YOLO, download the pretrained weight file and start training:
python tools/train.py configs/yolov5/yolov5_tiny_1xb16_300e_coco.py \
--cfg-options \
work_dir=work_dirs/face_96 \
num_classes=1 \
epochs=300 \
height=96 \
width=96 \
batch=128 \
data_root=datasets/face/ \
load_from=datasets/face/pretrain.pth
- step 3: Visualize the training process with TensorBoard:
cd work_dirs/face_96/20231219_181418/vis_data
tensorboard --logdir=./
Then open http://localhost:6006/ as prompted.
- step 4: Export the model:
python tools/export.py configs/yolov5/yolov5_tiny_1xb16_300e_coco.py ./work_dirs/face_96/best_coco_bbox_mAP_epoch_300.pth --target tflite onnx \
--cfg-options \
work_dir=work_dirs/face_96 \
num_classes=1 \
epochs=300 \
height=96 \
width=96 \
batch=128 \
data_root=datasets/face/ \
load_from=datasets/face/pretrain.pth
This generates best_coco_bbox_mAP_epoch_300_int8.tflite under ./work_dirs/face_96.
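Before deployment, a quick sanity check of the exported file can be done with the TensorFlow Lite interpreter already present in this environment. A minimal sketch (the 1x96x96x3 int8 input layout is an assumption based on the 96x96 training size):
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='work_dirs/face_96/best_coco_bbox_mAP_epoch_300_int8.tflite')
interpreter.allocate_tensors()
inp = interpreter.get_input_details()[0]
out = interpreter.get_output_details()[0]
print(inp['shape'], inp['dtype'])  # expected for this config: [1 96 96 3], int8
# Run one dummy inference to confirm the graph executes end to end
interpreter.set_tensor(inp['index'], np.zeros(inp['shape'], dtype=inp['dtype']))
interpreter.invoke()
print(interpreter.get_tensor(out['index']).shape)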
4. Deploy the Model
- step 1: Copy best_coco_bbox_mAP_epoch_300_int8.tflite into F:\edgelab\sscma-example-esp32-1.0.0\model_zoo.
- step 2: Following "edgelab-example-esp32 - train and deploy a FOMO model", convert the model into C source files and put them under F:\edgelab\sscma-example-esp32-1.0.0\components\modules\model:
python tools/tflite2c.py --input ./model_zoo/best_coco_bbox_mAP_epoch_300_int8.tflite --name yolo --output_dir ./components/modules/model --classes face
This generates two files: ./components/modules/model/yolo_model_data.cpp and yolo_model_data.h.
- step 3: Build and flash with idf.py. In the example's display code, a line like the following draws the class name and confidence above each detection box on the frame:
fb_gfx_printf(frame, yolo.x - yolo.w / 2, yolo.y - yolo.h/2 - 5, 0x1FE0, "%s:%d", g_yolo_model_classes[yolo.target], yolo.confidence);
Open the ESP-IDF command prompt:
cd F:\edgelab\sscma-example-esp32-1.0.0\examples\yolo
idf.py set-target esp32s3
idf.py menuconfig
In menuconfig, enable the 'Suppress legacy driver deprecated warning' option under the 'I2S Configuration' menu; otherwise the build fails with:
E:/Softwares/Espressif/frameworks/esp-idf-v5.0.4/components/driver/deprecated/driver/i2s.h:27:2: warning: #warning "This set of I2S APIs has been deprecated, please include 'driver/i2s_std.h', 'driver/i2s_pdm.h' or 'driver/i2s_tdm.h' instead. if you want to keep using the old APIs and ignore this warning, you can enable 'Suppress leagcy driver deprecated warning' option under 'I2S Configuration' menu in Kconfig" [-Wcpp]
27 | #warning "This set of I2S APIs has been deprecated, \
| ^~~~~~~
ninja: build stopped: subcommand failed.
ninja failed with exit code 1, output of the command is in the F:\edgelab\sscma-example-esp32-1.0.0\examples\yolo\build\log\idf_py_stderr_output_27512 and F:\edgelab\sscma-example-esp32-1.0.0\examples\yolo\build\log\idf_py_stdout_output_27512
idf.py flash monitor -p COM3
The LCD also shows the detection results in real time. With a 96x96 input, inference takes roughly 200 ms; at 192x192 it takes roughly 660 ms.
5. Remaining Issues
The quantization in this pipeline is fairly basic; on my dataset the accuracy drops sharply after quantization, so the quantization algorithm probably needs to be improved. I'll revisit this later (a generic calibration-based sketch follows the comparison below).
- Before quantization
- After quantization
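For context, this is roughly what calibration-based post-training quantization looks like with the TensorFlow Lite converter. It is not the ModelAssistant export path, just a generic sketch that assumes a float SavedModel and a folder of calibration images (both paths are hypothetical):
import glob
import numpy as np
import tensorflow as tf
from PIL import Image

def representative_dataset():
    # Feed a few hundred real training images so the converter can calibrate activation ranges
    for path in glob.glob('datasets/face/train/*.jpg')[:200]:
        img = Image.open(path).convert('RGB').resize((96, 96))
        yield [np.asarray(img, dtype=np.float32)[None] / 255.0]

converter = tf.lite.TFLiteConverter.from_saved_model('work_dirs/face_96/saved_model')  # hypothetical path
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
with open('work_dirs/face_96/model_int8_calibrated.tflite', 'wb') as f:
    f.write(converter.convert())
How well the representative set covers the deployment distribution is usually what decides how much int8 accuracy is lost.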
6. Inference Script for Single Images
(1) Create a json annotation file without any annotations
For example, the images to test are stored under E:/projects/ESP32S3/imageDatas/img/valid and are all jpg files. The script below generates the json file, which is mainly consumed later by runner.val_dataloader inside build_runner.
import os
import json
from PIL import Image
# Folder containing the test images
image_folder = 'E:/projects/ESP32S3/imageDatas/img/valid/'
# Define the categories
categories = [
{"id": 0, "name": "cat"},
{"id": 1, "name": "dog"},
{"id": 2, "name": "bird"}
]
# Initialize images and annotations (annotations stay empty)
images = []
annotations = []
# Iterate over every image in the folder
image_id = 0
for filename in os.listdir(image_folder):
if filename.endswith('.jpg') or filename.endswith('.png'):
        # Get the image width and height
image_path = os.path.join(image_folder, filename)
with Image.open(image_path) as img:
width, height = img.size
        # Append to the images list
images.append({
"id": image_id,
"file_name": filename,
"width": width,
"height": height
})
image_id += 1
# Assemble the COCO-format dict
coco_format = {
"categories": categories,
"images": images,
"annotations": annotations
}
# Write the dict to a JSON file
output_file = 'E:/projects/ESP32S3/imageDatas/img/valid/_annotations.coco.json'
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(coco_format, f, ensure_ascii=False, indent=4)
print(f"COCO格式的标注文件已生成: {output_file}")
(2) Inference script
Create a new script at F:\edgelab\ModelAssistant\tools\inference_image.py:
import tempfile
import os
import torch
from mmengine.runner import Runner
from sscma.utils.cv import NMS, load_image
from typing import Optional
import os.path as osp
import numpy as np
import cv2
from tqdm import tqdm
import sscma.engine # noqa
import sscma.evaluation # noqa
import sscma.models # noqa
import sscma.visualization # noqa
def show_det(
pred: np.ndarray,
img: Optional[np.ndarray] = None,
img_file: Optional[str] = None,
win_name='Detection',
class_name=None,
shape=None,
save_path=False,
show=False,
) -> np.ndarray:
assert not (img is None and img_file is None), 'The img and img_file parameters cannot both be None'
    if img is not None and img.dtype == np.float32:
        img = img * 255
# load image
if isinstance(img, np.ndarray):
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
else:
img = load_image(img_file, shape=shape, mode='BGR').copy()
# plot the result
for i in pred:
x1, y1, x2, y2 = map(int, i[:4])
img = cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.putText(
img,
class_name[int(i[5])] if class_name else 'None',
(x1, y1),
1,
color=(0, 0, 255),
thickness=1,
fontScale=1,
)
cv2.putText(img, str(round(i[4].item(), 2)), (x1, y1 - 15), 1, color=(0, 0, 255), thickness=1, fontScale=1)
if show:
cv2.imshow(win_name, img)
cv2.waitKey(0)
if save_path:
img_name = osp.basename(img_file)
cv2.imwrite(osp.join(save_path, img_name), img)
return pred
def show_det_ori(
pred: np.ndarray,
img: Optional[np.ndarray] = None,
img_file: Optional[str] = None,
win_name='Detection',
class_name=None,
shape=None,
save_path=False,
show=False,
) -> np.ndarray:
assert not (img is None and img_file is None), 'The img and img_file parameters cannot both be None'
    if img is not None and img.dtype == np.float32:
        img = img * 255
# load image
if isinstance(img, np.ndarray):
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
else:
img = load_image(img_file, shape=shape, mode='BGR').copy()
# plot the result
for i in pred:
x1, y1, x2, y2 = map(int, i[:4])
img = cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.putText(
img,
class_name[int(i[5])] if class_name else 'None',
(x1, y1 + 15),
1,
color=(0, 0, 255),
thickness=2,
fontScale=2,
)
cv2.putText(img, str(round(i[4].item(), 2)), (x1, y1 + 40), 1, color=(0, 0, 255), thickness=2, fontScale=2)
if show:
cv2.imshow(win_name, img)
cv2.waitKey(0)
if save_path:
img_name = osp.basename(img_file)
cv2.imwrite(osp.join(save_path, img_name), img)
return pred
def build_config(args):
from mmengine.config import Config
from sscma.utils import load_config
with tempfile.TemporaryDirectory() as tmp_dir:
print(f"临时目录路径: {tmp_dir}")
cfg_data = load_config(args['config'], folder=tmp_dir, cfg_options=args['cfg_options'])
cfg = Config.fromfile(cfg_data)
if args['cfg_options'] is not None:
cfg.merge_from_dict(args['cfg_options'])
cfg.val_dataloader['batch_size'] = 1
cfg.val_dataloader['num_workers'] = 1
if 'batch_shapes_cfg' in cfg.val_dataloader.dataset:
cfg.val_dataloader.dataset.batch_shapes_cfg = None
cfg.launcher = args['launcher']
if args['out_dir'] is not None:
assert 'visualization' in cfg.default_hooks, "VisualizationHook is required in 'default_hooks'"
if args['task'] != 'mmcls':
cfg.default_hooks.visualization.draw = True
cfg.default_hooks.visualization.interval = args['interval']
if args['out_dir'] is not None:
os.makedirs(args['out_dir'], exist_ok=True)
if args['dump'] is None:
args['dump'] = args['checkpoint'].replace(os.path.splitext(args['checkpoint'])[-1], '.pkl')
print('Using dump path from checkpoint: {}'.format(args['dump']))
if args['dump'] is not None:
dump_metric = dict(type='DumpResults', out_file_path=args['dump'])
if isinstance(cfg.test_evaluator, (list, tuple)):
            cfg.test_evaluator = [*cfg.test_evaluator, dump_metric]
else:
cfg.test_evaluator = [cfg.test_evaluator, dump_metric]
if args['device'].startswith('cuda'):
args['device'] = args['device'] if torch.cuda.is_available() else 'cpu'
return args, cfg
def build_runner(args, cfg):
runner = Runner.from_cfg(cfg)
checkpoint_ext = os.path.splitext(args['checkpoint'])[-1]
    # .pth checkpoints are not handled here yet and cannot be used for now
if checkpoint_ext == '.pth':
print("pth格式模型暂不支持")
return None
elif checkpoint_ext in {'.tflite', '.onnx', '.param', '.bin'}:
from sscma.utils import Infernce
runner = Infernce(
args['checkpoint'],
dataloader=runner.val_dataloader,
cfg=cfg,
runner=runner,
dump=args['dump'],
source=args['source'],
task=str(args['task']).replace('mm', ''),
show=args['show'],
save_dir=args['out_dir'],
)
return runner
if __name__ == '__main__':
    # Only '.tflite', '.onnx', '.param' and '.bin' model formats are supported for now
    # The entries in args mirror the command-line arguments of the inference tooling
args = {
'cfg_options': {
'work_dir': 'F:/edgelab/ModelAssistant/work_dirs/96_1',
'num_classes': 3, 'height': 96, 'width': 96, 'epochs': 500,
'data_root': 'E:/projects/ESP32S3/imageDatas/cup1/',
'load_from': 'F:/edgelab/ModelAssistant/datasets/cups/pretrain_96.pth'
},
'checkpoint': 'F:/edgelab/ModelAssistant/work_dirs/96_1/best_coco_bbox_mAP_epoch_500_float32.onnx',
'config': 'F:/edgelab/ModelAssistant/configs/yolov5/yolov5_tiny_1xb16_300e_coco.py',
'device': 'cuda',
'dump': None,
'input_type': 'image',
'interval': 10,
'launcher': 'none',
'out_dir': 'E:/projects/ESP32S3/imageDatas/inference/',
'show': False,
'source': None,
'task': 'mmdet',
'tta': False,
'wait_time': 0.03,
'work_dir':None,
}
args, cfg = build_config(args)
runner = build_runner(args, cfg)
    if runner is None:
exit()
from mmengine.dataset import Compose
test_pipeline = Compose(cfg.val_dataloader.dataset['pipeline'])
image_path = "E:/projects/ESP32S3/imageDatas/cup1/valid/"
Files = os.listdir(image_path)
for file in tqdm(Files):
if '.jpg' in file:
imgPath = os.path.join(image_path, file)
img_data = {
'img_path': imgPath,
'img_id': 0,
}
processed = test_pipeline(img_data)
processed['inputs'] = [processed['inputs']]
processed['data_samples'] = [processed['data_samples']]
processed = runner.data_preprocess(processed, False)
inputs = processed['inputs'][0]
img_path = processed['data_samples'][0].get('img_path', None)
img = processed['inputs'][0].permute(1, 2, 0).cpu().numpy()
preds = runner.model(inputs)
preds = preds[0][0]
bbox, conf, classes = preds[:, :4], preds[:, 4], preds[:, 5:]
preds = NMS(bbox, conf, classes, conf_thres=20, bbox_format='xywh')
            # Draw the detections on the 96x96 network input
show_det(
preds,
img=img,
img_file=img_path.replace('.jpg', '_96.jpg'),
class_name=runner.class_name,
shape=runner.input_shape[:-1],
show=runner.show,
save_path=runner.save_dir,
)
ori_shape = processed['data_samples'][0].ori_shape
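            # Map boxes from the square model input back to the original image:
            # both axes are scaled by the original width, then the vertical letterbox
            # padding is removed, which assumes a landscape (width >= height) source image.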
tmp_0 = preds[:, :4]
tmp_0[:, 0::2] = tmp_0[:, 0::2] / runner.input_shape[1] * ori_shape[1]
tmp_0[:, 1::2] = tmp_0[:, 1::2] / runner.input_shape[0] * ori_shape[1]
tmp = tmp_0.clone()
padding = (ori_shape[1] - ori_shape[0]) / 2
tmp[:, 1] = tmp_0[:, 1] - padding
tmp[:, 3] = tmp_0[:, 3] - padding
            # Draw the detections at the original image size
preds[:, :4] = tmp
ori_img= load_image(img_path, mode='RGB').copy()
show_det_ori(
preds,
img=ori_img,
img_file=img_path,
class_name=runner.class_name,
shape=runner.input_shape[:-1],
show=runner.show,
save_path=runner.save_dir,
)
Run the script:
python tools/inference_image.py
The outputs are saved under the out_dir path.