ESP32-S3: Training on Your Own Data for Object Detection and Image Classification


For installing ESP-IDF, see my earlier article: flashing human_face_detect on the ESP32-CAM and ESP32-S3 for face recognition.

1. Download the project

2. Environment

Python 3.8 + CUDA 11.7 + ESP-IDF 5.0
# Mainly follow ModelAssistant/requirements_cuda.txt; if you hit library incompatibilities during training, refer to the versions below
torch                        2.0.0+cu117
torchaudio                   2.0.1+cu117
torchvision                  0.15.1+cu117
yapf                         0.40.2
typing_extensions            4.5.0
tensorboard                  2.13.0
tensorboard-data-server      0.7.2
tensorflow                   2.13.0
keras                        2.13.1
tensorflow-estimator         2.13.0
tensorflow-intel             2.13.0
tensorflow-io-gcs-filesystem 0.31.0
sscma                        2.0.0rc3
setuptools                   60.2.0
rich                         13.4.2
Pillow                       9.4.0
mmcls                        1.0.0rc6
mmcv                         2.0.0
mmdet                        3.0.0
mmengine                     0.10.1
mmpose                       1.2.0
mmyolo                       0.5.0

Errors
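If training fails with import errors or version conflicts, a quick first check is to compare the installed packages against the pinned versions above. A minimal sketch (the package selection is illustrative; extend it to the full list as needed):

from importlib.metadata import PackageNotFoundError, version

# Versions that worked for me (see the list above); trim or extend as needed.
pinned = {
    'torch': '2.0.0+cu117',
    'torchvision': '0.15.1+cu117',
    'tensorflow': '2.13.0',
    'mmcv': '2.0.0',
    'mmdet': '3.0.0',
    'mmengine': '0.10.1',
    'mmyolo': '0.5.0',
    'sscma': '2.0.0rc3',
}

for name, want in pinned.items():
    try:
        have = version(name)
    except PackageNotFoundError:
        have = 'NOT INSTALLED'
    mark = '' if have == want else '  <-- differs'
    print(f'{name:12s} installed={have:16s} expected={want}{mark}')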

3. Training and exporting the model

  • step 1: Convert the VOC-format annotations into EdgeLab's training format (COCO-style JSON) and split them into training and validation sets at an 8:2 ratio
import os
import json
import pandas as pd
from xml.etree import ElementTree as ET
from PIL import Image
import shutil
import random
from tqdm import tqdm

# Set paths
voc_path = 'F:/datasets/VOCdevkit/VOC2007'
train_path = 'F:/edgelab/ModelAssistant/datasets/myself/train'
valid_path = 'F:/edgelab/ModelAssistant/datasets/myself/valid'

# Only keep objects whose class is in the list of classes to be trained
classes = ["face"]

# Create directories if not exist
if not os.path.exists(train_path):
    os.makedirs(train_path)
if not os.path.exists(valid_path):
    os.makedirs(valid_path)

# Get list of image files
image_files = os.listdir(os.path.join(voc_path, 'JPEGImages'))
random.seed(0)
random.shuffle(image_files)

# Split data into train and valid
train_files = image_files[:int(len(image_files)*0.8)]
valid_files = image_files[int(len(image_files)*0.8):]

# Convert train data to COCO format
train_data = {'categories': [], 'images': [], 'annotations': []}
train_ann_id = 0
train_cat_id = 0
img_id = 0
train_categories = {}
for file in tqdm(train_files):
    # Add annotations
    xml_file = os.path.join(voc_path, 'Annotations', file[:-4] + '.xml')
    tree = ET.parse(xml_file)
    root = tree.getroot()
    for obj in root.findall('object'):
        category = obj.find('name').text
        if category not in classes:
            continue
        if category not in train_categories:
            train_categories[category] = train_cat_id
            train_cat_id += 1
        category_id = train_categories[category]
        bbox = obj.find('bndbox')
        x1 = int(bbox.find('xmin').text)
        y1 = int(bbox.find('ymin').text)
        x2 = int(bbox.find('xmax').text)
        y2 = int(bbox.find('ymax').text)
        width = x2 - x1
        height = y2 - y1
        ann_info = {'id': train_ann_id, 'image_id': img_id, 'category_id': category_id, 'bbox': [x1, y1, width, height],
                   'area': width*height, 'iscrowd': 0}
        train_data['annotations'].append(ann_info)
        train_ann_id += 1
        
    # Only include images that contain at least one object of the target classes
    if any(obj.find('name').text in classes for obj in root.findall('object')):
        image_id = img_id
        img_id += 1
        image_file = os.path.join(voc_path, 'JPEGImages', file)
        shutil.copy(image_file, os.path.join(train_path, file))
        img = Image.open(image_file)
        image_info = {'id': image_id, 'file_name': file, 'width': img.size[0], 'height': img.size[1]}
        train_data['images'].append(image_info)


# Add categories
for category, category_id in train_categories.items():
    train_data['categories'].append({'id': category_id, 'name': category})

# Save train data to file
with open(os.path.join(train_path, '_annotations.coco.json'), 'w') as f:
    json.dump(train_data, f, indent=4)

# Convert valid data to COCO format
valid_data = {'categories': [], 'images': [], 'annotations': []}
valid_ann_id = 0
img_id = 0
for file in tqdm(valid_files):
    # Add annotations
    xml_file = os.path.join(voc_path, 'Annotations', file[:-4] + '.xml')
    tree = ET.parse(xml_file)
    root = tree.getroot()
    for obj in root.findall('object'):
        category = obj.find('name').text
        if category not in classes:
            continue
        category_id = train_categories[category]
        bbox = obj.find('bndbox')
        x1 = int(bbox.find('xmin').text)
        y1 = int(bbox.find('ymin').text)
        x2 = int(bbox.find('xmax').text)
        y2 = int(bbox.find('ymax').text)
        width = x2 - x1
        height = y2 - y1
        ann_info = {'id': valid_ann_id, 'image_id': img_id, 'category_id': category_id, 'bbox': [x1, y1, width, height],
                   'area': width*height, 'iscrowd': 0}
        valid_data['annotations'].append(ann_info)
        valid_ann_id += 1
        
    # Only include images that contain at least one object of the target classes
    if any(obj.find('name').text in classes for obj in root.findall('object')):
        image_id = img_id
        img_id += 1
        image_file = os.path.join(voc_path, 'JPEGImages', file)
        shutil.copy(image_file, os.path.join(valid_path, file))
        img = Image.open(image_file)
        image_info = {'id': image_id, 'file_name': file, 'width': img.size[0], 'height': img.size[1]}
        valid_data['images'].append(image_info)

# Add categories
valid_data['categories'] = train_data['categories']

# Save valid data to file
with open(os.path.join(valid_path, '_annotations.coco.json'), 'w') as f:
    json.dump(valid_data, f, indent=4)
  • step 2: Train the model
python tools/train.py configs/yolov5/yolov5_tiny_1xb16_300e_coco.py \
--cfg-options  \
    work_dir=work_dirs/face_96 \
    num_classes=1 \
    epochs=300  \
    height=96 \
    width=96 \
    batch=128 \
    data_root=datasets/face/ \
    load_from=datasets/face/pretrain.pth
  • step 3: Visualize the training process with TensorBoard
cd work_dirs/face_96/20231219_181418/vis_data
tensorboard --logdir=./

Then open http://localhost:6006/ as prompted.
(screenshot)

  • step 4: Export the model
python tools/export.py configs/yolov5/yolov5_tiny_1xb16_300e_coco.py ./work_dirs/face_96/best_coco_bbox_mAP_epoch_300.pth --target tflite onnx \
--cfg-options  \
    work_dir=work_dirs/face_96 \
    num_classes=1 \
    epochs=300  \
    height=96 \
    width=96 \
    batch=128 \
    data_root=datasets/face/ \
    load_from=datasets/face/pretrain.pth

This generates the best_coco_bbox_mAP_epoch_300_int8.tflite file under ./work_dirs/face_96.
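Before moving to the board, it can be worth loading the exported model on the PC and checking the tensor shapes and quantization parameters. A minimal sketch, assuming the tensorflow 2.13 environment listed above and the export path from step 4:

import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(
    model_path='./work_dirs/face_96/best_coco_bbox_mAP_epoch_300_int8.tflite')
interpreter.allocate_tensors()

inp = interpreter.get_input_details()[0]
out = interpreter.get_output_details()[0]
print('input :', inp['shape'], inp['dtype'], 'quant:', inp['quantization'])
print('output:', out['shape'], out['dtype'], 'quant:', out['quantization'])

# Push one dummy frame through the model to confirm it runs end to end.
dummy = np.zeros(inp['shape'], dtype=inp['dtype'])
interpreter.set_tensor(inp['index'], dummy)
interpreter.invoke()
print('output sample:', interpreter.get_tensor(out['index']).flatten()[:10])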

4. Deploying the model

  • step 1: Copy best_coco_bbox_mAP_epoch_300_int8.tflite to F:\edgelab\sscma-example-esp32-1.0.0\model_zoo
  • step 2: Following the edgelab-example-esp32 guide "Train and deploy a FOMO model", convert the model into C source files and place them under F:\edgelab\sscma-example-esp32-1.0.0\components\modules\model
python tools/tflite2c.py --input ./model_zoo/best_coco_bbox_mAP_epoch_300_int8.tflite --name yolo --output_dir ./components/modules/model --classes face

This generates two files: ./components/modules/model/yolo_model_data.cpp and yolo_model_data.h.
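For reference, the conversion is conceptually just embedding the .tflite flatbuffer as a byte array in C source. The sketch below only illustrates that idea; the real tools/tflite2c.py also emits the class-name table and alignment attributes, and the symbol and file names used here are hypothetical:

from pathlib import Path

tflite_path = Path('model_zoo/best_coco_bbox_mAP_epoch_300_int8.tflite')
out_dir = Path('components/modules/model')
out_dir.mkdir(parents=True, exist_ok=True)

data = tflite_path.read_bytes()
# Format the flatbuffer as a C byte array, 12 bytes per line.
rows = [', '.join(f'0x{b:02x}' for b in data[i:i + 12]) for i in range(0, len(data), 12)]

cpp = (
    '#include "example_model_data.h"\n\n'
    'const unsigned char g_example_model_data[] = {\n  '
    + ',\n  '.join(rows)
    + '\n};\n'
    f'const unsigned int g_example_model_data_len = {len(data)};\n'
)
header = (
    '#pragma once\n'
    'extern const unsigned char g_example_model_data[];\n'
    'extern const unsigned int g_example_model_data_len;\n'
)

(out_dir / 'example_model_data.cpp').write_text(cpp)
(out_dir / 'example_model_data.h').write_text(header)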

  • step 3: Flash the program with ESP-IDF
    (screenshot)
This call draws the class name and confidence above each detection box:
fb_gfx_printf(frame, yolo.x - yolo.w / 2, yolo.y - yolo.h/2 - 5, 0x1FE0, "%s:%d", g_yolo_model_classes[yolo.target], yolo.confidence);

Open the ESP-IDF command prompt:

cd F:\edgelab\sscma-example-esp32-1.0.0\examples\yolo
idf.py set-target esp32s3
idf.py menuconfig

(screenshot) Enable the option shown above ("Suppress legacy driver deprecated warning" under the I2S configuration) in menuconfig, otherwise the build fails with the following error:

E:/Softwares/Espressif/frameworks/esp-idf-v5.0.4/components/driver/deprecated/driver/i2s.h:27:2: warning: #warning "This set of I2S APIs has been deprecated, please include 'driver/i2s_std.h', 'driver/i2s_pdm.h' or 'driver/i2s_tdm.h' instead. if you want to keep using the old APIs and ignore this warning, you can enable 'Suppress leagcy driver deprecated warning' option under 'I2S Configuration' menu in Kconfig" [-Wcpp]
   27 | #warning "This set of I2S APIs has been deprecated, \
      |  ^~~~~~~
ninja: build stopped: subcommand failed.
ninja failed with exit code 1, output of the command is in the F:\edgelab\sscma-example-esp32-1.0.0\examples\yolo\build\log\idf_py_stderr_output_27512 and F:\edgelab\sscma-example-esp32-1.0.0\examples\yolo\build\log\idf_py_stdout_output_27512
Then flash and open the monitor:
idf.py flash monitor -p COM3

(screenshot)
The LCD also shows the detection results in real time. With a 96x96 input, inference takes roughly 200 ms; with 192x192, roughly 660 ms.
(screenshot)

5. Remaining issues

The quantization in this pipeline is fairly basic; on my dataset the accuracy drops noticeably after quantization. The quantization scheme probably needs improvement, which I will leave for later (a calibration sketch follows the screenshots below).

  • Before quantization
    (screenshot)
  • After quantization
    (screenshot)
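For reference, one common way to reduce the int8 accuracy drop is to calibrate post-training quantization with a representative set of real training images. This is only a sketch of the generic TensorFlow Lite approach, not the exporter that ModelAssistant uses; the SavedModel path, image size, and normalization are assumptions:

import glob

import numpy as np
import tensorflow as tf
from PIL import Image

SAVED_MODEL_DIR = 'work_dirs/face_96/saved_model'            # assumption: a TF SavedModel of the network
CALIB_IMAGES = glob.glob('datasets/face/train/*.jpg')[:200]  # a few hundred real training images

def representative_dataset():
    # The quantizer runs these samples through the float model to pick the int8 scales.
    for path in CALIB_IMAGES:
        img = Image.open(path).convert('RGB').resize((96, 96))
        arr = np.asarray(img, dtype=np.float32)[None, ...] / 255.0  # assumption: 0-1 normalization
        yield [arr]

converter = tf.lite.TFLiteConverter.from_saved_model(SAVED_MODEL_DIR)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

with open('face_96_int8_calibrated.tflite', 'wb') as f:
    f.write(converter.convert())

Whether this helps depends on how the exporter already calibrates; if it already uses real data, per-channel quantization or quantization-aware training would be the next things to try.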

6. Inference script for single images

(1) Create an annotation JSON with an empty annotations list
Example: the images to be tested are stored under E:/projects/ESP32S3/imageDatas/img/valid and are all JPEGs. The generated JSON has the format shown below and is mainly consumed later by runner.val_dataloader inside build_runner.
(screenshot of the generated JSON)

import os
import json
from PIL import Image

# Folder containing the test images
image_folder = 'E:/projects/ESP32S3/imageDatas/img/valid/'

# Define the categories
categories = [
    {"id": 0, "name": "cat"},
    {"id": 1, "name": "dog"},
    {"id": 2, "name": "bird"}
]

# Initialize images and annotations (annotations stays empty)
images = []
annotations = []

# Iterate over all images in the folder
image_id = 0
for filename in os.listdir(image_folder):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        # Get the image width and height
        image_path = os.path.join(image_folder, filename)
        with Image.open(image_path) as img:
            width, height = img.size
        
        # Append to the images list
        images.append({
            "id": image_id,
            "file_name": filename,
            "width": width,
            "height": height
        })
        
        image_id += 1

# Assemble the COCO-format dict
coco_format = {
    "categories": categories,
    "images": images,
    "annotations": annotations
}

# Write the dict to a JSON file
output_file = 'E:/projects/ESP32S3/imageDatas/img/valid/_annotations.coco.json'
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(coco_format, f, ensure_ascii=False, indent=4)

print(f"COCO格式的标注文件已生成: {output_file}")

(2) Inference script
Create a new script at F:\edgelab\ModelAssistant\tools\inference_image.py:

import tempfile
import os
import torch
from mmengine.runner import Runner
from sscma.utils.cv import NMS, load_image
from typing import Optional
import os.path as osp
import numpy as np
import cv2
from tqdm import tqdm
import sscma.engine  # noqa
import sscma.evaluation  # noqa
import sscma.models  # noqa
import sscma.visualization  # noqa

def show_det(
    pred: np.ndarray,
    img: Optional[np.ndarray] = None,
    img_file: Optional[str] = None,
    win_name='Detection',
    class_name=None,
    shape=None,
    save_path=False,
    show=False,
) -> np.ndarray:
    assert not (img is None and img_file is None), 'The img and img_file parameters cannot both be None'
    
    if img is not None and img.dtype == np.float32:
        img = img * 255
    # load image
    if isinstance(img, np.ndarray):
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    else:
        img = load_image(img_file, shape=shape, mode='BGR').copy()

    # plot the result
    for i in pred:
        x1, y1, x2, y2 = map(int, i[:4])
        img = cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(
            img,
            class_name[int(i[5])] if class_name else 'None',
            (x1, y1),
            1,
            color=(0, 0, 255),
            thickness=1,
            fontScale=1,
        )
        cv2.putText(img, str(round(i[4].item(), 2)), (x1, y1 - 15), 1, color=(0, 0, 255), thickness=1, fontScale=1)
        
    if show:
        cv2.imshow(win_name, img)
        cv2.waitKey(0)

    if save_path:
        img_name = osp.basename(img_file)
        cv2.imwrite(osp.join(save_path, img_name), img)

    return pred

def show_det_ori(
    pred: np.ndarray,
    img: Optional[np.ndarray] = None,
    img_file: Optional[str] = None,
    win_name='Detection',
    class_name=None,
    shape=None,
    save_path=False,
    show=False,
) -> np.ndarray:
    assert not (img is None and img_file is None), 'The img and img_file parameters cannot both be None'
    
    if img is not None and img.dtype == np.float32:
        img = img * 255
    # load image
    if isinstance(img, np.ndarray):
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    else:
        img = load_image(img_file, shape=shape, mode='BGR').copy()

    # plot the result
    for i in pred:
        x1, y1, x2, y2 = map(int, i[:4])
        img = cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(
            img,
            class_name[int(i[5])] if class_name else 'None',
            (x1, y1 + 15),
            1,
            color=(0, 0, 255),
            thickness=2,
            fontScale=2,
        )
        cv2.putText(img, str(round(i[4].item(), 2)), (x1, y1 + 40), 1, color=(0, 0, 255), thickness=2, fontScale=2)
        
    if show:
        cv2.imshow(win_name, img)
        cv2.waitKey(0)

    if save_path:
        img_name = osp.basename(img_file)
        cv2.imwrite(osp.join(save_path, img_name), img)

    return pred

def build_config(args):
    from mmengine.config import Config
    from sscma.utils import load_config

    with tempfile.TemporaryDirectory() as tmp_dir:
        print(f"临时目录路径: {tmp_dir}")
        cfg_data = load_config(args['config'], folder=tmp_dir, cfg_options=args['cfg_options'])
        cfg = Config.fromfile(cfg_data)
    if args['cfg_options'] is not None:
        cfg.merge_from_dict(args['cfg_options'])
    
    cfg.val_dataloader['batch_size'] = 1
    cfg.val_dataloader['num_workers'] = 1
                                    
    if 'batch_shapes_cfg' in cfg.val_dataloader.dataset:
        cfg.val_dataloader.dataset.batch_shapes_cfg = None
    
    cfg.launcher = args['launcher']
    
    if args['out_dir'] is not None:
        assert 'visualization' in cfg.default_hooks, "VisualizationHook is required in 'default_hooks'"
        if args['task'] != 'mmcls':
            cfg.default_hooks.visualization.draw = True
        cfg.default_hooks.visualization.interval = args['interval']
    if args['out_dir'] is not None:
        os.makedirs(args['out_dir'], exist_ok=True)
    if args['dump'] is None:
        args['dump'] = args['checkpoint'].replace(os.path.splitext(args['checkpoint'])[-1], '.pkl')
        print('Using dump path from checkpoint: {}'.format(args['dump']))
    
    if args['dump'] is not None:
        dump_metric = dict(type='DumpResults', out_file_path=args['dump'])
        if isinstance(cfg.test_evaluator, (list, tuple)):
            cfg.test_evaluator = list(cfg.test_evaluator) + [dump_metric]
        else:
            cfg.test_evaluator = [cfg.test_evaluator, dump_metric]
            
    if args['device'].startswith('cuda'):
        args['device'] = args['device'] if torch.cuda.is_available() else 'cpu'
        
    return args, cfg

def build_runner(args, cfg):
    runner = Runner.from_cfg(cfg)

    checkpoint_ext = os.path.splitext(args['checkpoint'])[-1]
    # .pth checkpoints are not handled by this script yet
    if checkpoint_ext == '.pth':
        print("Checkpoints in .pth format are not supported yet")
        return None
                
    elif checkpoint_ext in {'.tflite', '.onnx', '.param', '.bin'}:
        from sscma.utils import Infernce
        
        runner = Infernce(
            args['checkpoint'],
            dataloader=runner.val_dataloader,
            cfg=cfg,
            runner=runner,
            dump=args['dump'],
            source=args['source'],
            task=str(args['task']).replace('mm', ''),
            show=args['show'],
            save_dir=args['out_dir'],
        )

    return runner
    

if __name__ == '__main__':
    # Only '.tflite', '.onnx', '.param' and '.bin' models are supported for now
    # The entries in args mirror the command-line arguments of the stock inference tool
    args = {
        'cfg_options': {
            'work_dir': 'F:/edgelab/ModelAssistant/work_dirs/96_1', 
            'num_classes': 3, 'height': 96, 'width': 96, 'epochs': 500, 
            'data_root': 'E:/projects/ESP32S3/imageDatas/cup1/', 
            'load_from': 'F:/edgelab/ModelAssistant/datasets/cups/pretrain_96.pth'
        },
        'checkpoint': 'F:/edgelab/ModelAssistant/work_dirs/96_1/best_coco_bbox_mAP_epoch_500_float32.onnx',
        'config': 'F:/edgelab/ModelAssistant/configs/yolov5/yolov5_tiny_1xb16_300e_coco.py',
        'device': 'cuda',
        'dump': None,
        'input_type': 'image',
        'interval': 10,
        'launcher': 'none',
        'out_dir': 'E:/projects/ESP32S3/imageDatas/inference/',
        'show': False,
        'source': None,
        'task': 'mmdet',
        'tta': False,
        'wait_time': 0.03,
        'work_dir':None,
    }

    args, cfg = build_config(args)
    runner = build_runner(args, cfg)
    if runner is None:
        exit()

    from mmengine.dataset import Compose
    test_pipeline = Compose(cfg.val_dataloader.dataset['pipeline'])
    image_path = "E:/projects/ESP32S3/imageDatas/cup1/valid/"
    Files = os.listdir(image_path)
    for file in tqdm(Files):
        if '.jpg' in file:
            imgPath = os.path.join(image_path, file)
            img_data = {
                'img_path': imgPath,
                'img_id': 0,
            }
            processed = test_pipeline(img_data)
            processed['inputs'] = [processed['inputs']]
            processed['data_samples'] = [processed['data_samples']]
            
            processed = runner.data_preprocess(processed, False)
            inputs = processed['inputs'][0]
            img_path = processed['data_samples'][0].get('img_path', None)
            img = processed['inputs'][0].permute(1, 2, 0).cpu().numpy()
            
            preds = runner.model(inputs)
            preds = preds[0][0]
            
            bbox, conf, classes = preds[:, :4], preds[:, 4], preds[:, 5:]
            preds = NMS(bbox, conf, classes, conf_thres=20, bbox_format='xywh')
            
            # Draw the detections on the 96x96 network-input image
            show_det(
                preds,
                img=img,
                img_file=img_path.replace('.jpg', '_96.jpg'),
                class_name=runner.class_name,
                shape=runner.input_shape[:-1],
                show=runner.show,
                save_path=runner.save_dir,
            )
            
            # Map boxes back to the original resolution (assumes the square input was
            # letterboxed so that its side corresponds to the original width, i.e. width >= height)
            ori_shape = processed['data_samples'][0].ori_shape
            tmp_0 = preds[:, :4]
            tmp_0[:, 0::2] = tmp_0[:, 0::2] / runner.input_shape[1] * ori_shape[1]
            tmp_0[:, 1::2] = tmp_0[:, 1::2] / runner.input_shape[0] * ori_shape[1]
            tmp = tmp_0.clone()
            padding = (ori_shape[1] - ori_shape[0]) / 2
            tmp[:, 1] = tmp_0[:, 1] - padding
            tmp[:, 3] = tmp_0[:, 3] - padding

            # Draw the detections on the original-size image
            preds[:, :4] = tmp
            ori_img= load_image(img_path, mode='RGB').copy()

            show_det_ori(
                preds,
                img=ori_img,
                img_file=img_path,
                class_name=runner.class_name,
                shape=runner.input_shape[:-1],
                show=runner.show,
                save_path=runner.save_dir,
            )
Run the script:
python tools/inference_image.py

(screenshot)
The inference results are saved under out_dir.
