XIAO ESP32S3 Sense语音唤醒和命令词识别

XIAO ESP32S3 Sense语音唤醒和命令词识别

简介

Seeed Studio XIAO ESP32S3 Sense集成了摄像头传感器、数字麦克风和SD卡支持。

本文基于XIAO ESP32S3 Sense实现语音唤醒和命令词识别,方法主要有两种:Seeed官网提供的Edge Impulse教程和乐鑫提供的ESP-Skainet。

Edge Impulse

Seeed Studio XIAO ESP32S3 (Sense) 开发板 | Seeed Studio Wiki有一个基于 Edge Impulse 的关键词识别的教程。

教程有详细步骤,本文主要介绍Edge Impulse中的模型训练。

训练集

包含四种样本:开灯、关灯、噪声和未知词。(可以增加一个唤醒词)

训练集

其中噪声和未知词的样本可以从开源项目中获取一部分。

在这里插入图片描述

MFCC

在这里插入图片描述
在这里插入图片描述

Classifier

在这里插入图片描述

实际效果

因为加了噪声和未知词样本,误触发概率很低,open成功率大概在90%、close成功率在80%。
样本缺少多样性(声音大小、不同人声、不同语速、不同环境)且数量比较少,实际使用效果一般。
在这里插入图片描述

ESP-Skainet

ESP-Skainet 是乐鑫推出的智能语音助手,目前支持唤醒词识别和命令词识别。语音唤醒和命令词识别基于ESP-IDF开发。

安装ESP-IDF开发环境

网上教程很多不再赘述,本文使用5.1版本。

下载官方示例

git clone https://github.com/espressif/esp-skainet.git

创建工程

新建工程目录

复制.\esp-skainet-master\examples\wake_word_detection到工程目录中

复制.\esp-skainet-master\components到工程目录中

修改工程目录下的CMakeLists.txt

# Project-level CMakeLists.txt for the wake-word / speech-command demo.
cmake_minimum_required(VERSION 3.5)

# Extra component search path: the "components" directory copied from esp-skainet.
# CMake comments start with '#'. A trailing "//..." inside set() is not a comment;
# it would be parsed as one more list element and added as a bogus component path.
set(EXTRA_COMPONENT_DIRS
    components
    )

include($ENV{IDF_PATH}/tools/cmake/project.cmake)
project(speech_commands_recognition)

修改menuconfig

# 打开ESP-IDF 5.1 CMD 
cd <工程目录下>
idf.py set-target esp32s3
idf.py menuconfig
  1. 修改开发板

    Audio Media HAL—> ESP32-S3-EYE

在这里插入图片描述

  2. 修改唤醒词

    ESP Speech Recognition—> use wakenet —>Select wake words (Load Multiple Wake Words) —> Load Multiple Wake Words

    Load Multiple Wake Words —> 你好小鑫 (wn9_nihaoxiaoxin_tts) \ Hi,ESP (wn9_hiesp)

    选择两个,实际使用时你好小鑫识别率会高一些

    在这里插入图片描述

    ESP Speech Recognition选择如下:

    在这里插入图片描述

  3. 修改flash大小

    Serial flasher config —> Choose flash mode automatically (please read help) —> Flash size (8 MB)

修改I2S驱动

修改:.\components\hardware_driver\boards\esp32s3-eye\bsp_board.c

  1. 增加头文件
#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0)
#include "driver/i2s_std.h"
#include "driver/i2s_tdm.h"
#include "driver/i2s_pdm.h" //新增
#include "soc/soc_caps.h"
#else
  2. 修改函数bsp_i2s_init
/**
 * Set up the I2S peripheral used to capture microphone audio.
 *
 * On ESP-IDF >= 5.0 an RX-only channel is created, configured in PDM RX mode
 * (clock on GPIO42, data on GPIO41, 16-bit mono slots) and enabled; the handle
 * is stored in the file-level rx_handle. On older ESP-IDF versions the legacy
 * I2S driver is installed and bound to the board's GPIO_I2S_* pins.
 *
 * @param i2s_num        I2S port to use.
 * @param sample_rate    Capture sample rate in Hz.
 * @param channel_format Not used by this implementation.
 * @param bits_per_chan  Bits per sample; only used by the legacy (< 5.0) path,
 *                       since PDM data width is fixed to 16 bits.
 * @return ESP_OK on success, otherwise the error code of the first driver call
 *         that failed (later steps are skipped).
 */
static esp_err_t bsp_i2s_init(i2s_port_t i2s_num, uint32_t sample_rate, int channel_format, int bits_per_chan)
{
    esp_err_t ret_val = ESP_OK;

    (void)channel_format;

#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0)
    (void)bits_per_chan;

    i2s_chan_config_t chan_cfg = I2S_CHANNEL_DEFAULT_CONFIG(i2s_num, I2S_ROLE_MASTER);

    /* RX only: no TX handle is requested. */
    ret_val = i2s_new_channel(&chan_cfg, NULL, &rx_handle);
    if (ret_val != ESP_OK) {
        return ret_val;
    }

    i2s_pdm_rx_config_t pdm_rx_cfg = {
        /* Clock the microphone at the rate the caller asked for. */
        .clk_cfg = I2S_PDM_RX_CLK_DEFAULT_CONFIG(sample_rate),
        /* The data bit-width of PDM mode is fixed to 16 */
        .slot_cfg = I2S_PDM_RX_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_MONO),
        .gpio_cfg = {
            .clk = GPIO_NUM_42,
            // Only ESP32-S3 can support 4-line PDM RX
            .dins = {
                GPIO_NUM_41,
            },
            .invert_flags = {
                .clk_inv = false,
            },
        },
    };

    ret_val = i2s_channel_init_pdm_rx_mode(rx_handle, &pdm_rx_cfg);
    if (ret_val != ESP_OK) {
        return ret_val;
    }

    ret_val = i2s_channel_enable(rx_handle);
#else
    i2s_config_t i2s_config = I2S_CONFIG_DEFAULT(sample_rate, I2S_CHANNEL_FMT_ONLY_LEFT, bits_per_chan);

    i2s_pin_config_t pin_config = {
        .bck_io_num = GPIO_I2S_SCLK,
        .ws_io_num = GPIO_I2S_LRCK,
        .data_out_num = GPIO_I2S_DOUT,
        .data_in_num = GPIO_I2S_SDIN,
        .mck_io_num = GPIO_I2S_MCLK,
    };

    ret_val = i2s_driver_install(i2s_num, &i2s_config, 0, NULL);
    if (ret_val != ESP_OK) {
        return ret_val;
    }

    ret_val = i2s_set_pin(i2s_num, &pin_config);
#endif

    return ret_val;
}
  3. 修改函数bsp_board_init

    /**
     * Board bring-up: initialises the I2S microphone input.
     *
     * The capture format is fixed at 16 kHz, 1 channel, 16 bits; the three
     * arguments are accepted for interface compatibility but not used.
     *
     * @return ESP_OK on success, otherwise the error reported by bsp_i2s_init.
     */
    esp_err_t bsp_board_init(uint32_t sample_rate, int channel_format,
                             int bits_per_chan) {
      (void)sample_rate;
      (void)channel_format;
      (void)bits_per_chan;

      /* Propagate the driver result so callers wrapping this in
         ESP_ERROR_CHECK actually see an I2S initialisation failure. */
      return bsp_i2s_init(I2S_NUM_AUTO, 16000, 1, 16);
    }
    
  4. 修改函数bsp_get_feed_data

esp_err_t bsp_get_feed_data(bool is_get_raw_channel, int16_t *buffer, int buffer_len)
{
    esp_err_t ret = ESP_OK;
    size_t    bytes_read;
    int       audio_chunksize = buffer_len / (sizeof(int32_t));
#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0)
    ret = i2s_channel_read(rx_handle, buffer, buffer_len, &bytes_read, portMAX_DELAY);
#else
    ret = i2s_read(I2S_NUM_AUTO, buffer, buffer_len, &bytes_read, portMAX_DELAY);
#endif

    int32_t *tmp_buff = buffer;
    for (int i = 0; i < audio_chunksize; i++) {
        tmp_buff[i] = tmp_buff[i] >> 14;  // 32:8为有效位, 8:0为低8位, 全为0, AFE的输入为16位语音数据,拿29:13位是为了对语音信号放大。
    }

    return ret;
}

现在工程应该可以编译通过,并且程序没有错误。只是无法唤醒语音识别。还需要修改AFE的配置

修改AFE配置

我修改的afe_config的配置如下(根据实际识别效果调整的,这些参数可以看AFE 声学前端算法框架 - ESP32-S3 - — ESP-SR latest 文档 (espressif.com)的说明适当修改)

    /* AFE (audio front-end) setup excerpt. The values below were chosen by the
       article's author from observed recognition results; see the ESP-SR AFE
       documentation before changing them. */
    afe_handle                          = (esp_afe_sr_iface_t *)&ESP_AFE_SR_HANDLE;
    afe_config_t afe_config             = AFE_CONFIG_DEFAULT();
    afe_config.memory_alloc_mode        = AFE_MEMORY_ALLOC_MORE_PSRAM;
    afe_config.se_init                  = true;
    afe_config.wakenet_init             = true;
    /* Up to two wake-word models, as selected in menuconfig. */
    afe_config.wakenet_model_name       = wn_name;
    afe_config.wakenet_model_name_2     = wn_name_2;
    afe_config.voice_communication_init = false;
    afe_config.vad_mode                 = VAD_MODE_4;
    afe_config.wakenet_mode             = DET_MODE_95;
    afe_config.afe_mode                 = SR_MODE_HIGH_PERF;
    afe_config.aec_init                 = false;
    /* One microphone channel and no reference channel. */
    afe_config.pcm_config.total_ch_num  = 1;
    afe_config.pcm_config.mic_num       = 1;
    afe_config.pcm_config.ref_num       = 0;

feed_Task中修改:

void feed_Task(void *arg)
{
    esp_afe_sr_data_t *afe_data        = arg;
    int                audio_chunksize = afe_handle->get_feed_chunksize(afe_data);
    int                nch             = afe_handle->get_channel_num(afe_data);
    int                feed_channel    = esp_get_feed_channel();
    assert(nch < feed_channel);
    int16_t *i2s_buff = malloc(audio_chunksize * sizeof(int16_t) * feed_channel);
    assert(i2s_buff);

    while (task_flag) {
        // 修改成 true (如果为 true,则获取原始通道数的录音数据;如果为 false,则根据板子的配置获取相应数量的通道数据。)
        esp_get_feed_data(true, i2s_buff, audio_chunksize * sizeof(int16_t) * feed_channel);

        afe_handle->feed(afe_data, i2s_buff);
    }
    if (i2s_buff) {
        free(i2s_buff);
        i2s_buff = NULL;
    }
    vTaskDelete(NULL);
}

编译烧录&测试

idf.py flash

测试:

需要多角度(对着板子的角度)、不同语速和音量测试,识别条件感觉还是蛮苛刻的,效果一般。但是基本不会误触。

下文会再增加命令词识别,效果会好一些。

在这里插入图片描述

命令词增加

修改menuconfig
idf.py menuconfig

ESP Speech Recognition —> use multinet —> Chinese Speech Commands Model (general chinese recognition (mn6_cn))

在这里插入图片描述

修改main.c
/*
   This example code is in the Public Domain (or CC0 licensed, at your option.)

   Unless required by applicable law or agreed to in writing, this
   software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
   CONDITIONS OF ANY KIND, either express or implied.
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "esp_afe_sr_models.h"
#include "esp_mn_iface.h"
#include "esp_mn_models.h"
#include "esp_board_init.h"
#include "model_path.h"
#include "string.h"
#include "driver/gpio.h"
#include "esp_process_sdkconfig.h"
#include "esp_afe_sr_iface.h"
#include "esp_mn_speech_commands.h"

/* GPIO number of the on-board user LED. */
#define LED_BUILT_IN 21

/* Set to 1 by detect_Task once the wake word channel is verified; reset to 0
   when command recognition times out. */
int                        detect_flag = 0;
/* AFE interface (function table) and the AFE instance created in app_main. */
static esp_afe_sr_iface_t *afe_handle  = NULL;
static esp_afe_sr_data_t  *afe_data    = NULL;
/* feed_Task and detect_Task keep looping while this is non-zero. */
static volatile int        task_flag   = 0;
/* Model list returned by esp_srmodel_init(); also used to pick the multinet model. */
srmodel_list_t            *models      = NULL;

void feed_Task(void *arg)
{
    esp_afe_sr_data_t *afe_data        = arg;
    int                audio_chunksize = afe_handle->get_feed_chunksize(afe_data);
    int                nch             = afe_handle->get_channel_num(afe_data);
    int                feed_channel    = esp_get_feed_channel();

    assert(nch < feed_channel);

    int16_t *i2s_buff = malloc(audio_chunksize * sizeof(int16_t) * feed_channel);
    assert(i2s_buff);
    while (task_flag) {
        esp_get_feed_data(true, i2s_buff, audio_chunksize * sizeof(int16_t) * feed_channel);
        afe_handle->feed(afe_data, i2s_buff);
    }
    if (i2s_buff) {
        free(i2s_buff);
        i2s_buff = NULL;
    }
    vTaskDelete(NULL);
}

/**
 * FreeRTOS task: fetches processed audio from the AFE, waits for the wake
 * word, then runs MultiNet command recognition and drives the user LED.
 *
 * Speech commands are taken from sdkconfig. Command id 1 switches the LED on
 * (GPIO level 0) and command id 2 switches it off (GPIO level 1).
 * Runs while task_flag is non-zero or until an AFE fetch fails, then deletes
 * itself.
 *
 * @param arg AFE instance data (esp_afe_sr_data_t *).
 */
void detect_Task(void *arg)
{
    esp_afe_sr_data_t *afe_data      = arg;
    int                afe_chunksize = afe_handle->get_fetch_chunksize(afe_data);

    char *mn_name = esp_srmodel_filter(models, ESP_MN_PREFIX, ESP_MN_CHINESE);
    if (mn_name == NULL) {
        /* Without a model name there is nothing to create a recogniser from. */
        printf("No Chinese multinet model found, please enable it by menuconfig!\n");
        vTaskDelete(NULL);
        return;
    }
    printf("multinet:%s\n", mn_name);
    esp_mn_iface_t     *multinet   = esp_mn_handle_from_name(mn_name);
    model_iface_data_t *model_data = multinet->create(mn_name, 6000);
    // Add speech commands from sdkconfig. To register them in code instead, use
    // esp_mn_commands_clear() / esp_mn_commands_add() / esp_mn_commands_update().
    esp_mn_commands_update_from_sdkconfig(multinet, model_data);
    multinet->print_active_speech_commands(model_data);  // print all active speech commands
    int mu_chunksize = multinet->get_samp_chunksize(model_data);
    assert(mu_chunksize == afe_chunksize);
    printf("------------detect start------------\n");

    while (task_flag) {
        afe_fetch_result_t *res = afe_handle->fetch(afe_data);
        if (!res || res->ret_value == ESP_FAIL) {
            printf("fetch error!\n");
            break;
        }

        if (res->wakeup_state == WAKENET_DETECTED) {
            printf("WAKEWORD DETECTED\n");
            multinet->clean(model_data);  // clean all status of multinet
        } else if (res->wakeup_state == WAKENET_CHANNEL_VERIFIED) {
            detect_flag = 1;
            printf("AFE_FETCH_CHANNEL_VERIFIED, channel index: %d\n", res->trigger_channel_id);
        }

        if (detect_flag == 1) {
            esp_mn_state_t mn_state = multinet->detect(model_data, res->data);

            if (mn_state == ESP_MN_STATE_DETECTING) {
                continue;
            }

            if (mn_state == ESP_MN_STATE_DETECTED) {
                esp_mn_results_t *mn_result = multinet->get_results(model_data);
                for (int i = 0; i < mn_result->num; i++) {
                    printf("TOP %d, command_id: %d, phrase_id: %d, string:%s prob: %f\n",
                           i + 1, mn_result->command_id[i], mn_result->phrase_id[i], mn_result->string, mn_result->prob[i]);
                }
                /* Act on the best match (TOP 1) only. Applying every candidate
                   in turn would let the lowest-ranked one decide the LED state. */
                if (mn_result->num > 0) {
                    if (mn_result->command_id[0] == 1) {
                        gpio_set_level(LED_BUILT_IN, 0);  // on
                    } else if (mn_result->command_id[0] == 2) {
                        gpio_set_level(LED_BUILT_IN, 1);  // off
                    }
                }
                printf("\n-----------listening-----------\n");
            }

            if (mn_state == ESP_MN_STATE_TIMEOUT) {
                esp_mn_results_t *mn_result = multinet->get_results(model_data);
                printf("timeout, string:%s\n", mn_result->string);
                /* Go back to waiting for the wake word. */
                afe_handle->enable_wakenet(afe_data);
                detect_flag = 0;
                printf("\n-----------awaits to be waken up-----------\n");
                continue;
            }
        }
    }
    vTaskDelete(NULL);
}

/**
 * Application entry point.
 *
 * Initialises the board audio input, loads the speech models, picks up to two
 * wake-word models, creates the AFE instance, prints its configuration,
 * configures the user LED pin and starts the feed and detect tasks.
 * Returns early (without starting the tasks) when no wake-word model is
 * available or the AFE instance cannot be created.
 */
void app_main(void)
{
    ESP_ERROR_CHECK(esp_board_init(16000, 1, 16));
    // ESP_ERROR_CHECK(esp_sdcard_init("/sdcard", 10));
    models          = esp_srmodel_init("model");
    char *wn_name   = NULL;
    char *wn_name_2 = NULL;

    if (models != NULL) {
        for (int i = 0; i < models->num; i++) {
            printf("Load model , name[%d]: %s\n", i, models->model_name[i]);
            if (strstr(models->model_name[i], ESP_WN_PREFIX) != NULL) {
                if (wn_name == NULL) {
                    wn_name = models->model_name[i];
                    printf("The first wakenet model: %s\n", wn_name);
                } else if (wn_name_2 == NULL) {
                    wn_name_2 = models->model_name[i];
                    printf("The second wakenet model: %s\n", wn_name_2);
                }
            }
        }
    }

    /* Covers both "no model partition" and "models present but no wakenet". */
    if (wn_name == NULL) {
        printf("Please enable wakenet model and select wake word by menuconfig!\n");
        return;
    }

    afe_handle                          = (esp_afe_sr_iface_t *)&ESP_AFE_SR_HANDLE;
    afe_config_t afe_config             = AFE_CONFIG_DEFAULT();
    afe_config.memory_alloc_mode        = AFE_MEMORY_ALLOC_MORE_PSRAM;
    afe_config.se_init                  = false;
    afe_config.wakenet_init             = true;
    afe_config.wakenet_model_name       = wn_name;
    afe_config.wakenet_model_name_2     = wn_name_2;
    afe_config.voice_communication_init = false;
    afe_config.vad_mode                 = VAD_MODE_4;
    afe_config.wakenet_mode             = DET_MODE_95;
    afe_config.afe_mode                 = SR_MODE_HIGH_PERF;
    afe_config.aec_init                 = false;
    /* One microphone channel and no reference channel. */
    afe_config.pcm_config.total_ch_num  = 1;
    afe_config.pcm_config.mic_num       = 1;
    afe_config.pcm_config.ref_num       = 0;

    afe_data = afe_handle->create_from_config(&afe_config);
    if (afe_data == NULL) {
        /* Both tasks dereference the AFE instance; do not start them without it. */
        printf("Failed to create AFE instance!\n");
        return;
    }

    task_flag = 1;

    /* Dump the effective AFE configuration for debugging. */
    afe_config_t *config = &afe_config;
    printf("AEC Init: %s\n", config->aec_init ? "true" : "false");
    printf("SE Init: %s\n", config->se_init ? "true" : "false");
    printf("VAD Init: %s\n", config->vad_init ? "true" : "false");
    printf("Wakenet Init: %s\n", config->wakenet_init ? "true" : "false");
    printf("Voice Communication Init: %s\n", config->voice_communication_init ? "true" : "false");
    printf("Voice Communication AGC Init: %s\n", config->voice_communication_agc_init ? "true" : "false");
    printf("Voice Communication AGC Gain: %d dB\n", config->voice_communication_agc_gain);
    printf("VAD Mode: %d\n", config->vad_mode);
    printf("Wakenet Model Name: %s\n", config->wakenet_model_name);
    printf("Wakenet Mode: %d\n", config->wakenet_mode);
    printf("AFE Mode: %d\n", config->afe_mode);
    printf("AFE Preferred Core: %d\n", config->afe_perferred_core);
    printf("AFE Preferred Priority: %d\n", config->afe_perferred_priority);
    printf("AFE Ringbuf Size: %d\n", config->afe_ringbuf_size);
    printf("Memory Alloc Mode: %d\n", config->memory_alloc_mode);
    printf("AGC Mode: %d\n", config->agc_mode);
    printf("Total Channel Number: %d\n", config->pcm_config.total_ch_num);
    printf("Mic Number: %d\n", config->pcm_config.mic_num);
    printf("Ref Number: %d\n", config->pcm_config.ref_num);
    printf("AFE NS Mode: %d\n", config->afe_ns_mode);

    // GPIO: user LED pin, no pulls, no interrupt. Designated initialisers
    // zero every field that is not listed.
    gpio_config_t io_conf = {
        .pin_bit_mask = (1ull << LED_BUILT_IN),
        .mode         = GPIO_MODE_INPUT_OUTPUT,
        .pull_up_en   = GPIO_PULLUP_DISABLE,
        .pull_down_en = GPIO_PULLDOWN_DISABLE,
        .intr_type    = GPIO_INTR_DISABLE,
    };
    gpio_config(&io_conf);

    /* Feed on core 0, detect on core 1, same priority. */
    xTaskCreatePinnedToCore(&feed_Task, "feed", 8 * 1024, (void *)afe_data, 5, NULL, 0);
    xTaskCreatePinnedToCore(&detect_Task, "detect", 4 * 1024, (void *)afe_data, 5, NULL, 1);

    // To shut down: set task_flag = 0, then call afe_handle->destroy(afe_data)
    // and reset afe_data to NULL.
}
编译烧录&测试

唤醒效果差一点,命令词识别效果还是不错的。

在这里插入图片描述

语音点灯

使用命令(打开\关闭灯、开\关灯、打开\关闭电灯)点亮和熄灭板子上的用户灯(橙色,PIN:21)

修改main.c
/*
   This example code is in the Public Domain (or CC0 licensed, at your option.)

   Unless required by applicable law or agreed to in writing, this
   software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
   CONDITIONS OF ANY KIND, either express or implied.
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "esp_afe_sr_models.h"
#include "esp_mn_iface.h"
#include "esp_mn_models.h"
#include "esp_board_init.h"
#include "model_path.h"
#include "string.h"
#include "driver/gpio.h"
#include "esp_process_sdkconfig.h"
#include "esp_afe_sr_iface.h"
#include "esp_mn_speech_commands.h"

/* GPIO number of the on-board user LED. */
#define LED_BUILT_IN 21

/* Set to 1 by detect_Task once the wake word channel is verified; reset to 0
   when command recognition times out. */
int                        detect_flag = 0;
/* AFE interface (function table) and the AFE instance created in app_main. */
static esp_afe_sr_iface_t *afe_handle  = NULL;
static esp_afe_sr_data_t  *afe_data    = NULL;
/* feed_Task and detect_Task keep looping while this is non-zero. */
static volatile int        task_flag   = 0;
/* Model list returned by esp_srmodel_init(); also used to pick the multinet model. */
srmodel_list_t            *models      = NULL;

void feed_Task(void *arg)
{
    esp_afe_sr_data_t *afe_data        = arg;
    int                audio_chunksize = afe_handle->get_feed_chunksize(afe_data);
    int                nch             = afe_handle->get_channel_num(afe_data);
    int                feed_channel    = esp_get_feed_channel();

    assert(nch < feed_channel);

    int16_t *i2s_buff = malloc(audio_chunksize * sizeof(int16_t) * feed_channel);
    assert(i2s_buff);
    while (task_flag) {
        esp_get_feed_data(true, i2s_buff, audio_chunksize * sizeof(int16_t) * feed_channel);
        afe_handle->feed(afe_data, i2s_buff);
    }
    if (i2s_buff) {
        free(i2s_buff);
        i2s_buff = NULL;
    }
    vTaskDelete(NULL);
}

/**
 * FreeRTOS task: fetches processed audio from the AFE, waits for the wake
 * word, then runs MultiNet command recognition and drives the user LED.
 *
 * Six pinyin phrases are registered in code: three under command id 1
 * (switch the LED on, GPIO level 0) and three under command id 2 (switch the
 * LED off, GPIO level 1).
 * Runs while task_flag is non-zero or until an AFE fetch fails, then deletes
 * itself.
 *
 * @param arg AFE instance data (esp_afe_sr_data_t *).
 */
void detect_Task(void *arg)
{
    esp_afe_sr_data_t *afe_data      = arg;
    int                afe_chunksize = afe_handle->get_fetch_chunksize(afe_data);

    char *mn_name = esp_srmodel_filter(models, ESP_MN_PREFIX, ESP_MN_CHINESE);
    if (mn_name == NULL) {
        /* Without a model name there is nothing to create a recogniser from. */
        printf("No Chinese multinet model found, please enable it by menuconfig!\n");
        vTaskDelete(NULL);
        return;
    }
    printf("multinet:%s\n", mn_name);
    esp_mn_iface_t     *multinet   = esp_mn_handle_from_name(mn_name);
    model_iface_data_t *model_data = multinet->create(mn_name, 6000);

    /* Replace the sdkconfig command list with the LED commands below
       (esp_mn_commands_update_from_sdkconfig is intentionally not called). */
    esp_mn_commands_clear();
    esp_mn_commands_add(1, "da kai dian deng");
    esp_mn_commands_add(1, "da kai deng");
    esp_mn_commands_add(1, "kai deng");
    esp_mn_commands_add(2, "guan bi dian deng");
    esp_mn_commands_add(2, "guan bi deng");
    esp_mn_commands_add(2, "guan deng");
    esp_mn_commands_update();                            // apply the new command list
    multinet->print_active_speech_commands(model_data);  // print all active speech commands
    int mu_chunksize = multinet->get_samp_chunksize(model_data);
    assert(mu_chunksize == afe_chunksize);
    printf("------------detect start------------\n");

    while (task_flag) {
        afe_fetch_result_t *res = afe_handle->fetch(afe_data);
        if (!res || res->ret_value == ESP_FAIL) {
            printf("fetch error!\n");
            break;
        }

        if (res->wakeup_state == WAKENET_DETECTED) {
            printf("WAKEWORD DETECTED\n");
            multinet->clean(model_data);  // clean all status of multinet
        } else if (res->wakeup_state == WAKENET_CHANNEL_VERIFIED) {
            detect_flag = 1;
            printf("AFE_FETCH_CHANNEL_VERIFIED, channel index: %d\n", res->trigger_channel_id);
        }

        if (detect_flag == 1) {
            esp_mn_state_t mn_state = multinet->detect(model_data, res->data);

            if (mn_state == ESP_MN_STATE_DETECTING) {
                continue;
            }

            if (mn_state == ESP_MN_STATE_DETECTED) {
                esp_mn_results_t *mn_result = multinet->get_results(model_data);
                for (int i = 0; i < mn_result->num; i++) {
                    printf("TOP %d, command_id: %d, phrase_id: %d, string:%s prob: %f\n",
                           i + 1, mn_result->command_id[i], mn_result->phrase_id[i], mn_result->string, mn_result->prob[i]);
                }
                /* Act on the best match (TOP 1) only. Applying every candidate
                   in turn would let the lowest-ranked one decide the LED state. */
                if (mn_result->num > 0) {
                    if (mn_result->command_id[0] == 1) {
                        gpio_set_level(LED_BUILT_IN, 0);  // on
                    } else if (mn_result->command_id[0] == 2) {
                        gpio_set_level(LED_BUILT_IN, 1);  // off
                    }
                }
                printf("\n-----------listening-----------\n");
            }

            if (mn_state == ESP_MN_STATE_TIMEOUT) {
                esp_mn_results_t *mn_result = multinet->get_results(model_data);
                printf("timeout, string:%s\n", mn_result->string);
                /* Go back to waiting for the wake word. */
                afe_handle->enable_wakenet(afe_data);
                detect_flag = 0;
                printf("\n-----------awaits to be waken up-----------\n");
                continue;
            }
        }
    }
    vTaskDelete(NULL);
}

/**
 * Application entry point.
 *
 * Initialises the board audio input, loads the speech models, picks up to two
 * wake-word models, creates the AFE instance, prints its configuration,
 * configures the user LED pin and starts the feed and detect tasks.
 * Returns early (without starting the tasks) when no wake-word model is
 * available or the AFE instance cannot be created.
 */
void app_main(void)
{
    ESP_ERROR_CHECK(esp_board_init(16000, 1, 16));
    // ESP_ERROR_CHECK(esp_sdcard_init("/sdcard", 10));
    models          = esp_srmodel_init("model");
    char *wn_name   = NULL;
    char *wn_name_2 = NULL;

    if (models != NULL) {
        for (int i = 0; i < models->num; i++) {
            printf("Load model , name[%d]: %s\n", i, models->model_name[i]);
            if (strstr(models->model_name[i], ESP_WN_PREFIX) != NULL) {
                if (wn_name == NULL) {
                    wn_name = models->model_name[i];
                    printf("The first wakenet model: %s\n", wn_name);
                } else if (wn_name_2 == NULL) {
                    wn_name_2 = models->model_name[i];
                    printf("The second wakenet model: %s\n", wn_name_2);
                }
            }
        }
    }

    /* Covers both "no model partition" and "models present but no wakenet". */
    if (wn_name == NULL) {
        printf("Please enable wakenet model and select wake word by menuconfig!\n");
        return;
    }

    afe_handle                          = (esp_afe_sr_iface_t *)&ESP_AFE_SR_HANDLE;
    afe_config_t afe_config             = AFE_CONFIG_DEFAULT();
    afe_config.memory_alloc_mode        = AFE_MEMORY_ALLOC_MORE_PSRAM;
    afe_config.se_init                  = false;
    afe_config.wakenet_init             = true;
    afe_config.wakenet_model_name       = wn_name;
    afe_config.wakenet_model_name_2     = wn_name_2;
    afe_config.voice_communication_init = false;
    afe_config.vad_mode                 = VAD_MODE_4;
    afe_config.wakenet_mode             = DET_MODE_95;
    afe_config.afe_mode                 = SR_MODE_HIGH_PERF;
    afe_config.aec_init                 = false;
    /* One microphone channel and no reference channel. */
    afe_config.pcm_config.total_ch_num  = 1;
    afe_config.pcm_config.mic_num       = 1;
    afe_config.pcm_config.ref_num       = 0;

    afe_data = afe_handle->create_from_config(&afe_config);
    if (afe_data == NULL) {
        /* Both tasks dereference the AFE instance; do not start them without it. */
        printf("Failed to create AFE instance!\n");
        return;
    }

    task_flag = 1;

    /* Dump the effective AFE configuration for debugging. */
    afe_config_t *config = &afe_config;
    printf("AEC Init: %s\n", config->aec_init ? "true" : "false");
    printf("SE Init: %s\n", config->se_init ? "true" : "false");
    printf("VAD Init: %s\n", config->vad_init ? "true" : "false");
    printf("Wakenet Init: %s\n", config->wakenet_init ? "true" : "false");
    printf("Voice Communication Init: %s\n", config->voice_communication_init ? "true" : "false");
    printf("Voice Communication AGC Init: %s\n", config->voice_communication_agc_init ? "true" : "false");
    printf("Voice Communication AGC Gain: %d dB\n", config->voice_communication_agc_gain);
    printf("VAD Mode: %d\n", config->vad_mode);
    printf("Wakenet Model Name: %s\n", config->wakenet_model_name);
    printf("Wakenet Mode: %d\n", config->wakenet_mode);
    printf("AFE Mode: %d\n", config->afe_mode);
    printf("AFE Preferred Core: %d\n", config->afe_perferred_core);
    printf("AFE Preferred Priority: %d\n", config->afe_perferred_priority);
    printf("AFE Ringbuf Size: %d\n", config->afe_ringbuf_size);
    printf("Memory Alloc Mode: %d\n", config->memory_alloc_mode);
    printf("AGC Mode: %d\n", config->agc_mode);
    printf("Total Channel Number: %d\n", config->pcm_config.total_ch_num);
    printf("Mic Number: %d\n", config->pcm_config.mic_num);
    printf("Ref Number: %d\n", config->pcm_config.ref_num);
    printf("AFE NS Mode: %d\n", config->afe_ns_mode);

    // GPIO: user LED pin, no pulls, no interrupt. Designated initialisers
    // zero every field that is not listed.
    gpio_config_t io_conf = {
        .pin_bit_mask = (1ull << LED_BUILT_IN),
        .mode         = GPIO_MODE_INPUT_OUTPUT,
        .pull_up_en   = GPIO_PULLUP_DISABLE,
        .pull_down_en = GPIO_PULLDOWN_DISABLE,
        .intr_type    = GPIO_INTR_DISABLE,
    };
    gpio_config(&io_conf);

    /* Feed on core 0, detect on core 1, same priority. */
    xTaskCreatePinnedToCore(&feed_Task, "feed", 8 * 1024, (void *)afe_data, 5, NULL, 0);
    xTaskCreatePinnedToCore(&detect_Task, "detect", 4 * 1024, (void *)afe_data, 5, NULL, 1);

    // To shut down: set task_flag = 0, then call afe_handle->destroy(afe_data)
    // and reset afe_data to NULL.
}

ESP的模型是使用汉语拼音作为基本识别单元,可以通过拼音自定义命令词。具体参考:命令词 - ESP32-S3 - — ESP-SR latest 文档 (espressif.com)

6个自定义命令都能识别到。

在这里插入图片描述

编译&程序错误

flash选择错误

程序报错:
E (729) spi_flash: Detected size(8192k) smaller than the size in the binary image header(16384k). Probe failed.

解决方法:
按上文“修改menuconfig”一节中的步骤,把flash大小改为8 MB;XIAO ESP32S3的flash和PSRAM都是8 MB。

类型未定义

编译报错:
error: unknown type name ‘bool’

解决方法:
修改头文件引用顺序:确保在用到bool的头文件之前已经包含<stdbool.h>(或先包含会间接引入它的头文件)。

  • 25
    点赞
  • 25
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值