esp adf sr使用

倒霉催的 Arduino 上的 esp-sr 模型跑不起来,所以不想装 PlatformIO,只能上 IDF,好讨厌(官方提供了 arduino-esp32 组件,可以在 IDF 工程中使用 Arduino 代码)

 

AFE-声学前端(回声消除、噪声抑制等)

WakeNet-唤醒词检测(WakeNet5 用于 ESP32;WakeNet8、WakeNet9 用于 ESP32-S3 芯片)

MultiNet-命令词 识别(参ASR)

TTS-语音合成

示例: examples | components\esp-sr\test_apps

esp-skainet: ESP-Skainet 是乐鑫推出的智能语音助手,目前支持唤醒词识别和命令词识别。 - Gitee.com

chinese_tts 中文语音合成
cn_speech_commands_recognition 命令词识别
deep_noise_suppression 深度噪声抑制
direction_of_arrival 声源方位估计
en_speech_commands_recognition 英文命令识别
usb_mic_recorder usb声音记录
voice_activity_detection 语音活动检测

voice_communication 语音通话

wake_word_detection 唤醒词检测(内有afe 和 wakenet 两种)

 

 

afe-wakenet 区别 (来源:esp-skainet\examples\wake_word_detection\readme.md)

如果只需要唤醒词唤醒、只有单麦克风,并且希望少消耗 CPU 资源和内存,则可以使用 wakenet 接口

如果有双麦,同时要提升语音效果,使用afe接口

 

 

声学前端(AEC-回声消除 NS-噪声抑制 BSS-盲源分离:目标声源与干扰声分离)

MultiNet 输入为经过前端语音算法(AFE)处理过的音频(格式为 16 KHz,16 bit,单声道)。通过对音频进行识别,则可以对应到相应的汉字或单词。

 

// Load the model list from the "model" flash partition.
srmodel_list_t *models = esp_srmodel_init("model");
// Build an AFE config for speech recognition in high-performance mode.
// "MMNR" presumably encodes the input channel layout (mic/reference
// channels) — TODO confirm against the esp-sr AFE documentation.
afe_config_t *afe_config = afe_config_init("MMNR", models, AFE_TYPE_SR, AFE_MODE_HIGH_PERF);


// Get the AFE interface handle (function table) for this config.
esp_afe_sr_iface_t *afe_handle = esp_afe_handle_from_config(afe_config);
// Create the AFE runtime instance.
esp_afe_sr_data_t *afe_data = afe_handle->create_from_config(afe_config);


// Query samples-per-feed and channel count, allocate an interleaved
// 16-bit buffer of that size, and feed audio into the AFE pipeline.
int feed_chunksize = afe_handle->get_feed_chunksize(afe_data);
int feed_nch = afe_handle->get_feed_channel_num(afe_data);
int16_t *feed_buff = (int16_t *) malloc(feed_chunksize * feed_nch * sizeof(int16_t));
afe_handle->feed(afe_data, feed_buff);

 

examples/wake_word_detection/wakenet/main/main.c · 乐鑫开源/esp-skainet - Gitee.com

 

示例:INMP441 麦克风 唤醒词(参考 NOCODEC 示例)

PCM 与 PDM 的区别;I2S0 与 I2S1 的区别:I2S0 支持 PCM->PDM 和 PDM->PCM 双向转换,I2S1 不支持(参考官方文档)

 





#include "esp_log.h"
#include <stdio.h>
#include <freertos/FreeRTOS.h>
#include <driver/i2s_std.h>


#include <stdio.h>
#include <stdlib.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "model_path.h"
#include "string.h"
#include "hiesp.h"
#include "hilexin.h"

// Log tag for this example.
static const char *TAG = "EXAMPLE-VAD";

// I2S DMA settings: descriptor count and frames per descriptor.
#define AUDIO_CODEC_DMA_DESC_NUM 6
#define AUDIO_CODEC_DMA_FRAME_NUM 240
#define AUDIO_CODEC_DEFAULT_MIC_GAIN 30.0

// Mono microphone wired to the left I2S slot.
i2s_std_slot_mask_t  mic_slot_mask =  I2S_STD_SLOT_LEFT;

//-------------------i2s 
// Mic pin assignments (previous wiring in trailing comments).
#define MIC_BCLK  GPIO_NUM_5 //22
#define MIC_WS GPIO_NUM_4 // 23
#define MIC_DIN GPIO_NUM_6//26
i2s_chan_handle_t rx_chan,tx_chan;
//uint32_t rbuf[ 16000*30/1000 * sizeof(short)];
//---------------------


// VAD-style framing constants (30 ms frames at 16 kHz).
#define CODEC_ADC_I2S_PORT 0
#define VAD_SAMPLE_RATE_HZ 16000
#define VAD_FRAME_LENGTH_MS 30
#define VAD_BUFFER_LENGTH (VAD_FRAME_LENGTH_MS * VAD_SAMPLE_RATE_HZ / 1000)


// buffer: 16-bit samples fed to WakeNet; b32: raw 32-bit I2S frames.
int16_t buffer[512];
int32_t b32[512];
esp_wn_iface_t *wakenet;      // WakeNet interface (function table)
model_iface_data_t *model_data; // WakeNet model instance

/*
 * Audio capture + wake-word detection loop (FreeRTOS task, runs forever).
 *
 * Reads raw 32-bit I2S frames from the mic, scales them to 16-bit
 * samples, and runs WakeNet detection on each chunk.
 *
 * Fixes vs. the original:
 *  - skip the iteration when the I2S read fails (the original kept
 *    processing stale buffer contents);
 *  - convert only the samples actually read instead of a hard-coded 512;
 *  - clamp negatives to INT16_MIN instead of -INT16_MAX;
 *  - drop unused locals r_bytes / w_bytes.
 */
static void task(void *arg)
{
    size_t bytes_read = 0;
    printf("in task");
    while (1) {
        // Block until a buffer of raw 32-bit samples is available.
        if (i2s_channel_read(rx_chan, b32, sizeof(b32), &bytes_read, portMAX_DELAY) != ESP_OK) {
            ESP_LOGE("read", "Read Failed!");
            continue; /* don't feed stale data to the detector */
        }

        // >>12 scales the raw sample into 16-bit range; presumably the mic
        // delivers 24-bit data left-aligned in 32 bits — TODO confirm with
        // the microphone datasheet.
        size_t samples = bytes_read / sizeof(int32_t);
        for (size_t i = 0; i < samples; i++) {
            int32_t value = b32[i] >> 12;
            buffer[i] = (value > INT16_MAX) ? INT16_MAX
                      : (value < INT16_MIN) ? INT16_MIN
                      : (int16_t)value;
        }

        // Run wake-word detection on the converted 16-bit chunk.
        wakenet_state_t state = wakenet->detect(model_data, buffer);
        if (state == WAKENET_DETECTED) {
            printf("Detected\n");
        }
    }
}

/*
 * Wake-word example entry point.
 *
 * Sets up an I2S standard-mode RX channel for the mic, loads the
 * "hilexin" WakeNet model from the "model" flash partition, then spawns
 * the capture/detect task and idles printing dots.
 */
void app_main(void)
{
     //----------------- I2S init: RX channel -----------------
    i2s_chan_config_t rx_chan_cfg = {
        .id = I2S_NUM_0,
        .role = I2S_ROLE_MASTER,
        .dma_desc_num = AUDIO_CODEC_DMA_DESC_NUM,
        .dma_frame_num = AUDIO_CODEC_DMA_FRAME_NUM,
        .auto_clear_after_cb = true,
        .auto_clear_before_cb = false,
        .intr_priority = 0,

    };
    i2s_new_channel(&rx_chan_cfg,NULL,&rx_chan);
    // i2s_chan_config_t rx_chan_cfg = I2S_CHANNEL_DEFAULT_CONFIG(I2S_NUM_0,I2S_ROLE_MASTER);
    // i2s_new_channel(&rx_chan_cfg,NULL,&rx_chan);

    i2s_std_config_t  rx_cfg = {
        //.clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(44100),
        .clk_cfg= {
          .sample_rate_hz = (uint32_t)16000,   // WakeNet expects 16 kHz input
            .clk_src = I2S_CLK_SRC_DEFAULT,
            .mclk_multiple = I2S_MCLK_MULTIPLE_256,
        },


        // 32-bit mono slot, left channel only; bit_shift selects
        // standard (Philips) I2S data timing.
        .slot_cfg = {
            .data_bit_width = I2S_DATA_BIT_WIDTH_32BIT,
            .slot_bit_width = I2S_SLOT_BIT_WIDTH_AUTO,
            .slot_mode = I2S_SLOT_MODE_MONO,
            .slot_mask = mic_slot_mask,
            .ws_width = I2S_DATA_BIT_WIDTH_32BIT,
            .ws_pol = false,
            .bit_shift = true,
            #ifdef   I2S_HW_VERSION_2
                .left_align = true,
                .big_endian = false,
                .bit_order_lsb = false
            #endif

        },
        

        //.slot_cfg = I2S_STD_MSB_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT,I2S_SLOT_MODE_MONO),
 
    
        .gpio_cfg = {
            .mclk = I2S_GPIO_UNUSED,   // mic needs no MCLK
            .dout= I2S_GPIO_UNUSED,    // RX only — no data out
            .bclk = MIC_BCLK,
            .ws = MIC_WS,
            .din = MIC_DIN,
            .invert_flags = {
                false,false,false,
            },
            

        },

        
        
    };
    rx_cfg.slot_cfg.slot_mask = I2S_STD_SLOT_LEFT;
    i2s_channel_init_std_mode(rx_chan,&rx_cfg);
    
    i2s_channel_enable(rx_chan);
       
    //---------------------------------------
    vTaskDelay(500 / portTICK_PERIOD_MS);
    // Load the SR model list from the "model" partition and select the
    // "hilexin" wake-word model; create a WakeNet instance from it.
    srmodel_list_t *models = esp_srmodel_init("model");
    char *model_name = esp_srmodel_filter(models, ESP_WN_PREFIX, "hilexin");
    wakenet = (esp_wn_iface_t*)esp_wn_handle_from_name(model_name);
    model_data = wakenet->create(model_name, DET_MODE_95);

    int frequency = wakenet->get_samp_rate(model_data);
    int audio_chunksize = wakenet->get_samp_chunksize(model_data) ;
    //int16_t *buffer = (int16_t *) malloc(audio_chunksize);

    // NOTE(review): reset_det_threshold() appears to immediately undo
    // the 0.8 threshold set on the previous line — confirm which
    // threshold is actually intended.
    wakenet->set_det_threshold(model_data, 0.8, 1);
    wakenet->reset_det_threshold(model_data);

    char *wake_words = NULL;
    wake_words = esp_srmodel_get_wake_words(models, model_name);

    // Pick the embedded sample-audio array matching the loaded model
    // (hiesp / hilexin arrays come from the included headers).
    unsigned char* data = NULL;
    size_t data_size = 0;
    int chunks = 0;
    if (strstr(model_name, "hiesp") != NULL) {
        data = (unsigned char*)hiesp;
        data_size = sizeof(hiesp);
        printf("wake word: %s, size:%d\n", "hiesp",  data_size);
    } else if(strstr(model_name, "hilexin") != NULL) {
        data = (unsigned char*)hilexin;
        data_size = sizeof(hilexin);
        printf("wake word: %s, size:%d\n", "hilexin",  data_size);
    }


 

    

//    int32_t *b32 = (int32_t *) malloc(audio_chunksize*2);
//    int16_t *rbuf = (int16_t *) malloc(audio_chunksize);
    printf("chuksize = %d",audio_chunksize);
    // Start the capture/detect task; keep app_main alive printing dots.
    xTaskCreate(task,"task",4096,NULL,5,NULL);

    while(1)
    {
        printf(".");

        vTaskDelay(50/portTICK_PERIOD_MS);
    }

}

模型 烧写 加载过程 在CMakeLists.txt文件中(参文档)

 

https://gitee.com/open-source-library/esp-sr/blob/master/docs/speech_command_recognition/README_cn.md

Espressif\frameworks\esp-adf\components\esp-sr\include\esp32s3\esp_mn_models.h

示例(skainet 下的 components\tests exampoles 或是 esp -adf下的components\sr 或是 examples :

 

过程 读取 然后使用 afe或 wakenet 识别读取的内容 

WakeNet 包含于AFE(前端算法中)

识别 16 kHz、16 bit、单声道音频

 

命令词识别 打开电灯 关闭电灯  (参skainet  

灵敏度



// #define CONFIG_SR_MN_CN_MULTINET2_SINGLE_RECOGNITION

#include "esp_log.h"
#include <stdio.h>
#include <freertos/FreeRTOS.h>
#include <driver/i2s_std.h>


#include <stdio.h>
#include <stdlib.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "model_path.h"
#include "string.h"
#include "hiesp.h"
#include "hilexin.h"


//multinet
#include "esp_mn_iface.h"
#include "esp_mn_models.h"
#include "esp_mn_speech_commands.h"
#include "esp_process_sdkconfig.h"
#include "alexa.h"
#include "dl_lib_convq_queue.h"
#include "da_kai_kong_tiao.h"
#include "tell_me_a_joke.h"





// Log tag for this example.
static const char *TAG = "EXAMPLE-VAD";

// I2S DMA settings: descriptor count and frames per descriptor.
#define AUDIO_CODEC_DMA_DESC_NUM 6
#define AUDIO_CODEC_DMA_FRAME_NUM 240
#define AUDIO_CODEC_DEFAULT_MIC_GAIN 30.0

// Mono microphone wired to the left I2S slot.
i2s_std_slot_mask_t  mic_slot_mask =  I2S_STD_SLOT_LEFT;

//-------------------i2s 
// Mic pin assignments (previous wiring in trailing comments).
#define MIC_BCLK  GPIO_NUM_5 //22
#define MIC_WS GPIO_NUM_4 // 23
#define MIC_DIN GPIO_NUM_6//26
i2s_chan_handle_t rx_chan,tx_chan;
//uint32_t rbuf[ 16000*30/1000 * sizeof(short)];
//---------------------


// VAD-style framing constants (30 ms frames at 16 kHz).
#define CODEC_ADC_I2S_PORT 0
#define VAD_SAMPLE_RATE_HZ 16000
#define VAD_FRAME_LENGTH_MS 30
#define VAD_BUFFER_LENGTH (VAD_FRAME_LENGTH_MS * VAD_SAMPLE_RATE_HZ / 1000)


// buffer: 16-bit samples fed to WakeNet/MultiNet; b32: raw 32-bit I2S frames.
int16_t buffer[512];
int32_t b32[512];
esp_wn_iface_t *wakenet;          // WakeNet interface (wake word)
model_iface_data_t *model_data;   // WakeNet model instance
model_iface_data_t *mn_model_data; // MultiNet model instance (command words)

// #define MULTINET_COEFF     "COEFF_NULL"
// #define MULTINET_COEFF get_coeff_multinet2_ch
//static const esp_mn_iface_t *multinet = &MULTINET_MODEL;
//  static const esp_mn_iface_t *multinet ;//= &MULTINET_MODEL;


esp_mn_iface_t *multinet;   // MultiNet interface (command word recognition)

/*
 * Audio capture + wake-word + command-word loop (FreeRTOS task).
 *
 * Reads raw 32-bit I2S frames, scales them to 16-bit samples, runs
 * WakeNet (wake word) and then MultiNet (command word) on each chunk.
 *
 * Fixes vs. the original:
 *  - skip the iteration when the I2S read fails (the original kept
 *    processing stale buffer contents);
 *  - convert only the samples actually read instead of a hard-coded 512;
 *  - clamp negatives to INT16_MIN instead of -INT16_MAX;
 *  - drop unused locals r_bytes / w_bytes.
 */
static void task(void *arg)
{
    size_t bytes_read = 0;
    esp_mn_state_t mn_state;
    printf("in task");
    while (1) {
        // Block until a buffer of raw 32-bit samples is available.
        if (i2s_channel_read(rx_chan, b32, sizeof(b32), &bytes_read, portMAX_DELAY) != ESP_OK) {
            ESP_LOGE("read", "Read Failed!");
            continue; /* don't feed stale data to the detectors */
        }

        // >>12 scales the raw sample into 16-bit range; presumably the mic
        // delivers 24-bit data left-aligned in 32 bits — TODO confirm with
        // the microphone datasheet.
        size_t samples = bytes_read / sizeof(int32_t);
        for (size_t i = 0; i < samples; i++) {
            int32_t value = b32[i] >> 12;
            buffer[i] = (value > INT16_MAX) ? INT16_MAX
                      : (value < INT16_MIN) ? INT16_MIN
                      : (int16_t)value;
        }

        // Wake-word detection.
        wakenet_state_t state = wakenet->detect(model_data, buffer);
        if (state == WAKENET_DETECTED) {
            printf("Detected\n");
        }

        //---------------multinet-------------------
        // Command-word recognition on the same chunk; print the first
        // matched command id and its string on success.
        mn_state = multinet->detect(mn_model_data, buffer);
        if (mn_state == ESP_MN_STATE_DETECTED) {
            esp_mn_results_t *mn_result = multinet->get_results(mn_model_data);
            if (mn_result->num > 0)
                printf("detected: command id:%d, string:%s\n",mn_result->command_id[0], mn_result->string);
            else
                printf("timeout\n");
        }
    }
}

/*
 * Wake-word + command-word example entry point.
 *
 * Sets up an I2S standard-mode RX channel for the mic, loads a WakeNet
 * wake-word model and a Chinese MultiNet command-word model from the
 * "model" flash partition, then spawns the capture/detect task.
 */
void app_main(void)
{
     //----------------- I2S init: RX channel -----------------
    i2s_chan_config_t rx_chan_cfg = {
        .id = I2S_NUM_0,
        .role = I2S_ROLE_MASTER,
        .dma_desc_num = AUDIO_CODEC_DMA_DESC_NUM,
        .dma_frame_num = AUDIO_CODEC_DMA_FRAME_NUM,
        .auto_clear_after_cb = true,
        .auto_clear_before_cb = false,
        .intr_priority = 0,

    };
    i2s_new_channel(&rx_chan_cfg,NULL,&rx_chan);
    // i2s_chan_config_t rx_chan_cfg = I2S_CHANNEL_DEFAULT_CONFIG(I2S_NUM_0,I2S_ROLE_MASTER);
    // i2s_new_channel(&rx_chan_cfg,NULL,&rx_chan);

    i2s_std_config_t  rx_cfg = {
        //.clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(44100),
        .clk_cfg= {
          .sample_rate_hz = (uint32_t)16000,   // WakeNet/MultiNet expect 16 kHz
            .clk_src = I2S_CLK_SRC_DEFAULT,
            .mclk_multiple = I2S_MCLK_MULTIPLE_256,
        },


        // 32-bit mono slot, left channel only; bit_shift selects
        // standard (Philips) I2S data timing.
        .slot_cfg = {
            .data_bit_width = I2S_DATA_BIT_WIDTH_32BIT,
            .slot_bit_width = I2S_SLOT_BIT_WIDTH_AUTO,
            .slot_mode = I2S_SLOT_MODE_MONO,
            .slot_mask = mic_slot_mask,
            .ws_width = I2S_DATA_BIT_WIDTH_32BIT,
            .ws_pol = false,
            .bit_shift = true,
            #ifdef   I2S_HW_VERSION_2
                .left_align = true,
                .big_endian = false,
                .bit_order_lsb = false
            #endif

        },
        

        //.slot_cfg = I2S_STD_MSB_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT,I2S_SLOT_MODE_MONO),
 
    
        .gpio_cfg = {
            .mclk = I2S_GPIO_UNUSED,   // mic needs no MCLK
            .dout= I2S_GPIO_UNUSED,    // RX only — no data out
            .bclk = MIC_BCLK,
            .ws = MIC_WS,
            .din = MIC_DIN,
            .invert_flags = {
                false,false,false,
            },
            

        },

        
        
    };
    rx_cfg.slot_cfg.slot_mask = I2S_STD_SLOT_LEFT;
    i2s_channel_init_std_mode(rx_chan,&rx_cfg);
    
    i2s_channel_enable(rx_chan);
       

    //--------------- wake word setup ------------------------
    vTaskDelay(500 / portTICK_PERIOD_MS);
    // Load the SR model list from the "model" partition and select the
    // "hilexin" wake-word model; create a WakeNet instance from it.
    srmodel_list_t *models = esp_srmodel_init("model");
    char *model_name = esp_srmodel_filter(models, ESP_WN_PREFIX, "hilexin");
    wakenet = (esp_wn_iface_t*)esp_wn_handle_from_name(model_name);
    model_data = wakenet->create(model_name, DET_MODE_95);

    int frequency = wakenet->get_samp_rate(model_data);
    int audio_chunksize = wakenet->get_samp_chunksize(model_data) ;
    //int16_t *buffer = (int16_t *) malloc(audio_chunksize);

    // NOTE(review): reset_det_threshold() appears to immediately undo
    // the 0.8 threshold set on the previous line — confirm which
    // threshold is actually intended.
    wakenet->set_det_threshold(model_data, 0.8, 1);
    wakenet->reset_det_threshold(model_data);

    char *wake_words = NULL;
    wake_words = esp_srmodel_get_wake_words(models, model_name);

    // Pick the embedded sample-audio array matching the loaded model
    // (hiesp / hilexin arrays come from the included headers).
    unsigned char* data = NULL;
    size_t data_size = 0;
    int chunks = 0;
    if (strstr(model_name, "hiesp") != NULL) {
        data = (unsigned char*)hiesp;
        data_size = sizeof(hiesp);
        printf("wake word: %s, size:%d\n", "hiesp",  data_size);
    } else if(strstr(model_name, "hilexin") != NULL) {
        data = (unsigned char*)hilexin;
        data_size = sizeof(hilexin);
        printf("wake word: %s, size:%d\n", "hilexin",  data_size);
    }

    printf("chuksize = %d",audio_chunksize);
    // Select a Chinese MultiNet command-word model from the model list.
    char *mn_name = esp_srmodel_filter(models, ESP_MN_PREFIX, ESP_MN_CHINESE);
    //------------------------mul-------------------  //adf components //test app test multinet
    // model_iface_data_t *model_mn_data = multinet->create(&MULTINET_COEFF, 6000);
    // int audio_mn_chunksize = multinet->get_samp_chunksize(model_mn_data);
    // int mn_num = multinet->get_samp_chunknum(model_mn_data);
    // int mn_sample_rate = multinet->get_samp_rate(model_mn_data);
    // ESP_LOGI(TAG, "keywords_num = %d , sample_rate = %d, chunksize = %d, sizeof_uint16 = %d", mn_num,  mn_sample_rate, audio_mn_chunksize, sizeof(int16_t));

    // Create the MultiNet instance; the second argument is presumably a
    // timeout in milliseconds — TODO confirm against esp_mn_iface docs.
    multinet = esp_mn_handle_from_name(mn_name);
    mn_model_data = multinet->create(mn_name, 500000);
    frequency = multinet->get_samp_rate(mn_model_data);
    audio_chunksize = multinet->get_samp_chunksize(mn_model_data) ;
    char *lang = multinet->get_language(mn_model_data);
    // Load the command-word list configured via menuconfig (sdkconfig).
    esp_mn_commands_update_from_sdkconfig(multinet, mn_model_data);
    data = NULL;
    data_size = 0;
    // Pick the embedded sample-audio array matching the model language.
    if (strcmp(lang, ESP_MN_ENGLISH) == 0) {
         data = (unsigned char*)tell_me_a_joke;
         data_size = sizeof(tell_me_a_joke);
         printf("commands: tell me a joke, size:%d\n", data_size);
     } else if(strcmp(lang, ESP_MN_CHINESE) == 0) {
         data = (unsigned char*)da_kai_kong_tiao;
         data_size = sizeof(da_kai_kong_tiao);
         printf("commands: da kai kong tiao, size:%d\n", data_size);
     }

   
    // Print the command words currently active in MultiNet.
    multinet->print_active_speech_commands(mn_model_data);
    

//    int32_t *b32 = (int32_t *) malloc(audio_chunksize*2);
//    int16_t *rbuf = (int16_t *) malloc(audio_chunksize);
    printf("chuksize = %d",audio_chunksize);
    // Start the capture/detect task; keep app_main alive printing dots.
    xTaskCreate(task,"task",4096,NULL,5,NULL);

    while(1)
    {
        printf(".");

        vTaskDelay(50/portTICK_PERIOD_MS);
    }



    


}

 

 

 

加载模型的过程  flash 函数 加载 

# The following lines of boilerplate have to be in your project's
# CMakeLists in this exact order for cmake to work correctly
cmake_minimum_required(VERSION 3.5)

include($ENV{ADF_PATH}/CMakeLists.txt)
include($ENV{IDF_PATH}/tools/cmake/project.cmake)

add_compile_options (-fdiagnostics-color=always)

project(example_wwe)

# This is a cmake function, which is used to flash the bin file to the specified partition
function(esptool_py_flash_customize_image target_name image_name offset image)
    idf_build_get_property(build_dir BUILD_DIR)
    # Make the image path relative to the build dir so the generated
    # flasher arguments stay portable.
    file(RELATIVE_PATH image ${build_dir} ${image})

    # Register the image on the flash target's metadata properties
    # (consumed when generating the flasher args / JSON).
    set_property(TARGET ${target_name} APPEND PROPERTY FLASH_FILE
                "\"${offset}\" : \"${image}\"")
    set_property(TARGET ${target_name} APPEND PROPERTY FLASH_ENTRY
                "\"${image_name}\" : { \"offset\" : \"${offset}\", \"file\" : \"${image}\" }")
    set_property(TARGET ${target_name} APPEND PROPERTY IMAGES "${offset} ${image}")

    # Mirror the entries on the encrypted flash target when flash
    # encryption (development mode) is enabled.
    if(CONFIG_SECURE_FLASH_ENCRYPTION_MODE_DEVELOPMENT)
        set_property(TARGET encrypted-${target_name} APPEND PROPERTY FLASH_FILE
                    "\"${offset}\" : \"${image}\"")
        set_property(TARGET encrypted-${target_name} APPEND PROPERTY FLASH_ENTRY
                    "\"${image_name}\" : { \"offset\" : \"${offset}\", \"file\" : \"${image}\" }")
        set_property(TARGET encrypted-${target_name} APPEND PROPERTY IMAGES "${offset} ${image}")
    endif()
endfunction()


# Flash the custom partition named `flash_tone`.
# Looks up the partition offset from the partition table, then registers
# tone/audio_tone.bin to be flashed at that offset.
set(partition flash_tone)
idf_build_get_property(project_dir PROJECT_DIR)
set(image_file ${project_dir}/tone/audio_tone.bin)
partition_table_get_partition_info(offset "--partition-name ${partition}" "offset")
esptool_py_flash_customize_image(flash "${partition}" "${offset}" "${image_file}")

 

音频基础知识简述 & esp-sr 上手指南-CSDN博客

以乐鑫语音开发框架为例,系统了解嵌入式设备的语音唤醒和语音识别-RoboticsCV

 

<think>我们正在寻找 esp-adf 项目中的 `esp_vad.c` 文件。根据之前的讨论,我们知道 VAD 功能由 ESP-SR 子模块提供。因此,该文件应该位于 ESP-ADF 框架下的 ESP-SR 组件目录中。 具体路径推测: 在 ESP-ADF 的目录结构中,ESP-SR 组件位于 `components/esp-sr` 目录下。而 VAD 的源代码通常会在 `esp-sr` 的 `algorithms` 或 `src` 目录中。 根据 ESP-SR文档和代码结构,我们可以尝试以下路径: `$ADF_PATH/components/esp-sr/esp-sr/algorithms/vad/esp_vad.c` 注意:不同版本的 ESP-SR 可能略有差异,但路径关键字是 `esp-sr` 和 `vad`。 验证方法: 1. 确保已经初始化了 ESP-SR 子模块(如之前所述)。 2. 在 ESP-ADF 的根目录下,使用查找命令(在 Linux/macOS 下使用 `find`,在 Windows 下可以使用资源管理器搜索或 `dir /s` 命令)。 例如,在 Linux 或 macOS 终端中: ```bash cd $ADF_PATH find . -name "esp_vad.c" ``` 或者在 Windows 命令提示符中: ```cmd cd %ADF_PATH% dir /s /b esp_vad.c ``` 如果已经正确初始化了子模块,应该能找到该文件。 如果找不到,可能是由于: - 子模块未更新(执行 `git submodule update --init --recursive`) - 使用ESP-ADF 版本不包含 VAD(较旧版本可能没有,但当前通常都有) 另外,根据引用[1]中用户安装的是 ESP-IDF 5.3 和对应的 ESP-ADF(假设是最新的稳定版),那么 VAD 应该位于 ESP-SR 中。 根据经验,在 ESP-SR 的仓库中,VAD 的源代码位于: [esp-sr 仓库的 algorithms/vad 目录](https://github.com/espressif/esp-sr/tree/master/algorithms/vad) 因此,在 ESP-ADF 中,路径应该是: `components/esp-sr/esp-sr/algorithms/vad/esp_vad.c` 注意:这里有两个 `esp-sr` 目录,因为子模块的根目录是 `esp-sr`,而仓库内部也有一个同名的目录。 所以,完整路径为: `$ADF_PATH/components/esp-sr/esp-sr/algorithms/vad/esp_vad.c` 如果用户没有找到,请检查该路径是否存在。 此外,如果用户使用的是较新的版本,路径可能会有所调整,但核心部分不变。 总结步骤: 1. 进入 ESP-ADF 目录:`cd $ADF_PATH` 2. 检查子模块:确保 `components/esp-sr` 目录存在且非空。 3. 进入 VAD 算法目录:`cd components/esp-sr/esp-sr/algorithms/vad` 4. 查看文件:`ls esp_vad.c` 或使用文件管理器查看。 如果以上步骤找不到,请考虑更新 ESP-ADFESP-SR 子模块。 注意:在 ESP-ADF 中,我们通常不直接修改 ESP-SR 中的文件,而是通过头文件和库来使用 VAD 功能。</think>在 ESP-ADF 框架中,`esp_vad.c` 文件的位置取决于您使用ESP-SR 版本。根据 ESP32-S3 开发环境和 ESP-ADF 的典型结构,该文件通常位于以下路径: ### 标准位置 ``` $ADF_PATH/components/esp-sr/esp-sr/algorithms/vad/esp_vad.c ``` ### 验证步骤 1. **检查 ESP-SR 子模块**: ```bash cd $ADF_PATH ls components/esp-sr/esp-sr/algorithms/vad # 应包含以下文件: # esp_vad.c # vad_*.h # libvad.a ``` 2. **若未找到文件**,需初始化子模块: ```bash git submodule update --init --recursive components/esp-sr ``` 3. 
**路径结构说明**: - `$ADF_PATH`:ESP-ADF 根目录(如 `D:/Espressif/frameworks/esp-adf`) - 核心文件: - 源代码:`components/esp-sr/esp-sr/algorithms/vad/esp_vad.c` - 头文件:`components/esp-sr/include/esp_vad.h` - 库文件:`components/esp-sr/lib/libvad.a` ### 常见问题排查 1. **子模块未初始化**(最常见): ```bash # 删除残缺目录后重新初始化 rm -rf components/esp-sr git submodule sync git submodule update --init --recursive ``` 2. **版本不匹配**: - ESP-IDF v5.3 需搭配 ADF `v2.5+` 和 ESP-SR `v1.2+` - 检查版本: ```bash cd $ADF_PATH && git describe --tags # 查看ADF版本 cd components/esp-sr && git describe --tags # 查看ESP-SR版本 ``` 3. **手动下载**(若网络问题): 1. 访问 [ESP-SR GitHub](https://github.com/espressif/esp-sr) 2. 下载最新 release 包 3. 解压到 `$ADF_PATH/components/esp-sr` > **验证成功标志**:在 `vad_demo` 示例中能正常调用 `esp_vad_process()` 函数,并输出语音检测结果[^1][^3]。 ---
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值