Windows系统实现唤醒+合成+命令词智能语音交互

1、之前写过离线能力调用,今天来个终极版,实现智能交互或者结合大模型的智能交互示例,下面进入正题。上B站效果离线唤醒+离线合成+离线命令词实现智能交互_哔哩哔哩_bilibili

2、到讯飞开放平台下载唤醒+合成+命令词的离线组合包,找到msc_64.dll复制三份出来,一定要注意路径位置,不然会出现错误。msc直接下载的原封不动的拷贝就行

3、常量类的定义,各位直接复制粘贴即可,注意换自己的APPID,不然报错的

package com.day.config;

import com.sun.jna.ptr.IntByReference;

import javax.sound.sampled.*;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;

public class Constants {
    // 构造16K 16BIT 单声道音频
    public static final String APPID = "5e11538f";  // APPID
    public static final String WORK_DIR = "src/main/resources";

    // 1、唤醒相关  ssb_param,一定注意IVW_SSB_PARAMS的fo|xxx资源的路径,xxx取值是指WORK_DIR目录下/msc/xxx   xxx是以后的路径开始拼接的!!!!!!!!!!!
    public static final AudioFormat IVW_ASR_AUDIO_FORMAT = new AudioFormat(16000F, 16, 1, true, false);
    public static final String IVW_DLL_PATH = "src/main/resources/ivw_msc_x64.dll"; // windows动态库路径
    public static final String IVW_LOGIN_PARAMS = "appid = " + APPID + ", work_dir = " + WORK_DIR;
    public static final String IVW_SSB_PARAMS = "ivw_threshold=0:1450,sst=wakeup,ivw_shot_word=1,ivw_res_path =fo|res/ivw/wakeupresource.jet";
    public static IntByReference IVW_ERROR_CODE = new IntByReference(-100);
    public static final Integer IVW_FRAME_SIZE = 6400;  // 一定要每200ms写10帧,否则会出现唤醒一段时间后无法唤醒的问题,一帧的大小为640B,其他大小可能导致无法唤醒。
    public static Integer IVW_AUDIO_STATUS = 1;
    public static DataLine.Info IVW_ASR_DATA_LINE_INFO = new DataLine.Info(TargetDataLine.class, IVW_ASR_AUDIO_FORMAT);
    public static TargetDataLine IVW_ASR_TARGET_DATA_LINE; // 录音

    static {
        try {
            IVW_ASR_TARGET_DATA_LINE = (TargetDataLine) AudioSystem.getLine(IVW_ASR_DATA_LINE_INFO);
        } catch (LineUnavailableException e) {
            e.printStackTrace();
        }
    }

    // 2、合成相关
    public static final AudioFormat TTS_AUDIO_FORMAT = new AudioFormat(16000F, 16, 1, true, false);
    public static final String TTS_DLL_PATH = "src/main/resources/tts_msc_x64.dll"; // windows动态库路径
    public static final String TTS_LOGIN_PARAMS = "appid = " + APPID + ", work_dir = " + WORK_DIR;
    public static final String TTS_SESSION_BEGIN_PARAMS = "engine_type = local, voice_name = xiaoyan, text_encoding = UTF8," +
            " tts_res_path = fo|res/tts/xiaoyan.jet;fo|res/tts/common.jet, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 2";
    public static IntByReference TTS_ERROR_CODE = new IntByReference(-100);
    public static IntByReference TTS_AUDIO_LEN = new IntByReference(-100);
    public static IntByReference TTS_SYNTH_STATUS = new IntByReference(-100);
    public static String TTS_TEXT; // 合成文本
    public static Integer TTS_TOTAL_AUDIO_LENGTH; // 合成音频长度
    public static ByteArrayOutputStream TTS_BYTE_ARRAY_OUTPUT_STREAM; // 合成音频流
    public static DataLine.Info TTS_DATA_LINE_INFO = new DataLine.Info(SourceDataLine.class, TTS_AUDIO_FORMAT, AudioSystem.NOT_SPECIFIED);
    public static SourceDataLine TTS_SOURCE_DATA_LINE; // 播放

    static {
        try {
            TTS_SOURCE_DATA_LINE = (SourceDataLine) AudioSystem.getLine(Constants.TTS_DATA_LINE_INFO);
        } catch (LineUnavailableException e) {
            e.printStackTrace();
        }
    }

    // 3、离线命令词相关
    public static final String ASR_DLL_PATH = "src/main/resources/asr_msc_x64.dll"; // windows动态库路径
    public static final String ASR_LOGIN_PARAMS = "appid = " + APPID + ", work_dir = " + WORK_DIR;
    public static final String ASR_CALL_BNF_PATH = "src/main/resources/msc/res/asr/call.bnf";
    public static final String ASR_BUILD_PARAMS = "engine_type = local,asr_res_path = fo|res/asr/common.jet," +
            "sample_rate = 16000,grm_build_path = res/asr/GrmBuilld_x64";
    public static final String ASR_LEX_PARAMS = "engine_type=local,asr_res_path = fo|res/asr/common.jet, " +
            "sample_rate = 16000,grm_build_path =res/asr/GrmBuilld_x64, grammar_list =call";
    public static IntByReference ASR_ERROR_CODE = new IntByReference(-100);
    public static final String ASR_SESSION_PARAMS = "vad_bos =3000 ,vad_eos = 10000,engine_type = local,asr_res_path = fo|res/asr/common.jet, " +
            "sample_rate = 16000,grm_build_path = res/asr/GrmBuilld_x64, local_grammar = call,result_type = json, result_encoding = UTF8";
    public static IntByReference ASR_EP_STATUS = new IntByReference(-100);
    public static IntByReference ASR_RECOG_STATUS = new IntByReference(-100);
    public static Integer ASR_AUDIO_STATUS = 1;
    public static Integer ASR_FRAME_SIZE = 640;   // 16k采样率的16位音频,一帧的大小为640Byte(来自Windows SDK的说明)
    public static FileInputStream ASR_FILE_INPUT_STREAM;
    public static String ASR_GRAMMAR_CONTENT;
    public static IntByReference ASR_RESULT_STATUS = new IntByReference(-100);
}

4、唤醒方法重写(唤醒成功执行回调函数,往下看)

 5、合成方法重写

package com.day.service;

import com.day.config.Constants;
import com.sun.jna.Library;
import com.sun.jna.Native;
import com.sun.jna.Pointer;
import com.sun.jna.ptr.IntByReference;

public interface TtsService extends Library {
    /**
     * 重点:
     * 1.char *   对应  String
     * 2.int *    对应  IntByReference
     * 3.void *   对应  byte[]/Pointer,回调函数里此类型需用String来对应。
     * 4.int      对应  int
     * 5.无参     对应  void
     * 6.回调函数  对应  根据文档自定义回调函数,实现接口Callback,离线语音合成无回调
     */
    //加载dll动态库并实例化,从而使用其内部的方法
    TtsService INSTANCE = Native.loadLibrary(Constants.TTS_DLL_PATH, TtsService.class);

    //定义登录方法
    public Integer MSPLogin(String usr, String pwd, String params);

    //开始一次普通离线语音合成
    public String QTTSSessionBegin(String params, IntByReference errorCode);

    //写入需要合成的文本
    public Integer QTTSTextPut(String sessionID, String textString, int textLen, String params);

    //获取离线合成的音频
    public Pointer QTTSAudioGet(String sessionID, IntByReference audioLen, IntByReference synthStatus, IntByReference errorCode);

    //结束本次普通离线语音合成
    public Integer QTTSSessionEnd(String sessionID, String hints);

    //定义退出方法
    public Integer MSPLogout();
}

6、离线命令词方法重写

7、回调函数的定义(1个唤醒的,2个离线命令词的)

8、为了方便各位看官,上POM文件

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <!--父工程坐标############################################################################################-->
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.1.6.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <!--自己被别人引用的坐标#########################################################################################-->
    <groupId>com.example</groupId>
    <artifactId>day</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>day</name>
    <description>day</description>
    <!--指定JDK版本################################################################################################-->
    <properties>
        <java.version>1.8</java.version>
    </properties>
    <!--总体依赖JAR################################################################################################-->


    <dependencies>
        <!-- https://mvnrepository.com/artifact/com.google.code.gson/gson -->
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.10.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/net.java.dev.jna/jna -->
        <dependency>
            <groupId>net.java.dev.jna</groupId>
            <artifactId>jna</artifactId>
            <version>5.5.0</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>2.1.6.RELEASE</version>
            </plugin>
        </plugins>
    </build>

    <!--配置阿里云仓库下载-->
    <repositories>
        <repository>
            <id>nexus-aliyun</id>
            <name>nexus-aliyun</name>
            <url>https://maven.aliyun.com/nexus/content/groups/public/</url>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>
    <pluginRepositories>
        <pluginRepository>
            <id>public</id>
            <name>nexus-aliyun</name>
            <url>https://maven.aliyun.com/nexus/content/groups/public/</url>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </pluginRepository>
    </pluginRepositories>

</project>

9、命令词也给一份示例Call.bnf(老生常谈,注意放置位置)

#BNF+IAT 1.0;
!grammar call;
!slot <enter>;
!slot <scanSolicitation>;
!slot <scanDelivery>;
!slot <exit>;
!start <callStart>;
<callStart>:[<enter>][<scanSolicitation>][<scanDelivery>][<exit>];
<enter>:立刻|马上|一分钟后|十分钟后|半小时后;
<scanSolicitation>:打开|关闭|调量|调暗|调高|调低;
<scanDelivery>:主卧|次卧|书房|客厅;
<exit>:空调|点灯|窗户|窗帘|衣柜;

10、实现完美的智能语音交互,感兴趣的可以结合下大模型做智能问答场景。

  • 2
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 8
    评论
评论 8
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值