阿里语音识别看这一篇就够了

先看效果

效果视频

首先到阿里页面创建项目

传送门:https://help.aliyun.com/document_detail/71936.htm?spm=a2c4g.11186623.0.0.12a03787uqgGAh#2572188

下载sdk引入到项目并且依赖

传送门:https://gw.alipayobjects.com/os/bmw-prod/d5d24de6-599d-41ac-aad7-3bfa6fc38f42.zip?spm=a2c4g.11186623.0.0.12a037872vUnOu&file=d5d24de6-599d-41ac-aad7-3bfa6fc38f42.zip

解压ZIP包,在app/libs目录下获取AAR格式的SDK包,将AAR包集成到您的工程项目中进行依赖。

获取Token

Token需要请求服务器获取,可以跟后台沟通获取方式以及返回的数据格式。如果只是体验demo,可以使用官网提供的测试Token。
传送门:https://help.aliyun.com/document_detail/450514.htm?spm=a2c4g.11186623.0.0.12a0344eej4i75#587dee8029x7r

以上准备工作结束接下来开始集成

集成还是很简单的,官网文档讲得也比较全面。
需要注意的一点是:在使用语音识别之前必须先获取录音权限(RECORD_AUDIO),否则会识别失败。

接下来看主要代码封装成简单的工具类

/**
 * Thin wrapper around the Alibaba NUI one-sentence ASR SDK.
 *
 * <p>Lifecycle: {@link #voiceInitToken} (fetch/cached token + SDK init) →
 * {@link #initAudioRecorder} → {@link #startDialog} / {@link #stopDialog} /
 * {@link #cancelDialog} → {@link #releaseVoice}. Recognition results are
 * delivered through {@link AutomaticSpeechRecognitionListener}.
 *
 * <p>NOTE: the RECORD_AUDIO runtime permission must be granted before use,
 * otherwise recognition fails. Not thread-safe; call from a single thread.
 */
public class AutomaticSpeechRecognitionUtils implements INativeNuiCallback {
    private static final String TAG = AutomaticSpeechRecognitionUtils.class.getName();

    // 20ms of audio at 16kHz / 16-bit / mono: 20ms * 2 bytes * 1 channel * 16000 Hz / 1000
    private final static int WAVE_FRAM_SIZE = 20 * 2 * 1 * 16000 / 1000;
    public final static int SAMPLE_RATE = 16000;

    private NativeNui nui_instance = new NativeNui();
    private AudioRecord mAudioRecorder;
    private AutomaticSpeechRecognitionListener mAutomaticSpeechRecognitionListener;
    private Context mContext;
    private boolean mInit = false; // true only after the SDK initialized successfully

    /**
     * Fetches an access token (from the server, or from the SP cache while the
     * cached one is unexpired) and initializes the SDK with it.
     *
     * @param context                            context used for asset copy / cache dirs
     * @param automaticSpeechRecognitionListener result/error callback, may be null
     */
    public void voiceInitToken(Context context, AutomaticSpeechRecognitionListener automaticSpeechRecognitionListener) {
        this.mContext = context;
        this.mAutomaticSpeechRecognitionListener = automaticSpeechRecognitionListener;
        // Epoch seconds. Kept as long to avoid the year-2038 int overflow.
        long time = System.currentTimeMillis() / 1000;
        // Cached token expired (or never fetched) -> request a fresh one.
        if (time > (SearchVoiceSp.getInstance(UKidsApplication.getInstance()).getSearchVoiceTime())) {
            RetrofitManager.getInstance().getAutomaticSpeechRecognition(new UkidsObserver<VoiceRecognitionEntity>() {
                @Override
                public void onSubscribe(Disposable d) {
                    super.onSubscribe(d);
                }

                @Override
                public void onNext(VoiceRecognitionEntity voiceRecognitionEntity) {
                    super.onNext(voiceRecognitionEntity);
                    if (voiceRecognitionEntity != null) {
                        String token = voiceRecognitionEntity.getToken();
                        // NOTE(review): "expire" is compared against epoch seconds above,
                        // so the server is assumed to return an absolute expiry timestamp,
                        // not a duration — confirm with the backend contract.
                        int expire = voiceRecognitionEntity.getExpire();
                        SearchVoiceSp.getInstance(UKidsApplication.getInstance()).setSearchVoiceToken(token);
                        SearchVoiceSp.getInstance(UKidsApplication.getInstance()).setSearchVoiceTime(expire);
                        Log.i(TAG, "voiceInitToken onNext " + token);
                        voiceInit(token);
                    }
                }

                @Override
                public void onError(Throwable e) {
                    super.onError(e);
                    mInit = false;
                    if (mAutomaticSpeechRecognitionListener != null)
                        mAutomaticSpeechRecognitionListener.initError();
                    Log.i(TAG, "voiceInitToken onError " + e.toString());
                }

                @Override
                public void onComplete() {
                    super.onComplete();
                }
            });
        } else {
            // Cached token still valid — reuse it without a network round trip.
            String searchVoiceToken = SearchVoiceSp.getInstance(UKidsApplication.getInstance()).getSearchVoiceToken();
            voiceInit(searchVoiceToken);
        }
    }

    /** @return whether the SDK has been initialized successfully. */
    public boolean isInitVoice() {
        return mInit;
    }

    /**
     * Initializes the NUI SDK: copies bundled assets, then calls
     * {@code NativeNui.initialize} with the generated parameter JSON.
     * Reports success/failure through the listener.
     *
     * @param token valid access token for the Alibaba NLS gateway
     */
    private void voiceInit(String token) {
        // Workspace holding the SDK model/config files copied from assets.
        String asset_path = CommonUtils.getModelPath(mContext);
        Log.i(TAG, "use workspace " + asset_path);
        // getExternalCacheDir() may return null (e.g. storage unmounted);
        // fall back to the internal cache dir instead of crashing.
        File cacheDir = mContext.getExternalCacheDir();
        if (cacheDir == null) {
            cacheDir = mContext.getCacheDir();
        }
        String debug_path = cacheDir.getAbsolutePath() + "/debug_" + System.currentTimeMillis();
        FileUtils.createDir(debug_path);

        // Copy the SDK configuration files out of the APK assets.
        if (CommonUtils.copyAssetsData(mContext)) {
            Log.i(TAG, "copy assets data done");
        } else {
            Log.i(TAG, "copy assets failed");
            mInit = false;
            if (mAutomaticSpeechRecognitionListener != null)
                mAutomaticSpeechRecognitionListener.initError();
            return;
        }

        // Initialize the SDK. Returns Constants.NuiResultCode.SUCCESS on success.
        int ret = nui_instance.initialize(this, genInitParams(asset_path, debug_path, token), Constants.LogLevel.LOG_LEVEL_VERBOSE, true);
        Log.i(TAG, "是否初始化成功" + ret);
        // Recognition parameters — see the NUI SDK API docs for the full list.
        nui_instance.setParams(genParams());

        if (ret == Constants.NuiResultCode.SUCCESS) {
            mInit = true;
            if (mAutomaticSpeechRecognitionListener != null)
                mAutomaticSpeechRecognitionListener.initSucceed();
        } else {
            mInit = false;
            if (mAutomaticSpeechRecognitionListener != null)
                mAutomaticSpeechRecognitionListener.initError();
        }
    }

    /**
     * Lazily creates the {@link AudioRecord}. The SDK only supports
     * 16-bit mono PCM at 8k/16k; we use 16k.
     */
    public void initAudioRecorder() {
        if (mAudioRecorder == null) {
            mAudioRecorder = new AudioRecord(MediaRecorder.AudioSource.DEFAULT, SAMPLE_RATE,
                    AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, WAVE_FRAM_SIZE * 4);
        }
    }

    /** Starts a push-to-talk recognition dialog. Requires successful init. */
    public void startDialog() {
        if (nui_instance != null && mInit) {
            int ret = nui_instance.startDialog(Constants.VadMode.TYPE_P2T,
                    genDialogParams());
            Log.i(TAG, "start done with " + ret);
        } else {
            ToastUtil.showShortToast(mContext, "初始化失败");
        }
    }

    /** Stops the current dialog; the final result is still delivered via callback. */
    public void stopDialog() {
        if (nui_instance != null && mInit) {
            long ret = nui_instance.stopDialog();
            // Fixed copy-paste: this is stop, not cancel.
            Log.i(TAG, "stop dialog " + ret + " end");
        } else {
            ToastUtil.showShortToast(mContext, "初始化失败");
        }
    }

    /** Cancels the current dialog; no final result will be delivered. */
    public void cancelDialog() {
        if (nui_instance != null && mInit) {
            long ret = nui_instance.cancelDialog();
            Log.i(TAG, "cancel dialog " + ret + " end");
        } else {
            ToastUtil.showShortToast(mContext, "初始化失败");
        }
    }

    /**
     * Releases the SDK and the recorder. After this call the instance must be
     * re-initialized via {@link #voiceInitToken} before further use.
     */
    public void releaseVoice() {
        // Reset the init flag so startDialog/stopDialog cannot touch a released SDK.
        mInit = false;
        if (nui_instance != null) {
            nui_instance.release();
        }
        if (mAudioRecorder != null) {
            mAudioRecorder.release();
            mAudioRecorder = null;
        }
    }

    /** Builds the per-dialog parameter JSON (currently empty). */
    private String genDialogParams() {
        String params = "";
        try {
            JSONObject dialog_param = new JSONObject();
            params = dialog_param.toString();
        } catch (JSONException e) {
            e.printStackTrace();
        }
        Log.i(TAG, "dialog params: " + params);
        return params;
    }

    /** Builds the recognition parameter JSON passed to {@code setParams}. */
    private String genParams() {
        String params = "";
        try {
            JSONObject nls_config = new JSONObject();
            nls_config.put("enable_intermediate_result", true);
            // Tune these to the actual business requirements:
            //nls_config.put("enable_punctuation_prediction", true);
            //nls_config.put("enable_inverse_text_normalization", true);
            nls_config.put("enable_voice_detection", true);
            nls_config.put("max_start_silence", 2000);
            nls_config.put("max_end_silence", 500);
            //nls_config.put("customization_id", "test_id");
            //nls_config.put("vocabulary_id", "test_id");
            //nls_config.put("sample_rate", 16000);
            //nls_config.put("sr_format", "opus");
            JSONObject parameters = new JSONObject();
            parameters.put("nls_config", nls_config);
            // Requested service type; one-sentence recognition is "0" (kServiceTypeASR).
            parameters.put("service_type", Constants.kServiceTypeASR);
            // If HttpDns is available it can be configured here:
            // parameters.put("direct_ip", FileUtils.getDirectIp());
            params = parameters.toString();
            Log.i(TAG, "genParams: " + params);
        } catch (JSONException e) {
            e.printStackTrace();
        }
        return params;
    }

    /**
     * Builds the SDK initialization JSON.
     *
     * @param workPath  workspace directory containing the SDK assets
     * @param debugPath directory for SDK debug output
     * @param token     NLS access token
     */
    private String genInitParams(String workPath, String debugPath, String token) {
        String str = "";
        try {
            JSONObject object = new JSONObject();
            // SECURITY(review): hard-coded app_key checked into source (and published
            // in a blog post) — move it to secure config / server side and rotate it.
            object.put("app_key", "QhjdgLiaAFM3j8qY");
            object.put("token", token);
            object.put("url", "wss://nls-gateway.aliyuncs.com/ws/v1");
            object.put("device_id", FileUtils.getDeviceId());
            object.put("workspace", workPath);
            object.put("debug_path", debugPath);
            object.put("sample_rate", "16000");
            object.put("format", "opus");
            //            object.put("save_wav", "true");
            str = object.toString();
        } catch (JSONException e) {
            e.printStackTrace();
        }
        Log.i(TAG, "InsideUserContext:" + str);
        return str;
    }

    /** SDK event callback: dispatches final/partial results and errors to the listener. */
    @Override
    public void onNuiEventCallback(Constants.NuiEvent nuiEvent, int i, int i1, KwsResult kwsResult, AsrResult asrResult) {
        Log.i(TAG, "event=" + nuiEvent);
        if (nuiEvent == Constants.NuiEvent.EVENT_ASR_RESULT) { // final recognition result
            String json = asrResult.asrResult;
            Log.i(TAG, "event=end" + json);
            if (mAutomaticSpeechRecognitionListener != null) {
                mAutomaticSpeechRecognitionListener.accomplish(getResult(json));
            }
        } else if (nuiEvent == Constants.NuiEvent.EVENT_ASR_PARTIAL_RESULT) { // intermediate result
            String json = asrResult.asrResult;
            Log.i(TAG, "event=start" + json);
            if (mAutomaticSpeechRecognitionListener != null) {
                mAutomaticSpeechRecognitionListener.inProgress(getResult(json));
            }
        } else if (nuiEvent == Constants.NuiEvent.EVENT_ASR_ERROR) { // see error-code docs for cause
            HashMap<String, String> map = new HashMap<>();
            map.put("code", String.valueOf(i));
            UMengUtils.onEvent(UKidsApplication.getInstance(), "U23_voice_sdkerror", map);
            if (mAutomaticSpeechRecognitionListener != null) {
                mAutomaticSpeechRecognitionListener.accomplish("");
            }
        } else if (nuiEvent == Constants.NuiEvent.EVENT_DIALOG_EX) {
            UMengUtils.onEvent(UKidsApplication.getInstance(), "U23_voice_fault");
            if (mAutomaticSpeechRecognitionListener != null) {
                mAutomaticSpeechRecognitionListener.accomplish("");
            }
        }
    }

    /**
     * Extracts payload.result from the SDK's result JSON.
     * Returns "" instead of throwing NPE on malformed/empty payloads.
     */
    private String getResult(String json) {
        AutomaticSpeechRecognitionEntity entity = GsonUtils.fromJson(json, AutomaticSpeechRecognitionEntity.class);
        if (entity == null) {
            return "";
        }
        AutomaticSpeechRecognitionEntity.PayloadBean payload = entity.getPayload();
        if (payload == null || payload.getResult() == null) {
            return "";
        }
        return payload.getResult();
    }

    /** SDK pull-mode audio callback: fills {@code bytes} with up to {@code i} bytes of PCM. */
    @Override
    public int onNuiNeedAudioData(byte[] bytes, int i) {
        // Guard against a missing/released recorder instead of NPE-ing in the SDK thread.
        if (mAudioRecorder == null || mAudioRecorder.getState() != AudioRecord.STATE_INITIALIZED) {
            Log.e(TAG, "audio recorder not init");
            return -1;
        }
        return mAudioRecorder.read(bytes, 0, i);
    }

    /** SDK audio-state callback: start/pause/release the recorder as the SDK requests. */
    @Override
    public void onNuiAudioStateChanged(Constants.AudioState audioState) {
        Log.i(TAG, "onNuiAudioStateChanged");
        if (mAudioRecorder != null) {
            if (audioState == Constants.AudioState.STATE_OPEN) {
                Log.i(TAG, "audio recorder start");
                mAudioRecorder.startRecording();
            } else if (audioState == Constants.AudioState.STATE_CLOSE) {
                Log.i(TAG, "audio recorder close");
                mAudioRecorder.release();
                // Null it out so initAudioRecorder() creates a fresh instance next
                // session instead of reusing a released recorder.
                mAudioRecorder = null;
            } else if (audioState == Constants.AudioState.STATE_PAUSE) {
                Log.i(TAG, "audio recorder pause");
                mAudioRecorder.stop();
            }
        }
    }

    /** Audio RMS (volume level) callback — unused. */
    @Override
    public void onNuiAudioRMSChanged(float v) {

    }

    /** Voiceprint event callback — unused. */
    @Override
    public void onNuiVprEventCallback(Constants.NuiVprEvent nuiVprEvent) {

    }

}

基本就是这样

参数说明

语音服务的地址根据自己需求配置
在这里插入图片描述
交互图

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

在这里插入图片描述

基本就这些

错误码详情查看官网文档

传送门:https://help.aliyun.com/document_detail/173298.html

  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值