科大讯飞语音转文字 WebAPI 接口调用

最新推荐文章于 2025-04-24 10:41:08 发布

原创

最新推荐文章于 2025-04-24 10:41:08 发布

· 1k 阅读

2 ·

版权

文章标签：

#语音识别 #人工智能

录制 amr 音频时必须使用单声道、8000 Hz 采样率；录制参数要与听写（识别）时使用的参数保持一致。

以下代码将单声道、采样率为 8000 Hz 的 amr 文件转换成 pcm 格式：

// Call the iFLYTEK handler class to transcribe the voice file.
// NOTE(review): this is an abbreviated usage sketch, not compilable code. The quoted
// arguments are placeholders (the constructor declared below takes int messageId and
// int conversationId), the onSuccess body is omitted, and the closing ");" is missing.
SpeechRecognizeHandle speechRecognizeHandle = new SpeechRecognizeHandle(“音频压缩包地址”, ”我这里是IM消息id“, ”这里是IM会话id 你们可以不用传“, new CloudCallBack() {
    @Override
    public void onSuccess(Object obj) {
}}

package com.minxing.kit.internal.common.util.speech;

import android.content.Context;
import android.media.MediaFormat;
import android.os.AsyncTask;
import android.os.Bundle;
import android.os.Environment;
import android.text.TextUtils;
import android.util.Log;

import com.arthenica.mobileffmpeg.Config;
import com.arthenica.mobileffmpeg.FFmpeg;
import com.gt.base.utils.KLog;
import com.gt.library_cloud_sdklib.utils.CloudCallBack;
import com.iflytek.cloud.ErrorCode;
import com.iflytek.cloud.InitListener;
import com.iflytek.cloud.RecognizerListener;
import com.iflytek.cloud.RecognizerResult;
import com.iflytek.cloud.SpeechConstant;
import com.iflytek.cloud.SpeechError;
import com.iflytek.cloud.SpeechRecognizer;
import com.iflytek.cloud.SpeechUtility;
import com.minxing.kit.MXKit;
import com.minxing.kit.core.concurrent.ThreadPoolManager;
import com.minxing.kit.internal.common.bean.im.ConversationMessage;
import com.minxing.kit.internal.common.util.AESUtil;
import com.minxing.kit.internal.common.util.Clip;
import com.minxing.kit.internal.common.util.FileUtils;
import com.minxing.kit.internal.common.util.PcmAmrToWavUtil;
import com.minxing.kit.utils.logutils.MXLog;

import org.json.JSONArray;
import org.json.JSONObject;
import org.json.JSONTokener;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;

import okhttp3.Call;
import okhttp3.Callback;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import okhttp3.WebSocket;
import okhttp3.WebSocket;

/**
 * 科大讯飞语音识别工具
 */

public abstract class SpeechRecognizeHandle {


    // Maps messageId -> conversationId. Created lazily by the constructor's worker and
    // filled just before each WebIATWS listener is built; the same map is handed to WebIATWS.
    private HashMap<Integer, Integer> callHashMap;
    // Stores dictation results (backed by a LinkedHashMap).
    private HashMap<String, String> mIatResults = new LinkedHashMap<>();
    private SpeechRecognizer mIat;
    // Engine type.
    private String mEngineType = SpeechConstant.TYPE_CLOUD;
    // Decoding / conversion helper.
    private AudioDecode audioDecode;

    /**
     * Starts recognition of one voice message on the global thread pool.
     *
     * <p>The worker derives the PCM cache path from {@code filePath}: the trailing path
     * segment (including its leading separator) with "amr" replaced by "pcm", appended to
     * {@code <appStoreHome>app_voice}. If a file already exists at that path it is streamed
     * to the iFLYTEK WebSocket endpoint through a {@code WebIATWS} listener; otherwise an
     * {@code AmrToPcmConversionTask} is started to produce it first.
     *
     * <p>Any exception raised by the worker is logged and reported through
     * {@code callBack.mxError(...)} with a JSON payload containing {@code messageId}.
     *
     * @param filePath       path of the recorded audio file to transcribe
     * @param messageId      IM message id; used as the key in {@code callHashMap} and echoed
     *                       back in the error payload
     * @param conversationId IM conversation id; stored as the value for {@code messageId}
     * @param callBack       receives the error notification and is passed on to
     *                       {@code WebIATWS}
     */
    public SpeechRecognizeHandle(String filePath, int messageId, int conversationId, CloudCallBack callBack) {
        ThreadPoolManager.getGlobalThreadPool().execute(new Runnable() {
            @Override
            public void run() {
                try {
                    if (callHashMap == null) {
                        callHashMap = new HashMap<>();
                    }

                    // Work out the name and location of the cached PCM file. The substring
                    // keeps the leading separator so it can be appended to the directory.
                    // NOTE(review): replace() rewrites every "amr" in the file name, not just
                    // the extension; kept as-is so existing cache file names stay valid.
                    int index = filePath.lastIndexOf(File.separator);
                    String pcmFileName = filePath.substring(index).replace("amr", "pcm");
                    String pcmFilePath = MXKit.getInstance().getKitConfiguration().getAppStoreHome() + "app_voice";
                    String pcmFileAbsolutlyPath = pcmFilePath + pcmFileName;

                    // No converted file yet: convert first, the task continues from there.
                    if (!FileUtils.exists(pcmFileAbsolutlyPath)) {
                        String outputPath = new File(pcmFileAbsolutlyPath).getAbsolutePath();
                        new AmrToPcmConversionTask(messageId, conversationId, callBack, filePath, outputPath).execute();
                        return;
                    }

                    // A converted file is already on disk: open the WebSocket right away.
                    File pcmFile = new File(pcmFileAbsolutlyPath);
                    String authUrl = WebIATWS.getAuthUrl(WebIATWS.hostUrl, WebIATWS.apiKey, WebIATWS.apiSecret);
                    OkHttpClient okHttpClient = new OkHttpClient.Builder().build();
                    // Swap the http:// and https:// schemes for ws:// and wss:// respectively.
                    String url = authUrl.replace("http://", "ws://").replace("https://", "wss://");
                    Request request = new Request.Builder().url(url).build();

                    callHashMap.put(messageId, conversationId);
                    WebIATWS webIATWS = new WebIATWS(pcmFile, messageId, callHashMap, callBack);
                    okHttpClient.newWebSocket(request, webIATWS);
                } catch (Exception e) {
                    // Log the cause instead of dropping it, then notify the caller.
                    Log.e("SpeechRecognizeHandle", "Speech recognition failed for messageId " + messageId, e);
                    com.alibaba.fastjson.JSONObject jsonObject = new com.alibaba.fastjson.JSONObject();
                    jsonObject.put("messageId", messageId);
                    callBack.mxError(jsonObject.toJSONString());
                }
            }
        });
    }

    private class AmrToPcmConversionTask extends AsyncTask<String, Void, File> {
        // IM message id; sent back under the "messageId" key when reporting an error.
        private int messageId;
        // IM conversation id; stored in callHashMap as the value for messageId.
        private int conversationId;
        // Receives mxError(...) on failure and is passed on to WebIATWS.
        public CloudCallBack callBack;
        // Input audio path handed to FFmpeg via -i.
        private String filePath;
        // Output path FFmpeg writes the converted PCM data to.
        private String pcmFilePath;

        /**
         * Captures everything the conversion and the follow-up recognition request need.
         *
         * @param messageId      IM message id
         * @param conversationId IM conversation id
         * @param callback       callback notified on failure and passed on to WebIATWS
         * @param filePath       input audio path for FFmpeg
         * @param pcmFilePath    output path for the converted PCM file
         */
        public AmrToPcmConversionTask(int messageId, int conversationId, CloudCallBack callback, String filePath, String pcmFilePath) {
            // Conversion input and output.
            this.filePath = filePath;
            this.pcmFilePath = pcmFilePath;

            // Identification and result delivery.
            this.messageId = messageId;
            this.conversationId = conversationId;
            this.callBack = callback;
        }

        /**
         * Converts the input audio file to raw PCM with FFmpeg.
         *
         * <p>The outer constructor treats any existing file at the PCM path as a finished
         * conversion, so a partial output left by a failed run would later be sent for
         * recognition. To prevent that, the output is deleted on failure when this run
         * created it.
         *
         * @param paths unused; input and output paths come from the task's fields
         * @return the converted PCM file, or {@code null} when FFmpeg did not report success
         */
        @Override
        protected File doInBackground(String... paths) {
            // FFmpeg arguments:
            //   -i <filePath>   input file to convert
            //   -c:a pcm_s16le  audio codec: 16-bit signed little-endian linear PCM
            //   -ar 8000        audio sample rate of 8000 Hz
            //   -ac 1           one audio channel (mono)
            //   -f s16le        explicit output format: 16-bit signed little-endian PCM
            //   <pcmFilePath>   output file
            //   -v debug        debug log level, for detailed diagnostics
            String[] cmd = {"-i", filePath, "-c:a", "pcm_s16le", "-ar", "8000", "-ac", "1", "-f", "s16le", pcmFilePath, "-v", "debug"};

            File outputFile = new File(pcmFilePath);
            // Remember whether the output predates this run so it is never deleted by mistake.
            boolean existedBefore = outputFile.exists();

            int rc = FFmpeg.execute(cmd);
            if (rc == Config.RETURN_CODE_SUCCESS) {
                // Conversion succeeded: hand back the PCM file.
                return outputFile;
            }

            // Conversion failed: remove whatever this run left behind so it is not picked up
            // as a valid cached conversion next time.
            if (!existedBefore && outputFile.exists() && !outputFile.delete()) {
                Log.w("SpeechRecognizeHandle", "Could not delete partial PCM output: " + pcmFilePath);
            }
            return null;
        }

        @Override
        protected void onPostExecute(File pcmFile) {
            super.onPostExecute(pcmFile);
            //转换完成地文件、 这个是我们地消息id  messageId
            try {
                if (pcmFile == null) {

                    com.alibaba.fastjson.JSONObject jsonObject = new com.alibaba.fastjson.JSONObject();
                    jsonObject.put("messageId", messageId);
                    callBack.mxError(jsonObject.toJSONString());
                    // 转换失败的处理
                    String error = Config.getLastCommandOutput();
                    KLog.esLog("SpeechRecognizeHandle", "Error: " + error);
                    return;
                }
                //
                String authUrl = WebIATWS.getAuthUrl(WebIATWS.hostUrl, WebIATWS.apiKey, WebIATWS.apiSecret);
                OkHttpClient okHttpClient = new OkHttpClient.Builder().build();
                //将url中的 schema http://和https://分别替换为ws:// 和 wss://
                String url = authUrl.toString().replace("http://", "ws://").replace("https://", "wss://");
                Request request = new Request.Builder().url(url).build();

                if (pcmFile != null) {
                    Call newCall = okHttpClient.newCall(request);
                    callHashMap.put(messageId, conversationId);
                    WebIATWS webIATWS = new WebIATWS(pcmFile, messageId, callHashMap, callBack);
                    okHttpClient.newWebSocket(request, webIATWS);
                } else {
                    com.alibaba.fastjson.JSONObject jsonObject = new com.alibaba.fastjson.JSONObject();
                    jsonObject.put("messageId", messageId);
                    callBack.mxError(jsonObject.toJSONString());
                }
            } catch (Exception exception) {
                exception.getMessage();
                com.alibaba.fastjson.JSONObject jsonObject = new com.alibaba.fastjson.JSONO

最低0.47元/天解锁文章