调用火山云的语音生成TTS和语音识别STT

首先需要去火山云的控制台开通 TTS 和 STT 服务:语音技术 (volcengine.com)

火山这里都提供了免费的额度可以使用

我这里使用 Java 来调用 API。

目前我还了解到阿里的开源项目 SenseVoice(STT)和 CosyVoice(TTS)非常不错,不过它们都是使用 Python 开发的,可以做到说话情绪的识别,感兴趣可以去 GitHub 上了解一下。


TTS(首先需要导入它给的类)

package com.erroright.backend_server_java.pojo.util;

import java.util.UUID;

/**
 * Request body for the Volcengine TTS HTTP API.
 *
 * <p>An instance is serialized to JSON and posted as-is; it groups the four
 * sections of the request: {@link App}, {@link User}, {@link Audio} and
 * {@link Request}. Every section is pre-populated with defaults, so the only
 * value a caller normally supplies is the text to synthesize.
 *
 * <p>NOTE(review): the nested classes are non-static inner classes. That is
 * fine for serialization, but most JSON libraries cannot deserialize into
 * them; they are kept non-static so existing {@code outer.new App()} call
 * sites keep compiling.
 */
public class TtsRequest {
    /** Application id shown in the Volcengine console (placeholder value). */
    public static final String APP_ID = "控制台的APPID";
    /** Cluster name of the TTS service. */
    public static final String CLUSTER = "";

    /** Default value for {@link App#getToken()}. */
    public static final String Token = "";
    /**
     * Voice used for synthesis. If generation fails with an error, the
     * account most likely has no permission for this voice.
     */
    public static final String VoiceType = "BV001_streaming";
    /** Speaking tone / emotion. */
    public static final String Emotion = "angry";

    private App app = new App();
    private User user = new User();
    private Audio audio = new Audio();
    private Request request = new Request();

    /** Creates a request with default settings and no text. */
    public TtsRequest() {
    }

    /**
     * Creates a request with default settings for the given text.
     *
     * @param text the text to synthesize
     */
    public TtsRequest(String text) {
        this.request.text = text;
    }

    public App getApp() {
        return app;
    }

    public void setApp(App app) {
        this.app = app;
    }

    public User getUser() {
        return user;
    }

    public void setUser(User user) {
        this.user = user;
    }

    public Audio getAudio() {
        return audio;
    }

    public void setAudio(Audio audio) {
        this.audio = audio;
    }

    public Request getRequest() {
        return request;
    }

    public void setRequest(Request request) {
        this.request = request;
    }

    /** Application credentials section. */
    public class App {
        private String appid = APP_ID;
        // Currently not enforced by the service; a default such as "access_token" is enough.
        private String token = Token;
        private String cluster = CLUSTER;

        public String getAppid() {
            return appid;
        }

        public void setAppid(String appid) {
            this.appid = appid;
        }

        public String getToken() {
            return token;
        }

        public void setToken(String token) {
            this.token = token;
        }

        public String getCluster() {
            return cluster;
        }

        public void setCluster(String cluster) {
            this.cluster = cluster;
        }
    }

    /** End-user section. */
    public class User {
        // Currently not enforced by the service; any default value works.
        private String uid = "388808087185088";

        public String getUid() {
            return uid;
        }

        public void setUid(String uid) {
            this.uid = uid;
        }
    }

    /**
     * Audio rendering options.
     *
     * <p>NOTE(review): the ratio getters are named {@code getSpeedRatio} etc.
     * while the fields are {@code speed_ratio}; depending on the JSON library
     * the emitted keys may be {@code speedRatio} rather than
     * {@code speed_ratio} - confirm against the API contract.
     */
    public class Audio {
        private String voice_type = VoiceType;
        private String encoding = "wav";
        private float speed_ratio = 1.0F;
        // NOTE(review): 10 looks far outside the usual ratio range - confirm with the API docs.
        private float volume_ratio = 10;
        private float pitch_ratio = 10;
        private String emotion = Emotion;

        public String getVoice_type() {
            return voice_type;
        }

        public void setVoice_type(String voice_type) {
            this.voice_type = voice_type;
        }

        public String getEncoding() {
            return encoding;
        }

        public void setEncoding(String encoding) {
            this.encoding = encoding;
        }

        public float getSpeedRatio() {
            return speed_ratio;
        }

        /**
         * Sets the speed ratio. Accepts {@code float} so fractional values
         * (e.g. 1.5) can be expressed; {@code int} arguments still compile
         * through widening.
         */
        public void setSpeedRatio(float speed_ratio) {
            this.speed_ratio = speed_ratio;
        }

        public float getVolumeRatio() {
            return volume_ratio;
        }

        /** Sets the volume ratio; see {@link #setSpeedRatio(float)} for the parameter type. */
        public void setVolumeRatio(float volume_ratio) {
            this.volume_ratio = volume_ratio;
        }

        public float getPitchRatio() {
            return pitch_ratio;
        }

        /** Sets the pitch ratio; see {@link #setSpeedRatio(float)} for the parameter type. */
        public void setPitchRatio(float pitch_ratio) {
            this.pitch_ratio = pitch_ratio;
        }

        public String getEmotion() {
            return emotion;
        }

        /**
         * Sets the emotion by name (e.g. {@code "angry"}), matching the type
         * of the field and of the {@link TtsRequest#Emotion} default.
         */
        public void setEmotion(String emotion) {
            this.emotion = emotion;
        }

        /**
         * Legacy overload that stores the decimal string form of
         * {@code emotion}. Kept for backward compatibility; prefer
         * {@link #setEmotion(String)}.
         */
        public void setEmotion(int emotion) {
            this.emotion = String.valueOf(emotion);
        }
    }

    /** Per-request section: id, text and operation. */
    public class Request {
        // A fresh random id is generated for every request object.
        private String reqid = UUID.randomUUID().toString();
        private String text;
        private String text_type = "plain";
        private String operation = "query";

        public String getReqid() {
            return reqid;
        }

        public void setReqid(String reqid) {
            this.reqid = reqid;
        }

        public String getText() {
            return text;
        }

        public void setText(String text) {
            this.text = text;
        }

        public String getText_type() {
            return text_type;
        }

        public void setText_type(String text_type) {
            this.text_type = text_type;
        }

        public String getOperation() {
            return operation;
        }

        public void setOperation(String operation) {
            this.operation = operation;
        }
    }
}

调用代码

package com.erroright.backend_server_java.util;

import com.alibaba.fastjson.JSON;
import com.erroright.backend_server_java.pojo.util.TtsRequest;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import org.springframework.stereotype.Component;


import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Base64;

@Component
@Slf4j
public class TtsHttpClient {


    public static final String API_URL = "https://openspeech.bytedance.com/api/v1/tts";
    public static final String ACCESS_TOKEN = "填入火山云开通项目的Token";


    public static byte[] getTts(String content) throws IOException {
        log.info("TTS生成:"+content);
        TtsRequest ttsRequest = new TtsRequest(content);
        String json= JSON.toJSONString(ttsRequest);
        OkHttpClient client = new OkHttpClient();
        RequestBody body = RequestBody.create(json, MediaType.get("application/json; charset=utf-8"));
        Request request = new Request.Builder()
                .url(API_URL)
                .post(body)
                .header("Authorization", "Bearer; " + ACCESS_TOKEN)
                .build();
        try (Response response = client.newCall(request).execute()) {
            String TtsRresponse=response.body().string();
            // 提取 "data" 字段的值
            String data = TtsRresponse.split("\"data\":\"")[1].split("\"")[0];
               //保存生成的文件
            try (FileOutputStream fos = new FileOutputStream("output.wav")) {
                fos.write(Base64.getDecoder().decode(data));
            }
            // 解码 Base64 数据
            return Base64.getDecoder().decode(data);
        }
    }

}

STT(导入类:官方文档中是三个类,为了在 Spring Boot 中封装,这里单独拆出了一个)
 

package com.erroright.backend_server_java.pojo.util;

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.java_websocket.client.WebSocketClient;
import org.java_websocket.handshake.ServerHandshake;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.security.InvalidKeyException;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;
import java.util.UUID;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

/**
 * Parameter object serialized to JSON for the initial ASR request.
 * It bundles four sections: {@link App}, {@link User}, {@link Request} and
 * {@link Audio}. All sections are plain mutable beans.
 */
class AsrParams {
    private App app;
    private User user;
    private Request request;
    private Audio audio;

    /** Creates the parameter bundle from its four sections. */
    public AsrParams(App app, User user, Request request, Audio audio) {
        this.app = app;
        this.user = user;
        this.request = request;
        this.audio = audio;
    }

    public App getApp() { return app; }

    public void setApp(App app) { this.app = app; }

    public User getUser() { return user; }

    public void setUser(User user) { this.user = user; }

    public Request getRequest() { return request; }

    public void setRequest(Request request) { this.request = request; }

    public Audio getAudio() { return audio; }

    public void setAudio(Audio audio) { this.audio = audio; }

    /** Application section: app id, cluster and token. */
    public static class App {
        private String appid;
        private String cluster;
        private String token;

        public App(String appid, String cluster, String token) {
            this.appid = appid;
            this.cluster = cluster;
            this.token = token;
        }

        public String getAppid() { return appid; }

        public void setAppid(String appid) { this.appid = appid; }

        public String getCluster() { return cluster; }

        public void setCluster(String cluster) { this.cluster = cluster; }

        public String getToken() { return token; }

        public void setToken(String token) { this.token = token; }
    }

    /** User section: carries only the user id. */
    public static class User {
        private String uid;

        public User(String uid) {
            this.uid = uid;
        }

        public String getUid() { return uid; }

        public void setUid(String uid) { this.uid = uid; }
    }

    /** Request section: request id, workflow and result options. */
    public static class Request {
        private String reqid;
        private String workflow;
        private int nbest;
        private boolean show_utterances;
        private String result_type;
        private int sequence;

        public Request(String reqid, String workflow, int nbest, boolean show_utterances, String result_type, int sequence) {
            this.reqid = reqid;
            this.workflow = workflow;
            this.nbest = nbest;
            this.show_utterances = show_utterances;
            this.result_type = result_type;
            this.sequence = sequence;
        }

        public String getReqid() { return reqid; }

        public void setReqid(String reqid) { this.reqid = reqid; }

        public String getWorkflow() { return workflow; }

        public void setWorkflow(String workflow) { this.workflow = workflow; }

        public int getNbest() { return nbest; }

        public void setNbest(int nbest) { this.nbest = nbest; }

        public boolean isShow_utterances() { return show_utterances; }

        public void setShow_utterances(boolean show_utterances) { this.show_utterances = show_utterances; }

        public String getResult_type() { return result_type; }

        public void setResult_type(String result_type) { this.result_type = result_type; }

        public int getSequence() { return sequence; }

        public void setSequence(int sequence) { this.sequence = sequence; }
    }

    /** Audio section: container format, codec, sample rate, bit depth and channel count. */
    public static class Audio {
        private String format;
        private String codec;
        private int rate;
        private int bits;
        private int channels;

        public Audio(String format, String codec, int rate, int bits, int channels) {
            this.format = format;
            this.codec = codec;
            this.rate = rate;
            this.bits = bits;
            this.channels = channels;
        }

        public String getFormat() { return format; }

        public void setFormat(String format) { this.format = format; }

        public String getCodec() { return codec; }

        public void setCodec(String codec) { this.codec = codec; }

        public int getRate() { return rate; }

        public void setRate(int rate) { this.rate = rate; }

        public int getBits() { return bits; }

        public void setBits(int bits) { this.bits = bits; }

        public int getChannels() { return channels; }

        public void setChannels(int channels) { this.channels = channels; }
    }
}

/**
 * WebSocket client for the Volcengine streaming ASR endpoint.
 *
 * <p>Every client frame built here is: a 4-byte header (protocol version and
 * header size, message type and flags, serialization and compression, one
 * zero byte), a 4-byte big-endian payload length, then the gzip-compressed
 * payload. The first frame carries the JSON request parameters; later frames
 * carry audio chunks.
 *
 * <p>The public API is synchronous: {@link #asr_sync_connect()} and
 * {@link #asr_send(byte[], boolean)} block on a latch that the WebSocket
 * callbacks release, for at most {@code recv_timeout} seconds. One request
 * may be in flight at a time; the class is not designed for concurrent
 * callers.
 */
public class AsrClient extends WebSocketClient {
    private static final String URL = "wss://openspeech.bytedance.com/api/v2/asr";
    private static final Logger logger = LoggerFactory.getLogger(AsrClient.class);
    // ObjectMapper is reused for both directions instead of being rebuilt per message.
    private static final ObjectMapper MAPPER =
            new ObjectMapper().disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);

    private String appid;
    private String token;
    private String sk;
    private String cluster;
    private String workflow = "audio_in,resample,partition,vad,fe,decode,nlu_punctuate";
    private String uid = "usesr_id";
    private int nbest = 1;
    private boolean show_utterances = true;
    private String result_type = "full";
    private String format = "wav";
    private String codec = "raw";
    private int sample_rate = 16000;
    private int channels = 1;
    private int bits = 16;
    private AuthType authType = AuthType.TOKEN;
    private byte[] params_msg = null;
    // The three fields below are written by the WebSocket callbacks and read
    // by the calling thread, hence volatile.
    private volatile AsrResponse asr_response;
    private volatile CountDownLatch recv_latch = null;
    private int recv_timeout = 5;
    private volatile boolean recv_suc = true;

    /** Creates an unconnected client for the fixed ASR endpoint URL. */
    public static AsrClient build() throws URISyntaxException {
        URI uri = new URI(URL);
        return new AsrClient(uri);
    }

    // TODO accept a listener for onOpen, onMessage, onError, onComplete
    private AsrClient(URI uri) {
        super(uri);
    }

    /** Protocol version nibble of header byte 0. */
    public static class ProtocolVersion {
        public static final int PROTOCOL_VERSION = 0b0001;
    }

    /** Message type nibble (high half of header byte 1). */
    public static class MessageType {
        public static final int FULL_CLIENT_REQUEST = 0b0001;
        public static final int AUDIO_ONLY_CLIENT_REQUEST = 0b0010;
        public static final int FULL_SERVER_RESPONSE = 0b1001;
        public static final int SERVER_ACK = 0b1011;
        public static final int ERROR_MESSAGE_FROM_SERVER = 0b1111;
    }

    /** Message type flags nibble (low half of header byte 1). */
    public static class MessageTypeFlag {
        public static final int NO_SEQUENCE_NUMBER = 0b0000;
        public static final int POSITIVE_SEQUENCE_CLIENT_ASSGIN = 0b0001;
        public static final int NEGATIVE_SEQUENCE_SERVER_ASSGIN = 0b0010;
        public static final int NEGATIVE_SEQUENCE_CLIENT_ASSGIN = 0b0011;
    }

    /** Serialization nibble (high half of header byte 2). */
    public static class MessageSerial {
        public static final int NO_SERIAL = 0b0000;
        public static final int JSON = 0b0001;
        public static final int CUSTOM_SERIAL = 0b1111;
    }

    /** Compression nibble (low half of header byte 2). */
    public static class MessageCompress {
        public static final int NO_COMPRESS = 0b0000;
        public static final int GZIP = 0b0001;
        public static final int CUSTOM_COMPRESS = 0b1111;
    }

    /** How the connection authenticates: bearer token or HMAC signature. */
    public enum AuthType {
        TOKEN,
        SIGNATURE;
    }

    @Override
    public void onOpen(ServerHandshake serverHandshake) {
        logger.info("asr client onOpen");
    }

    @Override
    public void onMessage(String s) {
        // Responses are handled in onMessage(ByteBuffer); text frames are only logged.
        logger.info("onMessage String, should be onMessage(ByteBuffer) called");
    }

    @Override
    public void onMessage(ByteBuffer bytes) {
        try {
            if (parse_response(bytes) != 0) {
                recv_suc = false;
                logger.error("error happends to close connection");
                close();
            }
        } catch (IOException | RuntimeException e) {
            // A frame that cannot be decoded yields an empty response, the same
            // result the caller previously got after waiting for the timeout.
            recv_suc = false;
            asr_response = new AsrResponse();
            logger.error("failed to parse asr response", e);
        } finally {
            // Always wake the waiting caller, including on parse failure.
            release_waiter();
        }
    }

    @Override
    public void onClose(int i, String s, boolean b) {
        logger.info("asr onClose {}, {}, {}", i, s, b);
    }

    @Override
    public void onError(Exception e) {
        logger.error("asr onError", e);
        recv_suc = false;
        release_waiter();
        this.close();
    }

    /** Releases the thread blocked in connect/send, if any. The latch is null until the first request. */
    private void release_waiter() {
        CountDownLatch latch = recv_latch;
        if (latch != null) {
            latch.countDown();
        }
    }

    /**
     * Connects, sends the request parameters and waits for the first server
     * response.
     *
     * @return {@code true} only if the socket connected and a successful
     *         response arrived within the timeout
     */
    public boolean asr_sync_connect() throws IOException, InterruptedException, NoSuchAlgorithmException, InvalidKeyException {
        this.params_msg = construct_param();
        set_auth_header();
        recv_suc = true;
        if (!this.connectBlocking()) {
            return false;
        }
        recv_latch = new CountDownLatch(1);
        this.send(this.params_msg);
        boolean received = recv_latch.await(recv_timeout, TimeUnit.SECONDS);
        return received && recv_suc;
    }

    /**
     * Sends one audio chunk and waits for the matching server response.
     *
     * @param audio   raw bytes of the chunk; must not be {@code null}
     * @param is_last whether this is the final chunk of the stream
     * @return the latest parsed response; an empty {@link AsrResponse} if no
     *         response arrived within the timeout (the connection is closed
     *         in that case)
     */
    public AsrResponse asr_send(byte[] audio, boolean is_last) throws IOException, InterruptedException {
        if (audio == null) {
            throw new IllegalArgumentException("audio must not be null");
        }
        recv_latch = new CountDownLatch(1);
        byte[] payload = construct_audio_payload(audio, is_last);
        this.send(payload);
        boolean received = recv_latch.await(recv_timeout, TimeUnit.SECONDS);
        if (!received) {
            logger.error("recv message timeout");
            this.close();
            return new AsrResponse();
        }
        return asr_response;
    }

    /** Closes the connection. Always returns 0. */
    public int asr_close() {
        this.close();
        return 0;
    }

    /**
     * Adds the Authorization header. TOKEN mode sends the bearer token;
     * SIGNATURE mode sends an HMAC-SHA256 over the request line, the custom
     * header value and the parameter frame, keyed with {@code sk}.
     */
    private void set_auth_header() throws NoSuchAlgorithmException, InvalidKeyException {
        if (authType == AuthType.TOKEN) {
            this.addHeader("Authorization", "Bearer; " + token);
            return;
        }
        if (this.sk == null) {
            throw new InvalidKeyException("secret key (sk) must be set for SIGNATURE auth");
        }

        String custom_header = "Custom";
        String custom_cont = "auth_custom";
        this.addHeader(custom_header, custom_cont);

        String str = "GET " + getURI().getPath() + " HTTP/1.1\n"
                + custom_cont + "\n";
        byte[] str_byte = str.getBytes(StandardCharsets.UTF_8);
        byte[] data = concat_byte(str_byte, this.params_msg);

        byte[] sk_byte = this.sk.getBytes(StandardCharsets.UTF_8);
        String HMAC_SHA256 = "HmacSHA256";
        Mac sha256Hmac = Mac.getInstance(HMAC_SHA256);
        SecretKeySpec keySpec = new SecretKeySpec(sk_byte, HMAC_SHA256);
        sha256Hmac.init(keySpec);
        byte[] mac_data = sha256Hmac.doFinal(data);

        String base64_data = Base64.getUrlEncoder().encodeToString(mac_data);
        String auth_cont = "HMAC256; access_token=\"" + this.token
                + "\"; mac=\"" + base64_data
                + "\"; h=\"" + custom_header + "\"";
        this.addHeader("Authorization", auth_cont);
    }

    /** Gzip-compresses {@code content}. */
    private byte[] gzip_compress(byte[] content) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream(content.length);
        try (GZIPOutputStream gzip = new GZIPOutputStream(out)) {
            gzip.write(content);
        }
        // Closing the gzip stream has flushed the trailer into 'out'.
        return out.toByteArray();
    }

    /** Inflates gzip-compressed {@code content}. */
    private byte[] gzip_decompress(byte[] content) throws IOException {
        try (GZIPInputStream gzip = new GZIPInputStream(new ByteArrayInputStream(content))) {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] buff = new byte[1024];
            int len;
            while ((len = gzip.read(buff, 0, buff.length)) > 0) {
                out.write(buff, 0, len);
            }
            return out.toByteArray();
        }
    }

    /** Builds the 4-byte frame header; serialization/compression are always JSON + GZIP. */
    private byte[] build_header(int message_type, int message_type_flag) {
        int header_len = 4;
        byte[] header = new byte[header_len];
        // Header size is expressed in 4-byte units.
        header[0] = (byte) (ProtocolVersion.PROTOCOL_VERSION << 4 | (header_len >> 2));
        header[1] = (byte) (message_type << 4 | message_type_flag);
        header[2] = (byte) (MessageSerial.JSON << 4 | MessageCompress.GZIP);
        header[3] = 0;
        return header;
    }

    /** Joins header, 4-byte big-endian payload length and payload into one frame. */
    private byte[] build_frame(byte[] header, byte[] payload) {
        // ByteBuffer is big-endian by default.
        byte[] pl_byte = ByteBuffer.allocate(4).putInt(payload.length).array();
        return concat_byte(header, pl_byte, payload);
    }

    /** Builds the initial full-client-request frame carrying the JSON parameters. */
    private byte[] construct_param() throws IOException {
        byte[] header = build_header(MessageType.FULL_CLIENT_REQUEST, MessageTypeFlag.NO_SEQUENCE_NUMBER);

        String reqid = UUID.randomUUID().toString();
        AsrParams.App app = new AsrParams.App(appid, cluster, token);
        AsrParams.User user = new AsrParams.User(uid);
        AsrParams.Request request = new AsrParams.Request(reqid, workflow, nbest, show_utterances, result_type, 1);
        AsrParams.Audio audio = new AsrParams.Audio(format, codec, sample_rate, bits, channels);
        AsrParams asr_params = new AsrParams(app, user, request, audio);
        byte[] payload = MAPPER.writeValueAsBytes(asr_params);
        // The JSON contains the access token, so only the request id is logged.
        logger.debug("asr request params built, reqid {}", reqid);

        return build_frame(header, gzip_compress(payload));
    }

    /**
     * Decodes one server frame into {@code asr_response}.
     *
     * @return 0 on success; -1 if the frame is malformed, of an unsupported
     *         type, or carries a response whose code is not 1000
     */
    private int parse_response(ByteBuffer msg) throws IOException {
        // Copy the readable bytes instead of relying on a zero-offset backing array.
        ByteBuffer view = msg.duplicate();
        byte[] msg_byte = new byte[view.remaining()];
        view.get(msg_byte);
        if (msg_byte.length < 4) {
            logger.error("asr response too short: {} bytes", msg_byte.length);
            return -1;
        }

        int header_len = (msg_byte[0] & 0x0f) << 2;
        int message_type = (msg_byte[1] & 0xf0) >> 4;
        int message_serial = (msg_byte[2] & 0xf0) >> 4;
        int message_compress = msg_byte[2] & 0x0f;
        int payload_offset = header_len;

        if (message_type == MessageType.FULL_SERVER_RESPONSE) {
            payload_offset += 4; // payload size
        } else if (message_type == MessageType.SERVER_ACK) {
            payload_offset += 4; // sequence number
            if (msg_byte.length > payload_offset) {
                payload_offset += 4; // payload size, present only when a payload follows
            }
        } else if (message_type == MessageType.ERROR_MESSAGE_FROM_SERVER) {
            if (msg_byte.length >= payload_offset + 4) {
                logger.error("asr server error code {}", ByteBuffer.wrap(msg_byte, payload_offset, 4).getInt());
            }
            payload_offset += 8; // error code + payload size
        } else {
            logger.error("unsupported message type {}", message_type);
            return -1;
        }
        if (payload_offset > msg_byte.length) {
            logger.error("truncated asr response: {} bytes", msg_byte.length);
            return -1;
        }

        byte[] payload = new byte[msg_byte.length - payload_offset];
        System.arraycopy(msg_byte, payload_offset, payload, 0, payload.length);
        if (message_compress == MessageCompress.GZIP) {
            payload = gzip_decompress(payload);
        }
        if (message_serial != MessageSerial.JSON) {
            logger.error("unsupported message serialization {}", message_serial);
            return -1;
        }
        AsrResponse response = MAPPER.readValue(payload, 0, payload.length, AsrResponse.class);
        if (response == null) {
            logger.error("empty asr response");
            return -1;
        }
        asr_response = response;
        if (response.getCode() != 1000) {
            logger.error("asr resposne {}", new String(payload, StandardCharsets.UTF_8));
            return -1;
        }
        if (response.getSequence() < 0) {
            logger.debug("get last response");
        }

        return 0;
    }

    /** Builds an audio-only frame; the final chunk is flagged with a negative-sequence flag. */
    private byte[] construct_audio_payload(byte[] audio, boolean is_last) throws IOException {
        int flag = is_last
                ? MessageTypeFlag.NEGATIVE_SEQUENCE_SERVER_ASSGIN
                : MessageTypeFlag.NO_SEQUENCE_NUMBER;
        byte[] header = build_header(MessageType.AUDIO_ONLY_CLIENT_REQUEST, flag);
        return build_frame(header, gzip_compress(audio));
    }

    public void setAppid(String appid) {
        this.appid = appid;
    }

    public void setToken(String token) {
        this.token = token;
    }

    public void setSk(String sk) {
        this.sk = sk;
    }

    public void setCluster(String cluster) {
        this.cluster = cluster;
    }

    public void setWorkflow(String workflow) {
        this.workflow = workflow;
    }

    public void setUid(String uid) {
        this.uid = uid;
    }

    public void setShow_utterances(boolean show_utterances) {
        this.show_utterances = show_utterances;
    }

    public void setResult_type(String result_type) {
        this.result_type = result_type;
    }

    public void setFormat(String format) {
        this.format = format;
    }

    public void setCodec(String codec) {
        this.codec = codec;
    }

    public void setSample_rate(int sample_rate) {
        this.sample_rate = sample_rate;
    }

    public void setChannels(int channels) {
        this.channels = channels;
    }

    public void setBits(int bits) {
        this.bits = bits;
    }

    public AuthType getAuthType() {
        return authType;
    }

    public void setAuthType(AuthType authType) {
        this.authType = authType;
    }

    /** Returns the most recently parsed response, or {@code null} if none has arrived. */
    public AsrResponse getAsrResponse() {
        return asr_response;
    }

    private byte[] concat_byte(byte[] first, byte[] second) {
        byte[] result = new byte[first.length + second.length];
        System.arraycopy(first, 0, result, 0, first.length);
        System.arraycopy(second, 0, result, first.length, second.length);
        return result;
    }

    private byte[] concat_byte(byte[] first, byte[] second, byte[] third) {
        byte[] result = new byte[first.length + second.length + third.length];
        System.arraycopy(first, 0, result, 0, first.length);
        System.arraycopy(second, 0, result, first.length, second.length);
        System.arraycopy(third, 0, result, first.length + second.length, third.length);
        return result;
    }
}
package com.erroright.backend_server_java.pojo.util;

public class AsrResponse {
    private String reqid = "unknow";
    private int code = 0;
    private String message = "";
    private int sequence = 0;
    private Result[] result;
    private Addition addition;

    public String getReqid() {
        return reqid;
    }

    public void setReqid(String reqid) {
        this.reqid = reqid;
    }

    public int getCode() {
        return code;
    }

    public void setCode(int code) {
        this.code = code;
    }

    public String getMessage() {
        return message;
    }

    public void setMessage(String message) {
        this.message = message;
    }

    public int getSequence() {
        return sequence;
    }

    public void setSequence(int sequence) {
        this.sequence = sequence;
    }

    public Result[] getResult() {
        return result;
    }

    public void setResult(Result[] result) {
        this.result = result;
    }

    public Addition getAddition() {
        return addition;
    }

    public void setAddition(Addition addition) {
        this.addition = addition;
    }

    public static class Result {
        private String text;
        private int confidence;
        private String language;
        private Utterances[] utterances;
        private float global_confidence;

        public String getText() {
            return text;
        }

        public void setText(String text) {
            this.text = text;
        }

        public int getConfidence() {
            return confidence;
        }

        public void setConfidence(int confidence) {
            this.confidence = confidence;
        }

        public String getLanguage() {
            return language;
        }

        public void setLanguage(String language) {
            this.language = language;
        }

        public Utterances[] getUtterances() {
            return utterances;
        }

        public void setUtterances(Utterances[] utterances) {
            this.utterances = utterances;
        }

        public float getGlobal_confidence() {
            return global_confidence;
        }

        public void setGlobal_confidence(float global_confidence) {
            this.global_confidence = global_confidence;
        }
    }

    public static class Utterances {
        private String text;
        private int start_time;
        private int end_time;
        private boolean definite;
        private String language;
        private Words[] words;

        public String getText() {
            return text;
        }

        public void setText(String text) {
            this.text = text;
        }

        public int getStart_time() {
            return start_time;
        }

        public void setStart_time(int start_time) {
            this.start_time = start_time;
        }

        public int getEnd_time() {
            return end_time;
        }

        public void setEnd_time(int end_time) {
            this.end_time = end_time;
        }

        public boolean isDefinite() {
            return definite;
        }

        public void setDefinite(boolean definite) {
            this.definite = definite;
        }

        public String getLanguage() {
            return language;
        }

        public void setLanguage(String language) {
            this.language = language;
        }

        public Words[] getWords() {
            return words;
        }

        public void setWords(Words[] words) {
            this.words = words;
        }
    }

    public static class Words {
        private String text;
        private int start_time;
        private int end_time;
        private int blank_duration;

        public String getText() {
            return text;
        }

        public void setText(String text) {
            this.text = text;
        }

        public int getStart_time() {
            return start_time;
        }

        public void setStart_time(int start_time) {
            this.start_time = start_time;
        }

        public int getEnd_time() {
            return end_time;
        }

        public void setEnd_time(int end_time) {
            this.end_time = end_time;
        }

        public int getBlank_duration() {
            return blank_duration;
        }

        public void setBlank_duration(int blank_duration) {
            this.blank_duration = blank_duration;
        }
    }

    public static class Addition {
        private String duration;

        public String getDuration() {
            return duration;
        }

        public void setDuration(String duration) {
            this.duration = duration;
        }
    }
}

调用方式

package com.erroright.backend_server_java.util;

import com.erroright.backend_server_java.pojo.util.AsrClient;
import com.erroright.backend_server_java.pojo.util.AsrResponse;
import com.fasterxml.jackson.core.JsonProcessingException;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;


import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URISyntaxException;
import java.security.InvalidKeyException;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;

@Slf4j
@Component
public class SttStreamClient {

    String appid = "";  // 项目的 appid
    String token = "";  // 项目的 token
    String cluster = "";  // 请求的集群
    String audio_format = "wav";  // wav 或者 mp3, 根据音频类型设置
    AsrClient asr_client = null;
    SttStreamClient() throws URISyntaxException, IOException, NoSuchAlgorithmException, InvalidKeyException, InterruptedException {
        asr_client = AsrClient.build();
        asr_client.setAppid(appid);
        asr_client.setToken(token);
        asr_client.setCluster(cluster);
        asr_client.setFormat(audio_format);
        asr_client.setShow_utterances(true);
        asr_client.asr_sync_connect();
    }

    public  String STT(  byte[] file ) throws URISyntaxException, JsonProcessingException, FileNotFoundException {
        long startTime = System.currentTimeMillis();
        String STTResult="";
        try {
            // File file = new File(audio_path);
            // FileInputStream fp = new FileInputStream(file);
            byte[] b = new byte[64000];
            int len = 0;
            int count = 0;
            AsrResponse asr_response = new AsrResponse();
            // while ((len = fp.read(b)) > 0) {
            //     count += 1;
            //     asr_response = asr_client.asr_send(Arrays.copyOfRange(b, 0, len), fp.available() == 0);
            // }
            while (len < file.length) {
                int bytesToRead = Math.min(b.length, file.length - len);
                System.arraycopy(file, len, b, 0, bytesToRead);
                len += bytesToRead;

                asr_response = asr_client.asr_send(Arrays.copyOfRange(b, 0, bytesToRead), len == file.length);
                count += 1;
            }
            // get asr text
           // AsrResponse response = asr_client.getAsrResponse();
            for (AsrResponse.Result result: asr_response.getResult()) {
                STTResult+=result.getText();
            }
        } catch (Exception e) {
            System.err.println(e.getMessage());
        } finally {
            if (asr_client != null) {
                asr_client.asr_close();
            }
            long endTime = System.currentTimeMillis();
            log.info("语音识别执行时间: " +( endTime - startTime) / 1000.0);
            return STTResult;
        }
    }
}



 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

weighless1129

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值