websocket实现讯飞实时语音转写

效果图

在这里插入图片描述

前端

  1. 开始语音
    其中 localhost:8085 是后台服务的地址和端口,请按实际部署情况修改。
start() {
    const that = this;
    this.$nextTick(() => {
        var ws = new WebSocket('ws://localhost:8085/webSocket/content');
        ws.onopen = function () {
            console.log('WebSocket 连接已经建立。');
            // ws.send('Hello, server!');
        };
        ws.onmessage = function (event) {
            const data = JSON.parse(event.data)
            if (data.msgType === "CONNECT") {// 提示开始
                that.tips = data.msg
            } else if (data.msgType === "INFO") {// 转写的内容
                that.recordForm.content = data.msg
            }
            // 收到服务器消息: {"msg":"测试实施内容","msgType":"INFO"}
            console.log('收到服务器消息:', event.data);
        };
        ws.onerror = function (event) {
            console.error('WebSocket 连接出现错误:', event);
        };
        ws.onclose = function () {
            console.log('WebSocket 连接已经关闭。');
        };
    })

    contentStart().then(() => {
    });
},
  2. 结束语音
end() {
   contentEnd(this.recordForm).then((res) => {
        console.log(res)
        this.recordId = res.data.data.id
        this.showKeyScore = true
        this.keyScore = res.data.data.wordScore
    });
},
  3. 请求地址
// GET /app/record/content/start — asks the backend to begin real-time transcription.
export const contentStart = (params) => request({
    url: '/app/record/content/start',
    method: 'get',
    params: params
})
// POST /app/record/content/end — asks the backend to stop transcription; `data` is sent as the request body.
export const contentEnd = (data) => request({
    url: '/app/record/content/end',
    method: 'post',
    data
})

后台

引入依赖

<!-- Java-WebSocket: provides the org.java_websocket client classes used by the transcription utility class. -->
<dependency>
    <groupId>org.java-websocket</groupId>
    <artifactId>Java-WebSocket</artifactId>
    <version>1.3.4</version>
</dependency>

controller

/**
 * Starts real-time speech transcription.
 *
 * <p>Delegates to {@code recordService.startExam()} and then reports success.
 * Transcription text is not returned by this endpoint.
 *
 * @return a success result once the start request has been handed off
 * @throws Exception if the service call fails
 */
// NOTE(review): the ApiOperation value/notes look copied from a list endpoint — confirm and relabel.
@ApiOperation(value = "列表", notes = "全部数据")
@RequestMapping(value = "/content/start", method = RequestMethod.GET)
public Result contentStart() throws Exception {
    recordService.startExam();
    // Return a real result (as contentEnd does) instead of null, so the client gets a well-formed body.
    return Result.success();
}

/**
 * Stops real-time speech transcription.
 *
 * <p>Calls {@code RTASRTest.end()} to signal the recording loop to stop, then
 * returns a success result. The {@code record} body is not used by the code
 * shown here; it is available for the business logic placeholder below.
 *
 * @param record request body submitted by the client
 * @return a success result
 * @throws Exception declared for business logic that may be added
 */
// NOTE(review): the ApiOperation value/notes look copied from a list endpoint — confirm and relabel.
@ApiOperation(value = "列表", notes = "全部数据")
@RequestMapping(value = "/content/end", method = RequestMethod.POST)
public Result contentEnd(@RequestBody Record record) throws Exception {
    RTASRTest.end();
   // Business logic goes here.
    return Result.success();
}

impl 层:使用 @Async 异步执行转写,避免 HTTP 请求因长时间阻塞而超时

/**
 * Runs a transcription session by calling {@code RTASRTest.start()}.
 *
 * <p>Annotated with {@code @Async} so the caller is not held for the duration
 * of the session.
 *
 * @throws Exception if {@code RTASRTest.start()} fails
 */
@Override
@Async
public void startExam() throws Exception {
    RTASRTest.start();
}

讯飞实时语音转写工具类
变量 flag 是转写开始/结束的标志:true 表示继续转写,false 表示结束转写。
音频为单声道;需要多次转写时,每次结束后都必须调用 targetDataLine.close() 释放麦克风,否则下一次会因设备被占用而报错。

import cn.hutool.core.util.ObjectUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.feng.modules.app.webSocket.MsgType;
import com.feng.modules.app.webSocket.SocketMsg;
import com.feng.modules.app.webSocket.WebSocketServer;
import org.java_websocket.WebSocket.READYSTATE;
import org.java_websocket.client.WebSocketClient;
import org.java_websocket.drafts.Draft;
import org.java_websocket.handshake.ServerHandshake;

import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.TargetDataLine;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Objects;
import java.util.concurrent.CountDownLatch;

/**
 * Real-time speech transcription client.
 *
 * <p>{@link #start()} opens a WebSocket to the transcription service, streams
 * microphone audio to it until {@link #end()} is called, and forwards the
 * accumulated final sentences to browser clients through
 * {@code WebSocketServer.sendInfo(..., "content")}.
 *
 * <p>The stop flag is a single static field, so only one session at a time is
 * supported.
 */
public class RTASRTest {

    // Application id issued by the transcription service.
    private static final String APPID = "xxxxx";

    // Secret key that belongs to APPID.
    private static final String SECRET_KEY = "xxxxxxxxxx";

    // Service endpoint (host and path).
    private static final String HOST = "rtasr.xfyun.cn/v1/ws";

    private static final String BASE_URL = "wss://" + HOST;

    private static final String ORIGIN = "https://" + HOST;

    // Path of a sample audio file; not used by the microphone flow in this class.
    private static final String AUDIO_PATH = "./resource/test_1.pcm";

    // Number of bytes sent per audio frame (1280 bytes).
    private static final int CHUNCKED_SIZE = 1280;

    // Upper bound on how long start() waits for the service to close the connection after the end marker.
    private static final long CLOSE_WAIT_SECONDS = 10;

    // SimpleDateFormat is not thread-safe; access it only through getCurrentTimeStr().
    private static final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");

    // true while recording should continue. Written by end() on one thread and read by the
    // recording loop on another, hence volatile.
    private static volatile boolean flag = true;

    /**
     * Requests the running transcription session to stop.
     */
    public static void end() {
        RTASRTest.flag = false;
    }

    /**
     * Runs one transcription session: connect, stream microphone audio until
     * {@link #end()} is called, send the end marker, then release all resources.
     *
     * @throws Exception if the connection cannot be established, the microphone
     *                   is unavailable, or the connection drops while streaming
     */
    public static void start() throws Exception {
        URI url = new URI(BASE_URL + getHandShakeParams(APPID, SECRET_KEY));
        DraftWithOrigin draft = new DraftWithOrigin(ORIGIN);
        CountDownLatch handshakeSuccess = new CountDownLatch(1);
        CountDownLatch connectClose = new CountDownLatch(1);
        MyWebSocketClient client = new MyWebSocketClient(url, draft, handshakeSuccess, connectClose);

        try {
            try {
                client.connect();
                awaitHandshake(client, handshakeSuccess, connectClose);

                System.out.println(getCurrentTimeStr() + " 开始讲话并发送。。");
                WebSocketServer.sendInfo(new SocketMsg("开始录制,请说话……", MsgType.CONNECT), "content");

                streamMicrophone(client);
            } finally {
                // Re-arm the flag even when the session failed, so a stop request that was
                // already consumed cannot cut the next session short. This happens before the
                // wait below so it cannot overwrite a stop request meant for a newer session.
                RTASRTest.flag = true;
            }

            // Tell the service that no more audio will follow.
            send(client, "{\"end\": true}".getBytes());
            System.out.println("发送结束标识");

            // Give the service time to deliver the remaining results and close the connection.
            connectClose.await(CLOSE_WAIT_SECONDS, java.util.concurrent.TimeUnit.SECONDS);
        } finally {
            if (!client.isClosed()) {
                client.close();
            }
        }
    }

    // Blocks until the service has acknowledged the session, or throws if the connection failed first.
    private static void awaitHandshake(MyWebSocketClient client, CountDownLatch handshakeSuccess,
                                       CountDownLatch connectClose) throws Exception {
        while (!client.getReadyState().equals(READYSTATE.OPEN)) {
            if (client.hasFailed() || connectClose.getCount() == 0) {
                throw new IllegalStateException("transcription connection failed before it was opened");
            }
            System.out.println(getCurrentTimeStr() + "\t连接中");
            WebSocketServer.sendInfo(new SocketMsg("连接中……", MsgType.CONNECT), "content");
            Thread.sleep(1000);
        }

        // Released by the "started" message, and also by onError/onClose so this cannot hang forever.
        handshakeSuccess.await();
        if (client.hasFailed() || connectClose.getCount() == 0) {
            throw new IllegalStateException("transcription session was rejected or closed during handshake");
        }
    }

    // Captures 16 kHz / 16-bit / mono audio and sends it frame by frame until end() is called; always closes the line.
    private static void streamMicrophone(WebSocketClient client) throws Exception {
        // Sample rate 16k, 16-bit samples, one channel, signed, little-endian.
        AudioFormat audioFormat = new AudioFormat(16000.0F, 16, 1, true, false);
        DataLine.Info info = new DataLine.Info(TargetDataLine.class, audioFormat);
        TargetDataLine targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
        try {
            targetDataLine.open(audioFormat);
            targetDataLine.start();
            byte[] buffer = new byte[CHUNCKED_SIZE];
            int read;
            while (RTASRTest.flag && (read = targetDataLine.read(buffer, 0, buffer.length)) > 0) {
                // Send only the bytes actually read; a short read must not resend stale data.
                send(client, read == buffer.length ? buffer : java.util.Arrays.copyOf(buffer, read));
            }
        } finally {
            // Close on every exit path; a line left open blocks the next session.
            targetDataLine.close();
        }
    }

    /**
     * Builds the query string used for the WebSocket handshake.
     *
     * @param appId     application id
     * @param secretKey secret key used to sign the request
     * @return the query string starting with {@code ?}, or an empty string if signing failed
     */
    public static String getHandShakeParams(String appId, String secretKey) {
        String ts = System.currentTimeMillis() / 1000 + "";
        try {
            String signa = EncryptUtil.HmacSHA1Encrypt(EncryptUtil.MD5(appId + ts), secretKey);
            return "?appid=" + appId + "&ts=" + ts + "&signa=" + URLEncoder.encode(signa, "UTF-8") + "&vadMdn=2";
        } catch (Exception e) {
            e.printStackTrace();
        }

        return "";
    }

    /**
     * Sends one binary frame.
     *
     * @param client connected client
     * @param bytes  payload
     * @throws RuntimeException if the client connection is already closed
     */
    public static void send(WebSocketClient client, byte[] bytes) {
        if (client.isClosed()) {
            throw new RuntimeException("client connect closed!");
        }

        client.send(bytes);
    }

    /**
     * Returns the current time formatted for log output.
     *
     * @return formatted timestamp
     */
    public static String getCurrentTimeStr() {
        // Called from both the recording thread and the WebSocket thread.
        synchronized (sdf) {
            return sdf.format(new Date());
        }
    }

    /**
     * WebSocket client that handles service messages and pushes final
     * transcription text to the browser.
     */
    public static class MyWebSocketClient extends WebSocketClient {

        private final CountDownLatch handshakeSuccess;
        private final CountDownLatch connectClose;

        // All final sentences received so far. Starts empty: a null String here used to make
        // every pushed result begin with the literal text "null".
        private final StringBuilder resultMsg = new StringBuilder();

        // Set when the service reports an error or the connection fails.
        private volatile boolean failed;

        public MyWebSocketClient(URI serverUri, Draft protocolDraft, CountDownLatch handshakeSuccess, CountDownLatch connectClose) {
            super(serverUri, protocolDraft);
            this.handshakeSuccess = handshakeSuccess;
            this.connectClose = connectClose;
            if (serverUri.toString().contains("wss")) {
                trustAllHosts(this);
            }
        }

        /**
         * @return true once the service reported an error or the connection failed
         */
        public boolean hasFailed() {
            return failed;
        }

        @Override
        public void onOpen(ServerHandshake handshake) {
            System.out.println(getCurrentTimeStr() + "\t!");
        }

        @Override
        public void onMessage(String msg) {
            JSONObject msgObj = JSON.parseObject(msg);
            String action = msgObj.getString("action");
            if (Objects.equals("started", action)) {
                // Session acknowledged by the service.
                System.out.println(getCurrentTimeStr() + "\t握手成功!sid: " + msgObj.getString("sid"));
                handshakeSuccess.countDown();
            } else if (Objects.equals("result", action)) {
                handleResult(msgObj);
            } else if (Objects.equals("error", action)) {
                System.out.println("Error: " + msg);
                markFailed();
            }
        }

        // Appends a final sentence to the accumulated text and pushes the whole text to the browser.
        private void handleResult(JSONObject msgObj) {
            JSONObject dataObj = msgObj.getJSONObject("data");
            String type = dataObj.getJSONObject("cn").getJSONObject("st").getString("type");

            // type: 0 = final result, 1 = intermediate result; only final results are kept.
            if (!"0".equals(type)) {
                return;
            }
            String sentence = getContent(msgObj.getString("data"));
            if (!ObjectUtil.isNotEmpty(sentence)) {
                return;
            }

            resultMsg.append(sentence);
            System.err.println("---------------------resultMsg" + resultMsg);
            try {
                WebSocketServer.sendInfo(new SocketMsg(resultMsg.toString(), MsgType.INFO), "content");
            } catch (IOException e) {
                // A failed push to the browser must not abort the transcription session.
                System.out.println(getCurrentTimeStr() + "\tfailed to push result: " + e.getMessage());
                e.printStackTrace();
            }
            System.out.println(getCurrentTimeStr() + "\tresult: " + sentence);
        }

        // Records the failure, releases anyone waiting for the handshake, and closes the connection.
        // This replaces System.exit(0), which would shut down the whole application.
        private void markFailed() {
            failed = true;
            handshakeSuccess.countDown();
            close();
        }

        @Override
        public void onError(Exception e) {
            System.out.println(getCurrentTimeStr() + "\t连接发生错误:" + e.getMessage() + ", " + new Date());
            e.printStackTrace();
            markFailed();
        }

        @Override
        public void onClose(int arg0, String arg1, boolean arg2) {
            System.out.println(getCurrentTimeStr() + "\t链接关闭");
            connectClose.countDown();
            // Also release a start() that is still waiting for the handshake.
            handshakeSuccess.countDown();
        }

        @Override
        public void onMessage(ByteBuffer bytes) {
            try {
                System.out.println(getCurrentTimeStr() + "\t服务端返回:" + new String(bytes.array(), "UTF-8"));
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
            }
        }

        /**
         * Installs an SSL socket on the given client.
         *
         * <p>NOTE(review): the trust manager below accepts every certificate, so
         * the server is not authenticated and the connection is open to
         * man-in-the-middle attacks. Replace it with the default trust managers
         * before production use.
         *
         * @param appClient client that receives the SSL socket
         */
        public void trustAllHosts(MyWebSocketClient appClient) {
            System.out.println("wss");
            TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager() {
                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    return new X509Certificate[]{};
                }

                @Override
                public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
                    // Intentionally empty: no client certificate check.
                }

                @Override
                public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
                    // Intentionally empty: accepts any server certificate (see the note above).
                }
            }};

            try {
                SSLContext sc = SSLContext.getInstance("TLS");
                sc.init(null, trustAllCerts, new java.security.SecureRandom());
                appClient.setSocket(sc.getSocketFactory().createSocket());
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Extracts the sentence text from a transcription result.
     *
     * <p>Concatenates every {@code w} value found under {@code cn.st.rt[].ws[].cw[]}.
     *
     * @param message JSON text of the result's {@code data} field
     * @return the concatenated text, or {@code message} unchanged if it cannot be parsed
     */
    public static String getContent(String message) {
        StringBuilder resultBuilder = new StringBuilder();
        try {
            JSONObject messageObj = JSON.parseObject(message);
            JSONObject cn = messageObj.getJSONObject("cn");
            JSONObject st = cn.getJSONObject("st");
            JSONArray rtArr = st.getJSONArray("rt");
            for (int i = 0; i < rtArr.size(); i++) {
                JSONObject rtArrObj = rtArr.getJSONObject(i);
                JSONArray wsArr = rtArrObj.getJSONArray("ws");
                for (int j = 0; j < wsArr.size(); j++) {
                    JSONObject wsArrObj = wsArr.getJSONObject(j);
                    JSONArray cwArr = wsArrObj.getJSONArray("cw");
                    for (int k = 0; k < cwArr.size(); k++) {
                        JSONObject cwArrObj = cwArr.getJSONObject(k);
                        String wStr = cwArrObj.getString("w");
                        resultBuilder.append(wStr);
                    }
                }
            }
        } catch (Exception e) {
            // Fall back to the raw message when the structure is not as expected.
            return message;
        }

        return resultBuilder.toString();
    }
}

websocket相关工具类
WebSocketServer 请求类


import com.alibaba.fastjson.JSONObject;
import com.feng.modules.app.webSocket.MsgType;
import com.feng.modules.app.webSocket.SocketMsg;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;

import javax.websocket.*;
import javax.websocket.server.PathParam;
import javax.websocket.server.ServerEndpoint;
import java.io.IOException;
import java.util.Objects;
import java.util.concurrent.CopyOnWriteArraySet;
/**
 * websocket 消息通信
 */
@ServerEndpoint("/webSocket/{sid}")
@Slf4j
@Component
public class WebSocketServer {

	/**
	 * concurrent包的线程安全Set,用来存放每个客户端对应的MyWebSocket对象。
	 */
	private static CopyOnWriteArraySet<WebSocketServer> webSocketSet = new CopyOnWriteArraySet<WebSocketServer>();

	/**
	 * 与某个客户端的连接会话,需要通过它来给客户端发送数据
	 */
	private Session session;

	/**
	 * 接收sid
	 */
	private String sid="";
	/**
	 * 连接建立成功调用的方法
	 * */
	@OnOpen
	public void onOpen(Session session,@PathParam("sid") String sid) {
		System.out.println("连接成功-----sid:"+sid);
		this.session = session;
		//如果存在就先删除一个,防止重复推送消息
		for (WebSocketServer webSocket:webSocketSet) {
			if (webSocket.sid.equals(sid)) {
				webSocketSet.remove(webSocket);
			}
		}
		webSocketSet.add(this);
		this.sid=sid;
	}

	/**
	 * 连接关闭调用的方法
	 */
	@OnClose
	public void onClose() {
		webSocketSet.remove(this);
	}

	/**
	 * 收到客户端消息后调用的方法
	 * @param message 客户端发送过来的消息*/
	@OnMessage
	public void onMessage(String message, Session session) {
		log.info("收到来"+sid+"的信息:"+message);
		//群发消息
		for (WebSocketServer item : webSocketSet) {
			try {
				item.sendMessage(message);
			} catch (IOException e) {
				log.error(e.getMessage(),e);
			}
		}
	}

	@OnError
	public void onError(Session session, Throwable error) {
		log.error("发生错误");
		error.printStackTrace();
	}
	/**
	 * 实现服务器主动推送
	 */
	private void sendMessage(String message) throws IOException {
		System.out.println("---------发送消息------------");
		this.session.getBasicRemote().sendText(message);
	}
	
	/**
	 * 群发自定义消息
	 * */
	public static void sendInfo(SocketMsg socketMsg,@PathParam("sid") String sid) throws IOException {
		String message = JSONObject.toJSONString(socketMsg);
		log.info("推送消息到"+sid+",推送内容:"+message);
		for (WebSocketServer item : webSocketSet) {
			try {
				//这里可以设定只推送给这个sid的,为null则全部推送
				if(sid==null) {
					item.sendMessage(message);
				}else if(item.sid.equals(sid)){
					item.sendMessage(message);
				}
			} catch (IOException ignored) { }
		}
	}

	@Override
	public boolean equals(Object o) {
		if (this == o) {
			return true;
		}
		if (o == null || getClass() != o.getClass()) {
			return false;
		}
		WebSocketServer that = (WebSocketServer) o;
		return Objects.equals(session, that.session) &&
				Objects.equals(sid, that.sid);
	}

	@Override
	public int hashCode() {
		return Objects.hash(session, sid);
	}
}

WebSocketConfig 类

import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.socket.server.standard.ServerEndpointExporter;

/**
 * Spring configuration that registers a {@link ServerEndpointExporter} bean.
 */
@Configuration
public class WebSocketConfig {

	/**
	 * Creates the {@link ServerEndpointExporter} bean.
	 *
	 * @return a new exporter instance
	 */
	@Bean
	public ServerEndpointExporter serverEndpointExporter() {
		return new ServerEndpointExporter();
	}
}

SocketMsg 发送的消息对象

import lombok.Data;

/**
 * Message object pushed to WebSocket clients: a text payload plus its type.
 */
@Data
public class SocketMsg {
	// Text payload of the message.
	private String msg;
	// Category of the message.
	private MsgType msgType;

	/**
	 * Creates a message.
	 *
	 * @param msg     text payload
	 * @param msgType message category
	 */
	public SocketMsg(String msg, MsgType msgType) {
		this.msg = msg;
		this.msgType = msgType;
	}
}

MsgType 消息类型

/**
 * Categories of messages pushed over the WebSocket.
 */
public enum MsgType {
	/** Connection */
	CONNECT,
	/** Close */
	CLOSE,
	/** Information */
	INFO,
	/** Error */
	ERROR
}
  • 3
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
是一家提供语音识别和自然语言处理技术的公司,它的实时语音转写功能可以将语音实时转化为文本。使用实时语音转写需要以下步骤: 第一步,创建开放平台账号并在平台上申请相应的应用ID和secret key。 第二步,安装Python SDK并导入依赖包,例如:requests、json。 第三步,通过实时语音转写接口发送请求,传递相关参数,从而获得转写结果。 下面是一个基于Python SDK开发的实时语音转写示例代码: ```python # 导入依赖包 import websocket import requests import time import hashlib import base64 import json # API身份验证 APPID = '你的应用ID' API_SECRET = '你的APISecret' API_KEY = '你的APIKey' # 获取时间戳和鉴权参数 def getHeaderParam(): curTime = str(int(time.time())) param = { "engine_type": "sms16k", "aue": "raw", "sample_rate": "16000", "param": "aue=raw,engine_type=sms16k,sample_rate=16000" } paramStr = json.dumps(param) paramBase64 = str(base64.b64encode(paramStr.encode('utf-8')), 'utf-8') checkSum = hashlib.md5((API_KEY + curTime + paramBase64).encode('utf-8')).hexdigest() header = { 'X-CurTime':curTime, 'X-Param':paramBase64, 'X-Appid':APPID, 'X-CheckSum':checkSum } return header # 处理结果 def on_message(ws, message): messageDict = json.loads(message) code = messageDict['code'] if code != 0: print("请重新开始录音") else: result = messageDict['data']['result'] print(result) # 主函数 if __name__ == '__main__': # 通过REST API获取实时语音转写WebSocket地址和token response = requests.post('http://api.xfyun.cn/v1/service/v1/iat', headers=getHeaderParam()) responseDict = json.loads(response.text) if responseDict['code'] != '0': print(responseDict['message']) else: address = responseDict['data']['url'] token = responseDict['data']['token'] # 创建WebSocket连接 ws = websocket.WebSocketApp(address, on_message=on_message, on_error=on_error, on_close=on_close) # 添加鉴权信息 ws.headers = { 'Authorization': token } # 开始录音 ws.send(json.dumps({'common': {'app_id': APPID}})) ws.send(json.dumps({'business': {'language': 'zh_cn','domain': 'iat','accent': 'mandarin','vad_eos': 10000},'data': {'status': 0}})) # 持续录音并发送转写请求 while(True): audio = input("请输入:") # 实现语音输入(如需使用麦克风录音?) 
if audio == '': break else: ws.send(audio.encode("utf-8")) # 结束录音 ws.send(json.dumps({'business': {'cmd': 'auw', 'aus': '00'}})) ws.close() ``` 注意,该示例代码采用的是模拟语音输入的方式,可以通过实现语音输入并将其转为byte字符串的方式替换。另外,在处理结果的函数内应该添加一些错误处理,以防出现意外情况。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值