WebSocket 与语音识别：实现实时语音转文字，且展示音浪效果

Irene林。

已于 2024-08-27 13:40:14 修改

阅读量2.2k

点赞数 6

分类专栏：websocket　文章标签：语音识别、人工智能、vue.js、websocket

于 2024-08-23 10:31:20 首次发布

本文链接：https://blog.csdn.net/m0_47396689/article/details/141457894

版权

websocket 专栏收录该内容

1 篇文章

订阅专栏

实现功能：

1.点击开始说话按钮，调用录音功能

2.在说话过程中，将语音数据处理成二进制流，并通过 websocket 实时发送

3.在说话过程中展示相应音浪效果

4.点击关闭按钮，停止说话，并获取识别的文字内容

效果展示：

1.websocket传输数据格式

2.语音识别的文字内容

3.音浪效果展示

整体效果如下：

完整代码如下：

<template>
  <div class="mainContent">
    <!-- Start button: invokes startIntercom from the script section. -->
    <button @click="startIntercom">开始对讲</button>
    <!-- Stop button: invokes endIntercom from the script section. -->
    <button @click="endIntercom">关闭对讲</button>
    <!-- Shows the recognised text held in resText, or "--" while it is empty. -->
    <div>
      语音识别的文字为：{{ resText || "--" }}
    </div>
    <!-- Canvas bound to the waveformCanvas ref; updateWaveform draws the level bars on it. -->
    <canvas ref="waveformCanvas" width="200" height="20"></canvas>
  </div>
</template>

<script setup>
import { ref, onMounted, onUnmounted } from "vue";

// Recognised text returned by the speech service; rendered in the template.
const resText = ref("");

/*
 * The speech WebSocket expects an `is_speaking` status message before the
 * first audio packet and again when recording ends. The remaining fields
 * are backend-specific and should be adapted to the actual service.
 */
const buildControlMessage = (isSpeaking) => ({
  is_speaking: isSpeaking,
  mode: "2pass",
  wav_name: "h5",
});

// Message sent when recording starts.
const startData = buildControlMessage(true);

// Message sent when recording ends.
const endData = buildControlMessage(false);

// Active WebSocket connection (null until a session is started).
const ws = ref(null);
// Current Recorder instance that captures and streams the audio.
const record = ref(null);
// Template ref to the <canvas> used for the level bars.
const waveformCanvas = ref(null);

/**
 * Registers a recorder as the component's current one by storing it in the
 * shared `record` ref.
 *
 * @param {object} rec - Recorder instance to keep.
 */
function init(rec) {
  const holder = record;
  holder.value = rec;
}
/**
 * Recorder: captures audio from a MediaStream, down-samples it to 16 kHz,
 * encodes it as 16-bit little-endian PCM and streams it over the shared
 * `ws` socket in packets of at most 1024 bytes. Every captured buffer is also
 * handed to `updateWaveform` so the level bars follow the input.
 *
 * A Recorder is single-use: `stop()` releases the microphone tracks and the
 * AudioContext, so a new instance has to be created for the next session.
 *
 * @param {MediaStream} stream - Microphone stream obtained from getUserMedia.
 */
const Recorder = function (stream) {
  const WS_OPEN = 1; // numeric value of WebSocket.OPEN
  const CHUNK_BYTES = 1024; // maximum size of one binary packet
  const BYTES_PER_SAMPLE = 2; // output is always 16-bit PCM
  const sampleRate = 16000; // output sample rate in Hz

  const context = new AudioContext();
  const audioInput = context.createMediaStreamSource(stream);
  // NOTE: ScriptProcessorNode is deprecated in favour of AudioWorklet; it is
  // kept here so the component needs no separate worklet module.
  const recorder = context.createScriptProcessor(4096, 1, 1);

  const audioData = {
    size: 0, // number of buffered samples
    buffer: [], // buffered Float32Array chunks
    // Use the rate the context really runs at instead of assuming 48 kHz.
    inputSampleRate: context.sampleRate,
    outputSampleRate: sampleRate,
    clear() {
      this.buffer = [];
      this.size = 0;
    },
    input(data) {
      // Copy: the audio engine may reuse the array passed to the callback.
      this.buffer.push(new Float32Array(data));
      this.size += data.length;
    },
    // Merges the buffered chunks and down-samples them to the output rate.
    compress() {
      const data = new Float32Array(this.size);
      let offset = 0;
      for (const chunk of this.buffer) {
        data.set(chunk, offset);
        offset += chunk.length;
      }
      // A fractional step keeps the output rate correct when the input rate
      // is not an integer multiple of it (e.g. 44100 -> 16000). Never
      // up-sample: a ratio below 1 is clamped to 1.
      const ratio = Math.max(1, this.inputSampleRate / this.outputSampleRate);
      const length = Math.floor(data.length / ratio);
      const result = new Float32Array(length);
      for (let i = 0; i < length; i++) {
        result[i] = data[Math.floor(i * ratio)];
      }
      return result;
    },
    // Encodes the buffered audio as 16-bit little-endian PCM.
    encodePCMBuffer() {
      const samples = this.compress();
      const buffer = new ArrayBuffer(samples.length * BYTES_PER_SAMPLE);
      const view = new DataView(buffer);
      for (let i = 0; i < samples.length; i++) {
        const s = Math.max(-1, Math.min(1, samples[i]));
        view.setInt16(
          i * BYTES_PER_SAMPLE,
          s < 0 ? s * 0x8000 : s * 0x7fff,
          true
        );
      }
      return buffer;
    },
    // Same PCM data wrapped in a Blob (used by getBlob).
    encodePCM() {
      return new Blob([this.encodePCMBuffer()]);
    },
  };

  let recording = true; // cleared by stop(); gates capture and sending

  // Encodes everything buffered so far and sends it in CHUNK_BYTES packets.
  const sendData = function () {
    if (!recording) return;
    const pcm = audioData.encodePCMBuffer();
    audioData.clear(); // buffered data is consumed whether or not it is sent
    const socket = ws.value;
    // send() throws while the socket is still connecting, so drop the audio
    // instead of breaking the audio callback.
    if (!socket || socket.readyState !== WS_OPEN) return;
    // Sending synchronously (no FileReader round-trip) guarantees that no
    // audio packet is emitted after stop() has returned.
    for (let start = 0; start < pcm.byteLength; start += CHUNK_BYTES) {
      socket.send(pcm.slice(start, start + CHUNK_BYTES));
    }
  };

  // Connects the audio graph so that onaudioprocess starts firing.
  this.start = function () {
    if (!recording) return; // already stopped: resources are released
    audioInput.connect(recorder);
    recorder.connect(context.destination);
  };

  // Stops capturing and releases the microphone and the audio context.
  this.stop = function () {
    if (!recording) return; // idempotent
    recording = false;
    audioInput.disconnect();
    recorder.disconnect();
    // Without stopping the tracks the browser keeps the microphone open.
    if (stream && typeof stream.getTracks === "function") {
      stream.getTracks().forEach((track) => track.stop());
    }
    if (context.state !== "closed") {
      context.close().catch((err) => console.error(err));
    }
  };

  // Returns the currently buffered audio as a PCM Blob.
  this.getBlob = function () {
    return audioData.encodePCM();
  };

  // Discards any buffered audio.
  this.clear = function () {
    audioData.clear();
  };

  recorder.onaudioprocess = function (e) {
    if (!recording) return; // do not accumulate audio after stop()
    const inputBuffer = e.inputBuffer.getChannelData(0);
    audioData.input(inputBuffer);
    sendData();

    // Refresh the level bars with the raw samples.
    updateWaveform(inputBuffer);
  };
};

/**
 * Opens the speech-recognition WebSocket and stores it in `ws`.
 *
 * Once the connection is open it sends `startData` and starts the current
 * recorder. Text messages are parsed as JSON and their `text` field is
 * written to `resText`. A socket left over from an earlier session is closed
 * first, and callbacks belonging to a replaced socket are ignored.
 */
const useWebSocket = () => {
  const WS_OPEN = 1; // numeric value of WebSocket.OPEN

  // Close a connection that is still connecting (0) or open (1) so restarts
  // do not pile up sockets.
  const previous = ws.value;
  if (previous && previous.readyState <= WS_OPEN) {
    previous.close();
  }

  // The handlers use this local reference rather than `ws.value`, which may
  // already point at a newer socket by the time they run.
  const socket = new WebSocket("ws://xxx"); // replace with the real WebSocket URL
  socket.binaryType = "arraybuffer"; // binary frames are delivered as ArrayBuffer
  ws.value = socket;

  socket.onopen = function () {
    socket.send(JSON.stringify(startData));
    console.log("握手成功");
    // Start streaming audio only if this socket is still the current one.
    if (ws.value === socket && record.value) {
      record.value.start();
    }
  };

  socket.onmessage = function (msg) {
    if (ws.value !== socket) return; // stale connection
    if (typeof msg.data !== "string") return; // only text frames carry JSON
    let res;
    try {
      res = JSON.parse(msg.data);
    } catch (err) {
      console.error("无法解析识别结果", err);
      return;
    }
    if (res === null || typeof res !== "object") return;
    resText.value = res.text;
  };

  socket.onerror = function (err) {
    console.info(err);
  };
};

/**
 * Starts a talk session: requests microphone access, creates a Recorder for
 * the returned stream and opens the WebSocket.
 *
 * A recorder left over from a previous session is stopped first so that two
 * recorders never stream at the same time. Failures are logged, not rethrown.
 *
 * @returns {Promise<void>}
 */
const startIntercom = async () => {
  let mediaStream = null;
  try {
    if (record.value) {
      record.value.stop();
    }
    mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: true,
    });
    init(new Recorder(mediaStream));
    console.log("开始对讲");
    useWebSocket();
  } catch (error) {
    // If setup failed after the stream was acquired, release the microphone.
    if (mediaStream) {
      mediaStream.getTracks().forEach((track) => track.stop());
    }
    console.error("无法打开麦克风", error);
  }
};

/**
 * Ends the talk session: stops the recorder, then sends `endData` to tell the
 * server that speaking has finished.
 *
 * The recorder is stopped first, and the end marker is only sent on an open
 * socket, so a socket that is still connecting or already closed can no
 * longer prevent the recorder from being stopped. The socket itself is left
 * open.
 */
const endIntercom = () => {
  const WS_OPEN = 1; // numeric value of WebSocket.OPEN

  if (record.value) {
    record.value.stop();
  }

  const socket = ws.value;
  if (!socket) return;

  if (socket.readyState === WS_OPEN) {
    socket.send(JSON.stringify(endData));
  } else {
    console.warn("websocket未连接，无法发送结束标识");
  }
  // NOTE(review): the connection is deliberately not closed here, presumably
  // so the final recognition result can still arrive; confirm with backend.
  console.log("不关闭websocket连接");
};

// 更新 Canvas 上的音浪效果
const updateWaveform = (inputBuffer) => {
  const canvas = waveformCanvas.value;
  const ctx = canvas.getContext("2d");
  const width = canvas.width;
  const height = canvas.height;

  // 清除画布
  ctx.clearRect(0, 0, width, height);

  // 设置波形颜色
  ctx.fillStyle = "#106AE8";

  // 绘制波形
  const numBars = 20; // 波形数量可根据实际需求设置
  const barWidth = width / (numBars * 3); // 每个波形柱的宽度
  let barHeight;
  let x = 15;

  // 遍历输入缓冲区并绘制波形
  for (let i = 0; i < numBars; i++) {
    // 计算当前波形柱的高度
    const sampleIndex = Math.floor(i * (inputBuffer.length / numBars));
    barHeight = (Math.abs(inputBuffer[sampleIndex]) * height * 6) / 2; // 使用绝对值并增加系数

    // 绘制波形柱
    ctx.fillRect(x, height / 2 - barHeight, barWidth, barHeight * 2);
    x += barWidth + 4;
  }
};

onMounted(() => {
  // Nothing to initialise: the canvas is only drawn on from the audio
  // callback, through updateWaveform.
});

// Release the microphone and the connection when the component goes away, so
// neither outlives the page that started them.
onUnmounted(() => {
  if (record.value) {
    record.value.stop();
    record.value = null;
  }
  if (ws.value) {
    ws.value.close();
    ws.value = null;
  }
});