ASR检测

1、子组件:<ASRDetection/> 

<template>
  <!-- ASR detection overlay: status message, two pulsing wave rings, and a microphone icon. -->
  <div>
    <!-- Current ASR status text supplied by the parent (ASRMessage prop). -->
    <div class="message">{{ ASRMessage }}</div>
    <!-- Wave rings pulse (class "animation") while isASRAnimation is true. -->
    <div class="wave wave0" :class="isASRAnimation ? 'animation' : ''"></div>
    <div class="wave wave1" :class="isASRAnimation ? 'animation' : ''"></div>
    <!-- Static microphone graphic (background image, see styles). -->
    <div class="microphone"></div>
  </div>
</template>

<script>
import { defineComponent, onMounted, ref, onUnmounted } from 'vue'
import VAD from '@/assets/js/vad.js'

export default defineComponent({
  name: 'ASRDetection',
  props: {
    // Status text shown above the microphone animation.
    ASRMessage: {
      type: String,
      default: '等待说话'
    },
    videoId: {
      type: String,
      default: ''
    }
  },
  // Emits: 'voice_start' when speech is detected, 'voice_stop' ~1s after it ends.
  setup (props, { emit }) {
    // MediaStream from getUserMedia; kept so the mic can be released on unmount.
    let localStreamTrack = null
    // Drives the pulsing wave animation while voice activity is detected.
    const isASRAnimation = ref(false)
    const isShow = ref(false)

    /**
     * Request the microphone and wire its stream into the VAD detector.
     * Callbacks from VAD toggle the animation state and re-emit to the parent.
     */
    const initVAD = () => {
      window.AudioContext = window.AudioContext || window.webkitAudioContext
      // FIX: call getUserMedia through navigator.mediaDevices — detaching the
      // method (navigator.getUserMedia = navigator.mediaDevices.getUserMedia)
      // loses its `this` binding and throws "Illegal invocation".
      navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
        const audioContext = new AudioContext()
        localStreamTrack = stream
        const source = audioContext.createMediaStreamSource(stream)
        new VAD({
          source,
          voice_start () {
            // Speech detected: show the animation and notify the parent.
            // (Restores the previously commented-out behavior — without it the
            // wave animation bound in the template could never activate.)
            isASRAnimation.value = true
            isShow.value = true
            emit('voice_start')
          },
          voice_stop () {
            // Speech ended: short grace period before hiding and notifying,
            // so brief pauses do not flicker the UI.
            setTimeout(() => {
              isASRAnimation.value = false
              isShow.value = false
              emit('voice_stop')
            }, 1000)
          }
        })
      }).catch(err => {
        // Surface mic-permission / device errors instead of failing silently.
        console.error('ASRDetection: getUserMedia failed', err)
      })
    }

    onMounted(() => {
      initVAD()
    })

    onUnmounted(() => {
      // Release the microphone; guard against getUserMedia never resolving.
      if (localStreamTrack) {
        localStreamTrack.getTracks().forEach(track => track.stop())
      }
    })

    return { isASRAnimation, isShow }
  }
})
</script>

<style lang="scss" scoped>
// Status text centered over the animation area.
.message {
  width: 100%;
  position: fixed;
  top: 32%;
  left: 0;
  text-align: center;
  z-index: 3;
  color: #fff;
}
// Outer (larger, fainter) wave ring.
.wave0 {
  width: 220px;
  height: 220px;
  background: rgba(86, 151, 255, 0.16);
  position: fixed;
  bottom: 210px;
  left: 50%;
  margin-left: -110px;
  z-index: 1;
  border-radius: 50%;
}
// Inner (smaller, stronger) wave ring.
.wave1 {
  width: 200px;
  height: 200px;
  background: rgba(86, 151, 255, 0.48);
  position: fixed;
  bottom: 220px;
  left: 50%;
  margin-left: -100px;
  z-index: 2;
  border-radius: 50%;
}
// Both rings share the same pulse animation while voice is detected.
.wave0.animation {
  animation: twinkling 1s infinite ease-in-out;
}
.wave1.animation {
  animation: twinkling 1s infinite ease-in-out;
}
// Microphone icon layered on top of the rings.
.microphone {
  width: 140px;
  height: 140px;
  position: fixed;
  bottom: 250px;
  left: 50%;
  margin-left: -70px;
  z-index: 3;
  background-image: url('../../../assets/microphone.png');
  background-size: cover;
}
// NOTE(review): unused keyframes kept commented out by the original author.
// @keyframes scale0 {
//   0% {
//     transform: scale(1);
//   }
//   100% {
//     transform: scale(1.2);
//   }
// }
// @keyframes scale1 {
//   0% {
//     transform: scale(1);
//   }
//   100% {
//     transform: scale(1.1);
//   }
// }
// Pulse: fade and grow slightly, then return (1s loop).
@keyframes twinkling {
  0% {
    opacity: 0.5;
    filter: alpha(opacity=50);
    -webkit-transform: scale(1);
  }

  50% {
    opacity: 1;
    filter: alpha(opacity=100);
    -webkit-transform: scale(1.12);
  }

  100% {
    opacity: 0.5;
    filter: alpha(opacity=50);
    -webkit-transform: scale(1);
  }
}
</style>

2、父组件

  <ASRDetection
    @voice_start="voice_start"
    @voice_stop="voice_stop"
    v-if="isASRDetectState"
  />



    // Handler for the VAD start event emitted by <ASRDetection>.
    const voice_start = () => {
      console.log("====voice_start", voice_start);
    };

    // Handler for the VAD stop event emitted by <ASRDetection>:
    // hide the detector and show a blocking "checking" loading toast.
    const voice_stop = () => {
      console.log("====voice_stop", voice_stop);
      isASRDetectState.value = false;
      Toast.loading({
        message: "检测中...",
        forbidClick: true,
        duration: 0,
      });
    };

3、vad.js

/**
 * Voice Activity Detector built on the Web Audio API.
 *
 * Feeds dB FFT frames from an AnalyserNode through a per-bin weighting
 * filter, tracks a slowly adapting energy offset, and fires the
 * `voice_start` / `voice_stop` callbacks when the smoothed energy trend
 * crosses its hysteresis thresholds.
 *
 * @param {Object} options - Must include `source`, a MediaStreamAudioSourceNode.
 * @throws {Error} When `options.source` is missing.
 */
const VAD = function(options) {
  // Default options
  this.options = {
    fftSize: 512, // Analyser FFT size (frequencyBinCount = fftSize / 2).
    bufferLen: 1024, // ScriptProcessor buffer length; sets the analysis rate.
    voice_stop: function() {},
    voice_start: function() {},
    smoothingTimeConstant: 0.99, // Analyser smoothing between frames.
    energy_offset: 1e-8, // The initial offset.
    energy_threshold_ratio_pos: 2, // Signal must be twice the offset
    energy_threshold_ratio_neg: 0.5, // Signal must be half the offset
    energy_integration: 1, // Size of integration change compared to the signal per second.
    filter: [
      { f: 200, v: 0 }, // 0 -> 200 Hz weighted 0 (drop low-frequency rumble).
      { f: 2000, v: 1 } // 200 Hz -> 2 kHz weighted 1 (speech band).
    ],
    source: null,
    context: null
  };

  // Overlay user options onto the defaults.
  for (const option in options) {
    if (Object.prototype.hasOwnProperty.call(options, option)) {
      this.options[option] = options[option];
    }
  }

  // A source node is mandatory; everything hangs off its AudioContext.
  if (!this.options.source)
    throw new Error("The options must specify a MediaStreamAudioSourceNode.");

  // Set this.options.context
  this.options.context = this.options.source.context;

  // Calculate time relationships
  this.hertzPerBin = this.options.context.sampleRate / this.options.fftSize;
  this.iterationFrequency = this.options.context.sampleRate / this.options.bufferLen;
  this.iterationPeriod = 1 / this.iterationFrequency;

  const DEBUG = true;
  if (DEBUG) console.log(
    'Vad' +
    ' | sampleRate: ' + this.options.context.sampleRate +
    ' | hertzPerBin: ' + this.hertzPerBin +
    ' | iterationFrequency: ' + this.iterationFrequency +
    ' | iterationPeriod: ' + this.iterationPeriod
  );

  /**
   * Build the per-bin weight table (this.filter) from a piecewise shape:
   * each bin gets the `v` of the first shape entry whose frequency bound
   * exceeds the bin's center frequency; bins above the last bound get 0.
   */
  this.setFilter = function(shape) {
    this.filter = [];
    for (let i = 0, iLen = this.options.fftSize / 2; i < iLen; i++) {
      this.filter[i] = 0;
      for (let j = 0, jLen = shape.length; j < jLen; j++) {
        if (i * this.hertzPerBin < shape[j].f) {
          this.filter[i] = shape[j].v;
          break; // First matching segment wins.
        }
      }
    }
  };
  this.setFilter(this.options.filter);

  this.ready = {}; // Per-frame memoization flags (cleared in update()).
  this.vadState = false; // True when Voice Activity Detected

  // Energy detector props
  this.energy_offset = this.options.energy_offset;
  this.energy_threshold_pos = this.energy_offset * this.options.energy_threshold_ratio_pos;
  this.energy_threshold_neg = this.energy_offset * this.options.energy_threshold_ratio_neg;

  // Hysteresis counter: climbs while energy is high, decays while low.
  this.voiceTrend = 0;
  this.voiceTrendMax = 10;
  this.voiceTrendMin = -10;
  this.voiceTrendStart = 5; // Trend above this => start of speech.
  this.voiceTrendEnd = -5; // Trend below this => end of speech.

  // Create analyser
  this.analyser = this.options.context.createAnalyser();
  this.analyser.smoothingTimeConstant = this.options.smoothingTimeConstant;
  this.analyser.fftSize = this.options.fftSize;

  this.floatFrequencyData = new Float32Array(this.analyser.frequencyBinCount);

  // Local storage of the linear (non-dB) FFT data.
  this.floatFrequencyDataLinear = new Float32Array(this.floatFrequencyData.length);

  // Connect this.analyser
  this.options.source.connect(this.analyser);

  // NOTE(review): ScriptProcessorNode is deprecated in favor of
  // AudioWorklet, but is kept here since the rest of the code relies on it.
  this.scriptProcessorNode = this.options.context.createScriptProcessor(this.options.bufferLen, 1, 1);
  // Connect scriptProcessorNode (theoretically not required).
  this.scriptProcessorNode.connect(this.options.context.destination);

  // Analyze a fresh FFT frame every bufferLen samples.
  var self = this;
  this.scriptProcessorNode.onaudioprocess = function() {
    self.analyser.getFloatFrequencyData(self.floatFrequencyData);
    self.update();
    self.monitor();
  };

  // Connect scriptProcessorNode
  this.options.source.connect(this.scriptProcessorNode);

  // Throttled debug logging state.
  this.logging = false;
  this.log_i = 0;
  this.log_limit = 100;

  // Enable logging for the next `limit` frames (default: previous limit).
  this.triggerLog = function(limit) {
    this.logging = true;
    this.log_i = 0;
    this.log_limit = typeof limit === 'number' ? limit : this.log_limit;
  };

  this.log = function(msg) {
    if (this.logging && this.log_i < this.log_limit) {
      this.log_i++;
      console.log(msg);
    } else {
      this.logging = false;
    }
  };

  // Convert the analyser's dB frame to linear power and reset memoization.
  this.update = function() {
    var fft = this.floatFrequencyData;
    for (let i = 0, iLen = fft.length; i < iLen; i++) {
      this.floatFrequencyDataLinear[i] = Math.pow(10, fft[i] / 10);
    }
    this.ready = {};
  };

  // Filter-weighted energy of the current frame (memoized per frame).
  this.getEnergy = function() {
    if (this.ready.energy) {
      return this.energy;
    }

    var energy = 0;
    var fft = this.floatFrequencyDataLinear;
    for (let i = 0, iLen = fft.length; i < iLen; i++) {
      energy += this.filter[i] * fft[i] * fft[i];
    }

    this.energy = energy;
    this.ready.energy = true;
    return energy;
  };

  /**
   * Per-frame detector: updates the trend hysteresis, adapts the energy
   * offset toward the signal, and fires voice_start / voice_stop exactly
   * once per state transition. Returns the raw signal (energy - offset).
   */
  this.monitor = function() {
    var energy = this.getEnergy();
    var signal = energy - this.energy_offset;

    if (signal > this.energy_threshold_pos) {
      this.voiceTrend = (this.voiceTrend + 1 > this.voiceTrendMax) ? this.voiceTrendMax : this.voiceTrend + 1;
    } else if (signal < -this.energy_threshold_neg) {
      this.voiceTrend = (this.voiceTrend - 1 < this.voiceTrendMin) ? this.voiceTrendMin : this.voiceTrend - 1;
    } else {
      // In the dead zone the trend decays toward zero.
      if (this.voiceTrend > 0) {
        this.voiceTrend--;
      } else if (this.voiceTrend < 0) {
        this.voiceTrend++;
      }
    }

    var start = false, end = false;
    if (this.voiceTrend > this.voiceTrendStart) {
      // Start of speech detected
      start = true;
    } else if (this.voiceTrend < this.voiceTrendEnd) {
      // End of speech detected
      end = true;
    }

    // Integration brings in the real-time aspect through the relationship
    // with the frequency this function is called.
    var integration = signal * this.iterationPeriod * this.options.energy_integration;

    // The !end limits the offset delta boost till after the end is detected.
    if (integration > 0 || !end) {
      this.energy_offset += integration;
    } else {
      this.energy_offset += integration * 10;
    }
    this.energy_offset = this.energy_offset < 0 ? 0 : this.energy_offset;
    this.energy_threshold_pos = this.energy_offset * this.options.energy_threshold_ratio_pos;
    this.energy_threshold_neg = this.energy_offset * this.options.energy_threshold_ratio_neg;

    // Broadcast state transitions exactly once per edge.
    if (start && !this.vadState) {
      this.vadState = true;
      this.options.voice_start();
    }
    if (end && this.vadState) {
      this.vadState = false;
      this.options.voice_stop();
    }

    this.log(
      'e: ' + energy +
      ' | e_of: ' + this.energy_offset +
      ' | e+_th: ' + this.energy_threshold_pos +
      ' | e-_th: ' + this.energy_threshold_neg +
      ' | signal: ' + signal +
      ' | int: ' + integration +
      ' | voiceTrend: ' + this.voiceTrend +
      ' | start: ' + start +
      ' | end: ' + end
    );

    return signal;
  };
};

export default VAD;

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值