前端语音识别（webkitSpeechRecognition）

莲青见卿

已于 2023-08-09 11:15:33 修改

阅读量1w

点赞数 27

分类专栏：前端基础　文章标签：前端、语音识别、人工智能

于 2023-08-02 17:17:29 首次发布

本文链接：https://blog.csdn.net/vh_YUNYANGYUMO/article/details/132065935

版权

前端基础专栏收录该内容

13 篇文章

订阅专栏

需求：需要在浏览器（pc）实现语音转文字。

webkitSpeechRecognition（语音识别）

// Create a webkitSpeechRecognition instance.
const newRecognition = new webkitSpeechRecognition();
// true: keep recognizing until stop() is called; false: end the session after a single result.
newRecognition.continuous = true;

// Attach every handler BEFORE calling start(), so that no early event is missed.

// Fires every time a result is available, so speech containing several pauses
// triggers it several times.
newRecognition.onresult = (event) => {
    console.log(event);
};

// The code above is all that speech-to-text needs; the handlers below are optional extras.

// As soon as the user starts talking, both soundstart and speechstart fire.
newRecognition.onsoundstart = (e) => {
    console.log("开始收听了");
    console.log(e);
};
newRecognition.onspeechstart = (e) => {
    console.log("开始讲话了");
    console.log(e);
};
// Fires when the recognizer stops detecting speech. The article's author also
// observed it after stop() was called and after a period of silence ended the
// recording automatically.
newRecognition.onspeechend = (e) => {
    console.log("讲话完毕");
    console.log(e);
};
// Fires when recognition fails. The author observed it when a long silence made
// the browser close the recording on its own.
newRecognition.onerror = (e) => {
    console.log("发生异常");
    console.log(e);
};

// Start recording only now that all handlers are in place.
newRecognition.start();

/**
 * Stop recording.
 *
 * Call this once the user has finished (for example from a button click).
 * Calling stop() synchronously right after start() would end the session
 * before anything could be recognized.
 */
function stopRecognition() {
    newRecognition.stop();
}

vue3.0 demo

存在兼容问题，本人测试谷歌浏览器并不能正常使用（不排除版本较低的缘故），safari浏览器正常。

<template>
    <div>
        <!-- Dictation target: bound to textInp, so it can also be edited by hand -->
        <textarea
            name="语音转文字内容区"
            id="area"
            cols="30"
            rows="10"
            placeholder="说点什么吧..."
            ref="areaRef"
            v-model="textInp"></textarea>
        <div>
            <!-- Toggles recording on and off -->
            <button type="button" @click="speek">🎤</button>
            <!-- A real button instead of a clickable <p>, so it is focusable and keyboard-operable -->
            <button type="button" @click="addText">发布</button>
        </div>
        <!-- Published entries; hidden while the list is empty -->
        <ul v-show="list.length > 0">
            <li v-for="(item,index) in list" :key="index">{{ item }}</li>
        </ul>
    </div>
</template>

<script>
import { onBeforeUnmount, reactive, ref, toRefs } from 'vue'

/**
 * Speech-to-text demo component.
 *
 * Recognized speech is written into `textInp` (bound to the textarea),
 * `addText` publishes the current text to `list`, and `speek` toggles the
 * recognition session on and off.
 */
export default {
    setup () {
        const areaRef = ref(null);
        const state = reactive({
            list: [],
            textInp: '',
            isGoing: false
        });

        // Feature-detect instead of calling the constructor blindly, so that a
        // browser without the Web Speech API does not throw during setup.
        const SpeechRecognitionCtor = window.SpeechRecognition || window.webkitSpeechRecognition;
        const recognition = SpeechRecognitionCtor ? new SpeechRecognitionCtor() : null;

        if (recognition) {
            recognition.continuous = true;
            recognition.interimResults = true;
            recognition.lang = 'zh-CN';

            recognition.onresult = (event) => {
                // event.results holds every result of the current session, while
                // event.resultIndex is only the lowest index that changed. Join
                // the whole list so that no later result is dropped.
                let transcript = '';
                for (let i = 0; i < event.results.length; i++) {
                    transcript += event.results[i][0].transcript;
                }
                state.textInp = transcript;
            };

            recognition.onerror = (event) => {
                console.error('Speech recognition error:', event.error);
            };

            // The browser may end the session on its own (silence, error, ...).
            // Reset the flag so the next click starts a new session instead of
            // calling stop() on one that is already over.
            recognition.onend = () => {
                state.isGoing = false;
            };
        }

        /** Start / stop recording. */
        function speek () {
            if (!recognition) {
                console.warn('Speech recognition is not supported in this browser.');
                return;
            }
            if (state.isGoing) {
                recognition.stop();
                state.isGoing = false;
                return;
            }
            try {
                // start() throws when a session is still running, e.g. right
                // after stop() and before the end event has fired.
                recognition.start();
                state.isGoing = true;
            } catch (err) {
                console.error('Could not start speech recognition:', err);
            }
        }

        /** Publish the current text to the list and clear the input. */
        function addText () {
            state.list.push(state.textInp);
            state.textInp = '';
        }

        // Release the recognizer when the component goes away so that it does
        // not keep listening in the background.
        onBeforeUnmount(() => {
            if (recognition) {
                recognition.abort();
            }
        });

        return {
            ...toRefs(state),
            areaRef,
            addText,
            speek
        };
    }
}
</script>

<style lang="scss" scoped>

</style>

SpeechSynthesisUtterance（语音合成）

基础测试

// Wrap the greeting in an utterance and queue it on the browser's speech synthesizer.
const greeting = new window.SpeechSynthesisUtterance('Hi，girl！');
window.speechSynthesis.speak(greeting);

SpeechSynthesisUtterance对象提供了一些其他属性供设置：

lang：使用的语言，字符串（比如：“zh-cn”）
volume：音量，值在0-1之间（默认是1）
rate：语速的倍数，值在0.1-10之间（默认1倍）
pitch：音高，值在0-2之间，（默认是1）
voice：指定希望使用的声音，SpeechSynthesisVoice 对象（从 speechSynthesis.getVoices() 的返回值中选取；旧草案中的 voiceURI 字符串属性已被它取代）
onstart：语音开始合成时触发
onpause：语音暂停时触发
onresume：语音合成重新开始时触发
onend：语音结束时触发

// Create an empty utterance, then set its text, volume and rate in one step.
const utterance = Object.assign(new window.SpeechSynthesisUtterance(), {
    text: 'Hi，girl！',
    volume: 0.5,
    rate: 1,
});

speechSynthesis对象
创建完SpeechSynthesisUtterance对象之后，把这个对象传递给speechSynthesis对象的speak方法中。

cancel()：停止合成，并清空待播放的队列（speechSynthesis 对象没有 stop() 方法）
pause()：暂停合成
resume()：重新开始合成
getVoices()：返回浏览器支持的语音包数组

// Pause speech output whenever the page is clicked.
window.addEventListener("click", () => {
    window.speechSynthesis.pause();
});

/** Log the voices the browser currently offers for speech synthesis. */
function logVoices() {
    console.log(window.speechSynthesis.getVoices());
}

// Voices can be loaded asynchronously, so getVoices() may return an empty
// array when called right away (the article's author saw this in Chrome).
// In that case wait for the voiceschanged event before reading the list.
if (window.speechSynthesis.getVoices().length > 0) {
    logVoices();
} else {
    window.speechSynthesis.addEventListener("voiceschanged", logVoices, { once: true });
}

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>语音转文字</title>
    <style>
        textarea {
            width: 100%;
            height: 50px;
        }
    </style>
</head>
<body>
    <div>
        <!-- Dictation target. The shape/coords/href/alt attributes were removed:
             they belong to <area>, not <textarea>. -->
        <textarea id="area" placeholder="请说点什么..."></textarea>
        <!-- Toggles recording on and off -->
        <button type="button" id="speek">麦克风</button>
        <!-- Appends the current text to the list below -->
        <button type="button" id="addBtn">发布</button>
        <!-- Published entries -->
        <ul id="text"></ul>
    </div>

    <script>
        /**
         * Wire up the speech-to-text demo once the page has loaded: the
         * microphone button toggles recognition, recognized text is shown in
         * the textarea, and the publish button appends that text to the list.
         */
        window.onload = () => {
            console.log('页面加载完毕');
            const area = document.querySelector('#area');
            const speek = document.querySelector('#speek');
            const addBtn = document.querySelector('#addBtn');
            const text = document.querySelector('#text');

            // Publishing does not depend on speech recognition, so wire it up
            // first; it keeps working even when recognition is unavailable.
            addBtn.addEventListener('click', () => {
                const li = document.createElement('li');
                li.textContent = area.value;
                text.appendChild(li);
                area.value = '';
            });

            // Feature-detect instead of calling the constructor blindly.
            const SpeechRecognitionCtor = window.SpeechRecognition || window.webkitSpeechRecognition;
            if (!SpeechRecognitionCtor) {
                console.warn('Speech recognition is not supported in this browser.');
                speek.disabled = true;
                return;
            }

            const recognition = new SpeechRecognitionCtor();
            let isSpeek = false;

            recognition.continuous = true;
            recognition.interimResults = true;
            recognition.lang = 'zh-CN';

            recognition.onresult = (event) => {
                // event.results holds every result of the current session.
                // Rebuild the full transcript (final and interim) on each event,
                // so an interim update never blanks the textarea and earlier
                // sentences are not overwritten by later ones.
                let transcript = '';
                for (let i = 0; i < event.results.length; i++) {
                    transcript += event.results[i][0].transcript;
                }
                area.value = transcript;
            };

            recognition.onerror = (event) => {
                console.error('Speech recognition error:', event.error);
            };

            // The browser may end the session on its own (silence, error, ...).
            // Reset the flag so the next click starts a new session.
            recognition.onend = () => {
                isSpeek = false;
            };

            speek.addEventListener('click', () => {
                if (isSpeek) {
                    recognition.stop();
                    isSpeek = false;
                    return;
                }
                try {
                    // start() throws when a session is still running, e.g.
                    // right after stop() and before the end event has fired.
                    recognition.start();
                    isSpeek = true;
                } catch (err) {
                    console.error('Could not start speech recognition:', err);
                }
            });
        };

    </script>
</body>
</html>