文章目录
1.百度ASR应用创建
- 领取个人免费额度(注意:语音识别与语音合成不同,需要创建新的应用)
短语音识别标准版_短语音识别-百度AI开放平台 (baidu.com)
点击立即选购,领取个人或企业免费额度
- 控制台创建应用
这一步与上一篇《前端使用百度TTS》步骤相同,这里不再演示
2.代码
包含TTS和ASR
<template>
<div class="app">
<!-- Section 1: fetch an access_token for the TTS application.
     NOTE(review): this section and section 3 both call handleGetAccessToken,
     which stores to the single localStorage key "access_token" — fetching the
     ASR token overwrites the TTS token, so re-fetch before switching features. -->
<div class="get_tts_token">
<h1>
1.请输入你<i>语音合成</i>应用的client_id和client_secret获取access_token
</h1>
<el-row :gutter="50">
<el-col :span="8">
<el-input
v-model.trim="client_id"
placeholder="请输入你的client_id(应用的API Key)"
/>
</el-col>
<el-col :span="8">
<el-input
v-model.trim="client_secret"
placeholder="请输入你的client_secret(应用的Secret Key)"
/>
</el-col>
<el-col :span="8"
><el-button @click="handleGetAccessToken"
>获取AccessToken</el-button
></el-col
>
</el-row>
</div>
<hr />
<!-- Section 2: text-to-speech. Pick a voice persona (per), type text,
     and the synthesized clip is bound to the <audio> element. -->
<div class="text2audio">
<h1>2.语音合成</h1>
<h4>免费的只能使用前4种语音</h4>
<!-- label values are Baidu's "per" voice-persona codes -->
<el-radio-group v-model="per">
<el-radio-button label="1">度小宇</el-radio-button>
<el-radio-button label="0">度小美</el-radio-button>
<el-radio-button label="3">度逍遥(基础)</el-radio-button>
<el-radio-button label="4">度丫丫</el-radio-button>
<el-radio-button label="5003">度逍遥(精品)</el-radio-button>
<el-radio-button label="5118">度小鹿</el-radio-button>
<el-radio-button label="106">度博文</el-radio-button>
<el-radio-button label="110">度小童</el-radio-button>
<el-radio-button label="111">度小萌</el-radio-button>
<el-radio-button label="103">度米朵</el-radio-button>
<el-radio-button label="5">度小娇</el-radio-button>
</el-radio-group>
<el-row :gutter="50">
<el-col :span="8">
<el-input
v-model.trim="inputText"
placeholder="请输入你要转化的文本"
/>
</el-col>
<el-col :span="2"
><el-button @click="handleTextToAudio">语音合成</el-button></el-col
>
<el-col :span="8">
<audio :src="audioSrc" v-if="audioSrc" controls>
您的浏览器不支持音频播放。
</audio>
</el-col>
</el-row>
</div>
<hr />
<!-- Section 3: fetch an access_token for the ASR application
     (a separate Baidu app from the TTS one — see the note above section 1). -->
<div class="get_asr_token">
<h1>
3.请输入你<i>语音识别</i>应用的client_id和client_secret获取access_token
</h1>
<el-row :gutter="50">
<el-col :span="8">
<el-input
v-model.trim="client_id"
placeholder="请输入你的client_id(应用的API Key)"
/>
</el-col>
<el-col :span="8">
<el-input
v-model.trim="client_secret"
placeholder="请输入你的client_secret(应用的Secret Key)"
/>
</el-col>
<el-col :span="8"
><el-button @click="handleGetAccessToken"
>获取AccessToken</el-button
></el-col
>
</el-row>
</div>
<hr />
<!-- Section 4: record audio with HZRecorder, play it back,
     then send it to Baidu's ASR endpoint and show the result text. -->
<div class="audio2text">
<h1>4.语音识别</h1>
<el-row :gutter="50">
<el-col :span="4">
<el-button @click="handleGetPermissions">获取录音权限</el-button>
</el-col>
<el-col :span="4">
<el-button @click="handleRecording">{{ recordBtn }}</el-button>
</el-col>
<el-col :span="8">
<audio :src="audioRecordSrc" controls></audio>
</el-col>
</el-row>
<el-row :gutter="50">
<el-col :span="4">
<el-button @click="handleSpeechRecognition">语音识别</el-button>
</el-col>
<el-col :span="8">
{{ recordText }}
<!-- <audio :src="audioRecordSrc" controls></audio> -->
</el-col>
</el-row>
</div>
</div>
</template>
<script setup>
import { reactive, ref } from "vue";
import axios from "axios";
import qs from "qs";
import { ElMessage, ElMessageBox } from "element-plus";
import { HZRecorder } from "./utils/recorder";
// Show a global element-plus toast.
// message: text to display; type: message style ("success" | "warning" | ...).
const openMsg = (message, type) => {
  ElMessage({ message, type });
};
// 1. Fetch an OAuth access_token from Baidu using the app credentials.
// client_id is the application's API Key, client_secret its Secret Key.
const client_id = ref("");
const client_secret = ref("");
const handleGetAccessToken = async () => {
  try {
    const option = {
      grant_type: "client_credentials",
      client_id: client_id.value,
      client_secret: client_secret.value,
    };
    const res = await axios.post("/oauth/2.0/token", qs.stringify(option));
    if (res.status !== 200) {
      return openMsg(res.statusText, "warning");
    }
    // Baidu can answer HTTP 200 with an error payload when the credentials
    // are wrong — guard so we never store the string "undefined".
    if (!res.data.access_token) {
      return openMsg(res.data.error_description || "获取token失败", "warning");
    }
    openMsg("获取token成功", "success");
    localStorage.setItem("access_token", res.data.access_token);
    client_id.value = "";
    client_secret.value = "";
  } catch (error) {
    // Surface network / 4xx failures to the user instead of silently logging.
    console.log(error);
    openMsg("获取token失败,请检查网络或凭证", "warning");
  }
};
// 2. TTS state and button handler.
// per: Baidu voice-persona code selected in the radio group.
const per = ref("1");
// Text typed by the user to synthesize.
const inputText = ref("");
// Object URL of the synthesized clip, bound to the <audio> element.
const audioSrc = ref("");
const handleTextToAudio = async () => {
  const token = localStorage.getItem("access_token");
  if (!token) {
    return openMsg("请先获取token!", "warning");
  }
  try {
    // await the request so a failure is caught here instead of becoming an
    // unhandled promise rejection (the original call was fire-and-forget).
    await textToAudio(token);
  } catch (error) {
    console.log(error);
    openMsg("语音合成失败", "warning");
  }
};
// Call Baidu's TTS endpoint and bind the resulting audio blob to the player.
// token: a valid access_token for the TTS application.
const textToAudio = async (token) => {
  const option = {
    tex: inputText.value,
    tok: token,
    cuid: `${Math.floor(Math.random() * 1000000)}`, // arbitrary device id
    ctp: "1", // client type: web
    lan: "zh", // language: Chinese
    per: per.value, // voice persona
  };
  const res = await axios.post("/text2audio", qs.stringify(option), {
    headers: { "Content-Type": "application/x-www-form-urlencoded" },
    responseType: "blob",
  });
  if (res.status !== 200) {
    return openMsg(res.statusText, "warning");
  }
  // On failure Baidu answers HTTP 200 with a JSON error body; because the
  // request used responseType "blob" it would otherwise be treated as audio.
  if (res.data.type && res.data.type.includes("json")) {
    const errText = await res.data.text();
    return openMsg(`语音合成失败:${errText}`, "warning");
  }
  openMsg("语音合成成功", "success");
  audioSrc.value = URL.createObjectURL(res.data);
};
// 3. Speech-recognition state.
// Label of the record toggle button ("开始录音" / "结束录音").
let recordBtn = ref("开始录音");
// Object URL of the finished recording, bound to the playback <audio>.
const audioRecordSrc = ref("");
// Recognized text returned by the ASR endpoint.
const recordText = ref("");
// Older WebKit browsers expose URL only under the webkit prefix.
window.URL = window.URL || window.webkitURL;
// HZRecorder instance, created once microphone permission is granted.
let recorder = null;
// Whether a recording is currently in progress.
let isRecorder = false;
// WAV blob of the last finished recording.
let wavBlob = null;
// Request microphone permission and build the recorder on success.
const handleGetPermissions = () => {
  // getUserMedia returns a Promise because the user may take arbitrarily long
  // to answer the permission prompt. Optional chaining also covers insecure
  // contexts where navigator.mediaDevices itself is undefined (the original
  // dereferenced it unconditionally and could throw a TypeError).
  if (navigator.mediaDevices?.getUserMedia) {
    const constraints = { audio: true };
    navigator.mediaDevices.getUserMedia(constraints).then(
      (stream) => {
        recorder = new HZRecorder(stream);
        console.log("初始化完成");
        openMsg("获取录音权限成功", "success");
      },
      () => {
        console.error("授权失败!");
        openMsg("授权失败!", "warning");
      }
    );
  } else {
    console.error("浏览器不支持 getUserMedia");
    openMsg("浏览器不支持 getUserMedia", "warning");
  }
};
// Toggle between starting and stopping a recording.
const handleRecording = () => {
  // Without permission there is no recorder instance; bail out early so the
  // stop branch cannot crash on the unguarded recorder.upload() call.
  if (!recorder) {
    return openMsg("请先获取录音权限!", "warning");
  }
  if (!isRecorder) {
    recorder.start();
    recordBtn.value = "结束录音";
    isRecorder = true;
  } else {
    recorder.stop();
    wavBlob = recorder.upload();
    console.log(wavBlob);
    // Bind the finished WAV to the playback <audio> element.
    audioRecordSrc.value = window.URL.createObjectURL(wavBlob);
    recordBtn.value = "开始录音";
    isRecorder = false;
  }
};
// 4. Send the recorded WAV to Baidu's short-speech ASR endpoint and display
// the recognized text.
const handleSpeechRecognition = async () => {
  const token = localStorage.getItem("access_token");
  if (!token) {
    return openMsg("请先获取token!", "warning");
  }
  // Guard: without a finished recording there is nothing to recognize
  // (the original crashed reading a null wavBlob).
  if (!wavBlob) {
    return openMsg("请先录音!", "warning");
  }
  // Convert the blob to base64. Strip the "data:<mime>;base64," prefix
  // generically at the first comma instead of matching a hard-coded MIME
  // string that breaks if the blob type ever changes.
  const blobToDataURL = (blob, callback) => {
    const reader = new FileReader();
    reader.onload = (e) => {
      callback(e.target.result.split(",")[1]);
    };
    reader.readAsDataURL(blob);
  };
  blobToDataURL(wavBlob, async (base_64) => {
    try {
      const res = await axios.post(
        "/server_api",
        {
          speech: base_64, // base64-encoded binary audio; paired with len
          len: wavBlob.size, // raw byte count of the audio
          dev_pid: 1537, // Mandarin recognition model
          cuid: "541b:3f:5af4:b2c9", // arbitrary unique device id
          rate: 16000, // 16 kHz sample rate, 16-bit, mono
          token: token, // access_token of the ASR application
          channel: 1, // mono
          format: "wav", // container format
        },
        {
          headers: {
            "Content-Type": "application/json",
          },
        }
      );
      recorder.clear();
      if (res.data.err_no !== 0) {
        return openMsg(res.data.err_msg, "warning");
      }
      openMsg("识别成功", "success");
      recordText.value = res.data.result[0];
      console.log("识别结果:" + res.data.result[0]);
    } catch (error) {
      // Surface request failures instead of leaving an unhandled rejection.
      console.log(error);
      openMsg("语音识别请求失败", "warning");
    }
  });
};
</script>
<style scoped>
/* Center the page content and push it below the top edge. */
.app {
width: 80%;
margin: auto;
margin-top: 50px;
}
/* Vertical breathing room between the four sections. */
hr {
margin: 30px 0;
}
h1 {
margin: 10px 0;
}
/* :deep() pierces scoped styles into the element-plus component. */
:deep(.el-radio-group) {
margin-bottom: 30px;
}
</style>
// recorder.js
// Minimal WAV recorder built on the Web Audio API: captures microphone PCM,
// decimates it to the output rate, and packs it into a 16 kHz / 16-bit / mono
// WAV blob that Baidu's short-speech ASR endpoint accepts.
// stream: MediaStream from getUserMedia; config: { sampleBits?, sampleRate? }.
export function HZRecorder(stream, config) {
  config = config || {};
  config.sampleBits = config.sampleBits || 16; // output sample size: 8 or 16 bits
  config.sampleRate = config.sampleRate || 16000; // output sample rate (16 kHz)
  // Prefer the standard constructor; fall back to the deprecated WebKit prefix
  // (the original tried webkitAudioContext first).
  var context = new (window.AudioContext || window.webkitAudioContext)();
  var audioInput = context.createMediaStreamSource(stream);
  var createScript = context.createScriptProcessor || context.createJavaScriptNode;
  var recorder = createScript.apply(context, [4096, 1, 1]);
  var audioData = {
    size: 0 // total number of captured samples
    , buffer: [] // captured Float32Array chunks
    , inputSampleRate: context.sampleRate // hardware sample rate
    , inputSampleBits: 16 // input sample size
    , outputSampleRate: config.sampleRate
    , outputSampleBits: config.sampleBits
    // Append one chunk of raw PCM samples.
    , input: function (data) {
      this.buffer.push(new Float32Array(data));
      this.size += data.length;
    }
    // Merge all captured chunks, then decimate to the output sample rate.
    , compress: function () {
      var data = new Float32Array(this.size);
      var offset = 0;
      for (var i = 0; i < this.buffer.length; i++) {
        data.set(this.buffer[i], offset);
        offset += this.buffer[i].length;
      }
      // Naive decimation: keep every Nth sample (no low-pass filtering).
      var compression = parseInt(this.inputSampleRate / this.outputSampleRate);
      var length = data.length / compression;
      var result = new Float32Array(length);
      var index = 0, j = 0;
      while (index < length) {
        result[index] = data[j];
        j += compression;
        index++;
      }
      return result;
    }
    // Serialize the compressed samples into a RIFF/WAV container.
    , encodeWAV: function () {
      var sampleRate = Math.min(this.inputSampleRate, this.outputSampleRate);
      var sampleBits = Math.min(this.inputSampleBits, this.outputSampleBits);
      var bytes = this.compress();
      var dataLength = bytes.length * (sampleBits / 8);
      var buffer = new ArrayBuffer(44 + dataLength);
      var data = new DataView(buffer);
      var channelCount = 1; // mono
      var offset = 0;
      var writeString = function (str) {
        for (var i = 0; i < str.length; i++) {
          data.setUint8(offset + i, str.charCodeAt(i));
        }
      };
      // RIFF chunk descriptor
      writeString('RIFF'); offset += 4;
      // file size minus the first 8 bytes
      data.setUint32(offset, 36 + dataLength, true); offset += 4;
      writeString('WAVE'); offset += 4;
      // "fmt " sub-chunk
      writeString('fmt '); offset += 4;
      // fmt chunk size (16 for PCM)
      data.setUint32(offset, 16, true); offset += 4;
      // audio format: 1 = PCM
      data.setUint16(offset, 1, true); offset += 2;
      // channel count
      data.setUint16(offset, channelCount, true); offset += 2;
      // samples per second per channel
      data.setUint32(offset, sampleRate, true); offset += 4;
      // byte rate = channels * sampleRate * bytesPerSample
      data.setUint32(offset, channelCount * sampleRate * (sampleBits / 8), true); offset += 4;
      // block align = channels * bytesPerSample
      data.setUint16(offset, channelCount * (sampleBits / 8), true); offset += 2;
      // bits per sample
      data.setUint16(offset, sampleBits, true); offset += 2;
      // "data" sub-chunk
      writeString('data'); offset += 4;
      // payload size = total size - 44-byte header
      data.setUint32(offset, dataLength, true); offset += 4;
      // Write the PCM samples, clamping each float to [-1, 1].
      if (sampleBits === 8) {
        for (var i = 0; i < bytes.length; i++, offset++) {
          var s = Math.max(-1, Math.min(1, bytes[i]));
          var val = s < 0 ? s * 0x8000 : s * 0x7FFF;
          // Map the signed 16-bit value down to unsigned 8-bit.
          val = parseInt(255 / (65535 / (val + 32768)));
          data.setInt8(offset, val); // setInt8 takes no endianness argument
        }
      } else {
        for (var i = 0; i < bytes.length; i++, offset += 2) {
          var s = Math.max(-1, Math.min(1, bytes[i]));
          data.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
        }
      }
      return new Blob([data], { type: 'audio/wav' });
    }
  };
  // Start capturing: wire microphone -> processor -> destination.
  this.start = function () {
    audioInput.connect(recorder);
    recorder.connect(context.destination);
  };
  // Stop capturing.
  this.stop = function () {
    recorder.disconnect();
  };
  // Stop and return the encoded WAV blob.
  this.getBlob = function () {
    this.stop();
    return audioData.encodeWAV();
  };
  // Play the recording back through the supplied <audio> element.
  this.play = function (audio) {
    audio.src = window.URL.createObjectURL(this.getBlob());
  };
  // Alias used by the upload path.
  this.upload = function () {
    return this.getBlob();
  };
  // Reset the capture buffers for the next recording.
  this.clear = function () {
    console.log("audioData", audioData.size);
    audioData.size = 0;
    audioData.buffer = [];
  };
  // Collect PCM frames as they arrive from the audio graph.
  recorder.onaudioprocess = function (e) {
    audioData.input(e.inputBuffer.getChannelData(0));
  };
}
export default HZRecorder;
3.演示
- 在(3)输入语音识别应用的client_id和client_secret,点击获取token
- 点击获取录音权限
- 点击开始录音,再点击结束录音;录音结束后,录音数据会加载到页面上的 audio 播放器中
- 点击语音识别,识别完成后会在按钮的右侧展示识别的文字