ASR语音识别项目用于测试不同模型的语音识别效果和上屏展示,适用于会议记录、翻译等场景。我主要负责页面设计和搭建、测试、联调,以及根据客户需求进行优化。ASR语音识别支持多种模型的识别,比如实时语音、非实时语音、日韩语音等。项目中还遇到了前端降采样的问题:之前没接触过音频数据的处理,遇到了很多困难。刚开始是直接用audio-recorder库设置采样率,但这种方法对音频的影响太大:它会对音频的前后部分进行切割,只保留中间的一份,因此会切掉有用的音频,音频噪声也会很大。于是又想到用WebRTC提供的音频处理方法,但它是用C语言开发的,将C代码转写为JS代码后,测试发现采样率还是没有降下去。最后通过调用FFmpeg中的方法进行降采样,达到了想要的效果。又与Java降采样、AU(Adobe Audition)音频处理软件的处理结果进行对比,发现效果接近一致。前端还做了对音频文件的识别,支持pcm、wav格式的音频识别。为了使音频文件呈现实时展示的效果,我会对音频进行分割,每40ms推送一次,这样上屏展示时文字就是一个字一个字蹦出来的。
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8"/>
<meta
name="viewport"
content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0"
/>
<meta name="apple-mobile-web-app-capable" content="yes"/>
<title>ASR demo</title>
<style type="text/css">
html body {
margin: 0;
padding: 0;
}
.manage {
display: flex;
justify-content: space-between;
}
.comments {
word-wrap: break-word;
display: block;
margin: 0 auto;
overflow: auto;
width: 97%;
font-size: 14px;
height: 200px;
line-height: 24px;
padding: 2px;
outline: none;
margin: 30px auto;
}
img {
display: none;
}
.voice-btn {
color: #fff;
background-color: #409eff;
font-weight: 500;
padding: 12px 20px;
font-size: 14px;
border-radius: 4px;
border: 0;
cursor: pointer;
}
.voice-btn.end {
display: block;
}
.fileInput {
position: relative;
display: inline-block;
background: #d0eeff;
border: 1px solid #99d3f5;
border-radius: 4px;
padding: 6px 18px;
overflow: hidden;
color: #1e88c7;
text-decoration: none;
text-indent: 0;
line-height: 30px;
}
.file input {
position: absolute;
font-size: 100px;
right: 0;
top: 0;
opacity: 0;
}
.file:hover {
background: #aadffd;
border-color: #78c3f3;
color: #004974;
text-decoration: none;
}
.upload {
color: #fff;
background-color: #78c3f3;
font-weight: 500;
padding: 12px 20px;
border-radius: 4px;
border: 0;
cursor: pointer;
}
</style>
</head>
<body>
<div class="manage">
<div id="controls">
<button id="intercomBegin" class="voice-btn">开始</button>
<button id="intercomEnd" class="voice-btn end">关闭</button>
</div>
<br/>
<a href="音频地址" download=""></a>
<!-- 上传音频 accept打开系统文件目录 -->
<div class="">
<form
id="file-rec"
method="POST"
action="#"
target="myIframe"
enctype="multipart/form-data"
>
<input
type="file"
class="fileInput"
id="fileInput"
accept=".pcm,.wav"
multiple="multiple"
/>
<br/>
<button id="upload" class="upload">上传</button>
</form>
<iframe
style="display: none"
id="myIframe"
name="myIframe"
class="iframes"
></iframe>
</div>
</div>
<div>选择模式</div>
<br/>
<form id="article-foot" class="article-foot"></form>
<br/>
</body>
<!-- 引入组件 -->
<script src="./js/jquery.min.js"></script>
<script src="./js/toastr.min.js"></script>
<!-- 基于jquery的非阻塞的消息提示插件 -->
<link rel="stylesheet" href="./css/toastr.min.css"/>
<script src="./js/HZRecorder.js"></script>
<!-- <script src="./js/audio.js"></script> -->
<script type="text/javascript">
// ---- Page-level state shared by the rest of this script ----

// DOM handles. This <script> is placed after the page markup, so the
// lookups run once those elements have been parsed.
var begin = document.getElementById("intercomBegin"); // the "开始" button
var end = document.getElementById("intercomEnd"); // the "关闭" button
// NOTE(review): no element with id "textResult" appears in the markup visible
// in this part of the file; if none exists at load time this is null — confirm.
var textResult = document.getElementById("textResult");

// Result bookkeeping (how these are filled is not visible in this chunk).
var asrResults = [];
var textAreaCount = 0;

var ws = null; // WebSocket connection
var record = null; // media object used to process audio; assigned by init()
var interval; // timer handle

// Presumably the punctuation marks that trigger a line break in the displayed
// text — TODO confirm against the code that reads it.
const NewlineSymbol = "?。!";
/**
 * Initialization hook: stores the given recorder object in the page-level
 * `record` variable and logs it to the console.
 *
 * @param {*} rec - The media/recorder object to keep for later audio handling
 *   (presumably supplied by the recorder library — confirm against the caller).
 * @returns {void}
 */
function init(rec) {
  record = rec;
  // Debug output of the recorder that was just stored.
  console.log(rec);
}
/**
 * Generates a random unique identifier in RFC 4122 version-4 UUID format
 * (lowercase hex, e.g. "ffb7cefd-02cb-4853-8238-c0292cf988d5").
 *
 * Randomness comes from Math.random(), which is not cryptographically
 * secure; do not use the result as a secret.
 *
 * @returns {string} A 36-character UUID string.
 */
function uuid() {
  const hexDigits = "0123456789abcdef";
  const chars = [];
  for (let i = 0; i < 36; i++) {
    chars[i] = hexDigits.charAt(Math.floor(Math.random() * 0x10));
  }
  // Version nibble: the high four bits of time_hi_and_version are 0100 (v4).
  chars[14] = "4";
  // Variant: the top two bits of clock_seq_hi_and_reserved are 10, so this
  // digit is one of 8, 9, a, b. The random character must be parsed as hex
  // before masking; applying `&` to the character itself turns "a".."f" into
  // NaN (treated as 0), which skews the digit towards "8".
  chars[19] = hexDigits.charAt((Number.parseInt(chars[19], 16) & 0x3) | 0x8);
  // Hyphens at the standard 8-4-4-4-12 group boundaries.
  chars[8] = chars[13] = chars[18] = chars[23] = "-";
  return chars.join("");
}
uuid(); // "ffb7cefd-02cb-4853-8238-c0292cf988d5"
var uuid = uuid(