参考链接:
语音听写(流式版)WebAPI 文档 | 讯飞开放平台文档中心 (xfyun.cn)
在用WPF做上位机的时候需要用到科大讯飞的语音识别功能,看了看官方文档在Windows上想要实现语音识别可以通过WebAPI或者SDK的方式实现。
在网上对比了两种方法,SDK好像不能直接用到C#上面,而且还要往文件里引入好几个库,本着偷懒的想法就选择了WebAPI的方法,相对来说会简洁一点。
使用的是.NET Framework 4.7.2
如果将发送音频时使用的参数修改成以下数值,识别速度会大大加快,但是没有经过大量测试,不能保证稳定性。
int frameSize = 6400;
int intervel = 10;
3.14更新:
主要是在接收返回结果的函数中,对是否接收到服务器的结果全部返回标识做了判断,并断开与服务器的连接
using HandyControl.Controls;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.WebSockets;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;
namespace HandyControlTest
{
/// <summary>
/// Interaction logic for MainWindow.xaml.
/// </summary>
public partial class MainWindow : System.Windows.Window
{
    // Single recognizer instance reused by every click of the button.
    private recorder _recognizer = new recorder();

    /// <summary>
    /// Creates the window and runs the generated <c>InitializeComponent</c>.
    /// </summary>
    public MainWindow() => InitializeComponent();

    /// <summary>
    /// Click handler: starts the audio upload and then starts the result
    /// receiver. Both recorder methods are <c>async void</c>, so each call
    /// returns as soon as it reaches its first pending await.
    /// </summary>
    /// <param name="sender">Control that raised the event (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void Button_Click(object sender, RoutedEventArgs e)
    {
        _recognizer.SendFileToWebSocket();
        _recognizer.receiveMessage();
    }
}
}
/// <summary>
/// Client for the iFLYTEK (xfyun) streaming dictation WebSocket endpoint.
/// <see cref="SendFileToWebSocket"/> connects and uploads a recorded audio file
/// in frames; <see cref="receiveMessage"/> waits for the upload to finish,
/// collects the recognised text and shows it in a message box.
/// </summary>
class recorder
{
    // NOTE(review): credentials are hard-coded in source. They are kept here so
    // behaviour is unchanged, but they should be moved to configuration and
    // rotated if this file has been published.
    private string _appID = "6f28cdb3";
    private string _apiSecret = "Mzc3OWJjYjQxYjI5YzVhNWMwYzdiNmFk";
    private string _apiKey = "bf7d03b453328d790a0d2efabe47a1db";
    private string _wss = "wss://iat-api.xfyun.cn/v2/iat";
    private string _host = "iat-api.xfyun.cn";
    private string _request_line = "GET /v2/iat HTTP/1.1";
    private bool _isConnected = false;// true once the WebSocket handshake has succeeded
    private bool _isSendDone = false;// true once the final (status 2) frame has been sent
    private bool _isFailed = false;// true when connecting/sending failed, so the receiver stops waiting
    private WebSocket _ws;

    /// <summary>
    /// Builds the signed connection URL: an HMAC-SHA256 signature over the
    /// host, date and request line, wrapped in a base64 "authorization" value.
    /// </summary>
    /// <returns>The WebSocket URL including the authorization, date and host query parameters.</returns>
    private string GetUrl()
    {
        // The "R" (RFC1123) specifier always prints "GMT" but does NOT convert
        // the value, so the timestamp has to be taken in UTC to be correct.
        string date = DateTime.UtcNow.ToString("R");
        string signature_origin = $"host: {_host}\ndate: {date}\n{_request_line}";

        string signature;
        using (HMACSHA256 mac = new HMACSHA256(Encoding.UTF8.GetBytes(_apiSecret)))
        {
            signature = Convert.ToBase64String(mac.ComputeHash(Encoding.UTF8.GetBytes(signature_origin)));
        }

        string authorization_origin = $"api_key=\"{_apiKey}\",algorithm=\"hmac-sha256\",headers=\"host date request-line\",signature=\"{signature}\"";
        string authorization = Convert.ToBase64String(Encoding.UTF8.GetBytes(authorization_origin));

        // Base64 may contain '+', '/' and '=', and the date contains spaces,
        // commas and colons, so every value is percent-encoded.
        return $"{_wss}?authorization={Uri.EscapeDataString(authorization)}&date={Uri.EscapeDataString(date)}&host={Uri.EscapeDataString(_host)}";
    }

    /// <summary>
    /// Opens a new WebSocket connection to the server and stores it in <c>_ws</c>.
    /// Failures are reported to the user in a message box.
    /// </summary>
    /// <returns><c>true</c> when the connection was established; otherwise <c>false</c>.</returns>
    private async Task<bool> ConnectWebSocket()
    {
        ClientWebSocket webSocket = new ClientWebSocket();
        try
        {
            await webSocket.ConnectAsync(new Uri(this.GetUrl()), CancellationToken.None);
            _ws = webSocket;
            _isConnected = true;
            return true;
        }
        catch (Exception ex)
        {
            webSocket.Dispose();
            System.Windows.MessageBox.Show(ex.Message);
            return false;
        }
    }

    /// <summary>
    /// Connects to the server and uploads the recorded audio file frame by
    /// frame. The first chunk is sent with status 0, later chunks with status 1,
    /// and an empty status 2 frame marks the end of the audio. Any failure is
    /// shown in a message box and recorded so that <see cref="receiveMessage"/>
    /// does not wait forever.
    /// </summary>
    public async void SendFileToWebSocket()
    {
        int frameSize = 6400;// bytes of audio per frame
        int intervel = 5;// delay between frames, in milliseconds
        string filePath = "E:\\SystemData\\desktop\\recorded.wav";// recorded audio file

        // Start from a clean state so a previous, abnormally ended run cannot
        // make this run reuse a dead socket.
        _isConnected = false;
        _isSendDone = false;
        _isFailed = false;

        try
        {
            if (!await ConnectWebSocket())
            {
                _isFailed = true;
                return;
            }

            // NOTE(review): the file is uploaded byte-for-byte as in the original
            // code, including whatever container header the .wav file has, while
            // the frames are declared as raw 16 kHz audio. TODO confirm whether
            // the header should be skipped.
            byte[] arr = File.ReadAllBytes(filePath);
            if (arr.Length == 0)
            {
                throw new InvalidDataException($"Audio file is empty: {filePath}");
            }

            int pcmCount = 0;// number of bytes already sent
            while (pcmCount < arr.Length)
            {
                int count = Math.Min(frameSize, arr.Length - pcmCount);
                string audio = Convert.ToBase64String(arr, pcmCount, count);

                // The very first chunk must always be the status 0 frame, even
                // when the whole file fits into a single frame.
                await onSend(pcmCount == 0 ? 0 : 1, audio);
                pcmCount += count;

                if (pcmCount == arr.Length)
                {
                    await onSend(2, null);// tell the server the audio is complete
                    _isSendDone = true;
                }

                await Task.Delay(intervel);
            }
        }
        catch (Exception e)
        {
            // Raise the flag before showing the (modal) dialog so a waiting
            // receiver can stop polling straight away.
            _isFailed = true;
            _isConnected = false;
            _ws?.Dispose();
            System.Windows.MessageBox.Show(e.Message);
        }
    }

    /// <summary>
    /// Serialises one frame as JSON and sends it, completing only when the
    /// send has finished so that sends never overlap on the socket.
    /// </summary>
    /// <param name="curStatus">0: first frame (includes common/business parameters); 1: continuation frame; 2: end-of-audio frame. Other values send nothing.</param>
    /// <param name="data">Base64 encoded audio for status 0 and 1; ignored for status 2.</param>
    /// <exception cref="InvalidOperationException">The WebSocket has not been connected.</exception>
    private async Task onSend(int curStatus, string data)
    {
        if (!_isConnected || _ws == null)
        {
            throw new InvalidOperationException("WebSocket is not connected.");
        }

        object payload;
        switch (curStatus)
        {
            case 0:
                payload = new
                {
                    common = new
                    {
                        app_id = $"{_appID}"
                    },
                    business = new
                    {
                        language = "zh_cn",
                        domain = "iat",
                        accent = "mandarin"
                    },
                    data = new
                    {
                        status = 0,
                        format = "audio/L16;rate=16000",
                        encoding = "raw",
                        audio = $"{data}"
                    }
                };
                break;
            case 1:
                payload = new
                {
                    data = new
                    {
                        status = 1,
                        format = "audio/L16;rate=16000",
                        encoding = "raw",
                        audio = $"{data}"
                    }
                };
                break;
            case 2:
                payload = new
                {
                    data = new
                    {
                        status = 2
                    }
                };
                break;
            default:
                return;
        }

        string jsonStr = JsonConvert.SerializeObject(payload, Formatting.None);
        // The frame type is kept as Binary, exactly as the original code sent it.
        await _ws.SendAsync(new ArraySegment<byte>(Encoding.UTF8.GetBytes(jsonStr)), WebSocketMessageType.Binary, true, CancellationToken.None);
    }

    /// <summary>
    /// Waits until the upload has finished, then reads server messages until
    /// the last result arrives or the connection closes, and shows the
    /// concatenated text. Errors are shown in a message box instead.
    /// </summary>
    public async void receiveMessage()
    {
        while (!_isSendDone && !_isFailed)
        {
            await Task.Delay(10);// poll until the upload has finished or failed
        }
        if (_isFailed)
        {
            return;// the failure has already been reported by the sender
        }

        WebSocket ws = _ws;
        StringBuilder sentence = new StringBuilder();// accumulates the whole recognised text
        byte[] buffer = new byte[4096];
        try
        {
            using (MemoryStream message = new MemoryStream())
            {
                bool done = false;
                while (!done && ws.State == WebSocketState.Open)
                {
                    WebSocketReceiveResult result = await ws.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
                    if (result.MessageType == WebSocketMessageType.Close)
                    {
                        break;// server closed the connection
                    }
                    if (result.MessageType != WebSocketMessageType.Text)
                    {
                        continue;
                    }

                    // One message can span several receives; decode only once it
                    // is complete so JSON and multi-byte characters stay intact.
                    message.Write(buffer, 0, result.Count);
                    if (!result.EndOfMessage)
                    {
                        continue;
                    }

                    string jsonData = Encoding.UTF8.GetString(message.ToArray());
                    message.SetLength(0);
                    done = AppendResult(jsonData, sentence);
                }
            }

            if (ws.State == WebSocketState.Open)
            {
                await ws.CloseAsync(WebSocketCloseStatus.NormalClosure, "closing", CancellationToken.None);
            }
        }
        catch (Exception ex)
        {
            System.Windows.MessageBox.Show(ex.Message);
            return;
        }
        finally
        {
            ws.Dispose();
            _isSendDone = false;
            _isConnected = false;
        }

        HandyControl.Controls.MessageBox.Show(sentence.ToString());
    }

    /// <summary>
    /// Parses one server response and appends every recognised word
    /// (<c>data.result.ws[].cw[].w</c>) to <paramref name="sentence"/>.
    /// </summary>
    /// <param name="jsonData">Complete JSON text of one server message.</param>
    /// <param name="sentence">Builder that receives the recognised words.</param>
    /// <returns><c>true</c> when the response is flagged as the last one (<c>data.result.ls</c>), or carries no result and <c>data.status</c> is 2.</returns>
    /// <exception cref="InvalidOperationException">The response carries a non-zero <c>code</c>.</exception>
    private static bool AppendResult(string jsonData, StringBuilder sentence)
    {
        Newtonsoft.Json.Linq.JObject response = Newtonsoft.Json.Linq.JObject.Parse(jsonData);

        int code = (int?)response["code"] ?? 0;
        if (code != 0)
        {
            throw new InvalidOperationException($"Speech recognition failed ({code}): {(string)response["message"]}");
        }

        Newtonsoft.Json.Linq.JObject data = response["data"] as Newtonsoft.Json.Linq.JObject;
        Newtonsoft.Json.Linq.JObject result = data?["result"] as Newtonsoft.Json.Linq.JObject;
        if (result == null)
        {
            return (int?)data?["status"] == 2;
        }

        Newtonsoft.Json.Linq.JArray segments = result["ws"] as Newtonsoft.Json.Linq.JArray;
        if (segments != null)
        {
            foreach (Newtonsoft.Json.Linq.JToken segment in segments)
            {
                Newtonsoft.Json.Linq.JArray candidates = (segment as Newtonsoft.Json.Linq.JObject)?["cw"] as Newtonsoft.Json.Linq.JArray;
                if (candidates == null)
                {
                    continue;
                }
                foreach (Newtonsoft.Json.Linq.JToken candidate in candidates)
                {
                    sentence.Append((string)(candidate as Newtonsoft.Json.Linq.JObject)?["w"]);
                }
            }
        }

        return (bool?)result["ls"] ?? false;
    }
}
这是我用来实现语音识别的测试代码。下面空出来的几个参数,需要自己注册科大讯飞账号,然后在控制台获取:
private string _appID = ;
private string _apiSecret = ;
private string _apiKey = ;
科大讯飞对于音频有要求,具体可到官方文档查阅,我使用开源库NAudio能够录制到符合要求的音频