最近接到一个需求:离线语音播放异常信息。听到这个需求,我突然想到了去年玩过的离线语音识别,这次还是基于讯飞再来玩一下。先来下载 SDK:
1、找到可借鉴的代码:
https://www.cnblogs.com/DriftingLeaf/p/17648569.html
2、Windows下配置,编码格式GB2312:
// Login parameters for MSPLogin. "应用的appid" is a placeholder for the
// application's own appid; work_dir is set to the current directory.
string loginParams = "appid=应用的appid,work_dir=.";
// Windows session parameters: local engine, GB2312 text encoding, voice resources
// read from res\tts\, 16 kHz sample rate; speaker, speed and volume are interpolated.
string sessionBeginParams = $"engine_type=local,voice_name={speaker},text_encoding=GB2312,tts_res_path=fo|res\\tts\\{speaker}.jet;fo|res\\tts\\common.jet,sample_rate=16000,speed={speed},volume={volume},pitch=50,rdn=2,reg=2";
ret = MSPLogin(UserName, Password, loginParams);
if (ret != 0)
{
    Console.WriteLine($"MSPLogin failed, error code: {ret}.");
    return;
}
try
{
    Console.WriteLine("开始合成 ...");
    ret = StartSpeechSynthesis(text, filename, sessionBeginParams);
    if (ret != 0)
    {
        Console.WriteLine($"text_to_speech failed, error code: {ret}.");
    }
    else
    {
        // Only report completion when synthesis actually succeeded.
        Console.WriteLine("合成完毕");
    }
}
finally
{
    // Log out even if synthesis throws, so a successful login is always paired with a logout.
    MSPLogout();
}
3、开始合成实现方法:
/// <summary>
/// Synthesizes <paramref name="srcText"/> to speech and writes the audio to
/// <paramref name="desPath"/> as a WAV file.
/// </summary>
/// <remarks>
/// Steps: create (or truncate) the destination file, begin a TTS session, submit the text,
/// write a placeholder WAV header, append audio chunks until the engine reports the end of
/// data, patch the two size fields in the header, and end the session.
/// When a step fails, the session is ended and the error code is returned; the partially
/// written file is left on disk.
/// </remarks>
/// <param name="srcText">Text to synthesize; must not be null or empty.</param>
/// <param name="desPath">Path of the WAV file to create; must not be null or empty.</param>
/// <param name="paramsStr">
/// Session parameter string passed to <c>QTTSSessionBegin</c> and <c>QTTSTextPut</c>. Its
/// <c>text_encoding</c> entry, when present and recognized, selects the encoding used to
/// compute the byte length of <paramref name="srcText"/>.
/// </param>
/// <returns>
/// 0 on success; -1 when <paramref name="srcText"/> or <paramref name="desPath"/> is null or
/// empty; otherwise the non-zero error code reported by the failing QTTS call.
/// </returns>
private int StartSpeechSynthesis(string srcText, string desPath, string paramsStr)
{
    // Synthesis status flags as defined in the iFlytek MSC SDK headers
    // (MSP_TTS_FLAG_STILL_HAVE_DATA / MSP_TTS_FLAG_DATA_END).
    const int TtsFlagStillHaveData = 1;
    const int TtsFlagDataEnd = 2;

    // Byte offsets of the two size fields patched after synthesis.
    // NOTE(review): assumes WavePcmHdr is the canonical 44-byte PCM WAV header — confirm.
    const int RiffSizeOffset = 4;
    const int DataSizeOffset = 40;

    int ret = -1;
    if (string.IsNullOrEmpty(srcText) || string.IsNullOrEmpty(desPath))
    {
        Console.WriteLine("params is error!");
        return ret;
    }

    nint sessionID = nint.Zero;
    using (FileStream fs = new FileStream(desPath, FileMode.Create, FileAccess.Write))
    {
        sessionID = QTTSSessionBegin(paramsStr, ref ret);
        if (ret != 0)
        {
            Console.WriteLine($"QTTSSessionBegin failed, error code: {ret}.");
            return ret;
        }

        try
        {
            // The length handed to the engine is a byte count, so it has to be computed with
            // the encoding the session declares (GB2312 on Windows, UTF-8 on Linux) rather
            // than always UTF-8.
            // NOTE(review): presumes the P/Invoke layer marshals srcText in that same
            // encoding — confirm against the DllImport declaration.
            uint textLen = (uint)ResolveTextEncoding(paramsStr).GetByteCount(srcText);
            ret = QTTSTextPut(sessionID, srcText, textLen, paramsStr);
            if (ret != 0)
            {
                Console.WriteLine($"QTTSTextPut failed, error code: {ret}.");
                QTTSSessionEnd(sessionID, "TextPutError");
                return ret;
            }

            Console.WriteLine("正在合成 ...");

            // Write the header first with its default sizes; the real sizes are patched below.
            WavePcmHdr wavHdr = DefaultWavHdr;
            byte[] wavHdrBytes = StructToBytes(wavHdr);
            fs.Write(wavHdrBytes, 0, wavHdrBytes.Length);

            uint audioLen = 0;
            int synthStatus = TtsFlagStillHaveData;
            while (synthStatus != TtsFlagDataEnd)
            {
                nint dataPtr = QTTSAudioGet(sessionID, ref audioLen, ref synthStatus, ref ret);
                if (ret != 0)
                {
                    break;
                }

                if (dataPtr != nint.Zero)
                {
                    // Copy the native chunk into managed memory before appending it to the file.
                    byte[] data = new byte[audioLen];
                    Marshal.Copy(dataPtr, data, 0, (int)audioLen);
                    fs.Write(data, 0, data.Length);
                    wavHdr.data_size += (int)audioLen;
                }
            }

            if (ret != 0)
            {
                Console.WriteLine($"QTTSAudioGet failed, error code: {ret}.");
                QTTSSessionEnd(sessionID, "AudioGetError");
                return ret;
            }

            // Patch the overall size and the data-chunk size now that the audio length is known.
            wavHdr.size_8 += wavHdr.data_size + (Marshal.SizeOf(wavHdr) - 8);
            fs.Seek(RiffSizeOffset, SeekOrigin.Begin);
            fs.Write(BitConverter.GetBytes(wavHdr.size_8), 0, sizeof(int));
            fs.Seek(DataSizeOffset, SeekOrigin.Begin);
            fs.Write(BitConverter.GetBytes(wavHdr.data_size), 0, sizeof(int));
        }
        catch
        {
            // A managed exception (for example an I/O error while writing the file) must not
            // leave the native session open.
            QTTSSessionEnd(sessionID, "Exception");
            throw;
        }
    }

    ret = QTTSSessionEnd(sessionID, "Normal");
    if (ret != 0)
    {
        Console.WriteLine($"QTTSSessionEnd failed, error code: {ret}.");
    }
    return ret;
}

// Picks the encoding named by the text_encoding entry of a comma-separated key=value session
// parameter string; falls back to UTF-8 when the entry is absent or names an unknown encoding.
private static Encoding ResolveTextEncoding(string paramsStr)
{
    foreach (string pair in (paramsStr ?? string.Empty).Split(','))
    {
        int eq = pair.IndexOf('=');
        if (eq <= 0)
        {
            continue;
        }

        string key = pair.Substring(0, eq).Trim();
        if (!key.Equals("text_encoding", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        string encodingName = pair.Substring(eq + 1).Trim();
        try
        {
            // Code-page encodings such as GB2312 are only available on .NET (Core) after the
            // code-pages provider is registered; registering it repeatedly is harmless.
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
            return Encoding.GetEncoding(encodingName);
        }
        catch (ArgumentException)
        {
            break;
        }
        catch (NotSupportedException)
        {
            break;
        }
    }

    return Encoding.UTF8;
}
4、在linux下编码格式UTF-8:
// Linux session parameters: engine_type is purextts, text_encoding is UTF-8, and the voice
// resources are read from res/xtts/ using forward slashes.
// NOTE(review): this string has spaces around '=' and after some commas — confirm the SDK's
// parameter parser tolerates them.
sessionBeginParams = $"engine_type = purextts,voice_name = {speaker},text_encoding =UTF-8,tts_res_path = fo|res/xtts/{speaker}.jet;fo|res/xtts/common.jet, sample_rate = 16000, speed = {speed}, volume = {volume}, pitch = 50, rdn = 2,reg=2";
5、在linux下合成实现方法:
// Look up the UTF-8 encoding, which matches the text_encoding value used in the Linux
// session parameters.
Encoding encoding = Encoding.GetEncoding("UTF-8");
// Number of bytes srcText occupies once UTF-8 encoded, cast to uint.
uint textLen = (uint)encoding.GetByteCount(srcText);
6、在乌班图下出现问题:
7、需要把下载SDK中资源放到输出目录下:
最终简单的效果先这样吧,以后有时间的话,可以再去摸索一下更复杂的效果。编程不息,Bug 不止;无 Bug,无生活。改 Bug 的冷静、编码的激情、完成后的喜悦、挖坑的激动、填坑的兴奋,这也许就是屌丝程序员的乐趣吧。今天就到这里吧,希望自己有动力一步一步坚持下去;生命不息,代码不止。大家抽空可以看看今天分享的效果,有好的意见和想法,可以在留言板随意留言,我看到后会第一时间回复大家。多谢大家一直以来默默的关注和支持!如果觉得不错,那就伸出您的小手点个赞并关注一下!