http://blog.csdn.net/yiwei151/article/details/78360990
新建脚本，将下列代码复制进去即可：
using LitJson;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using UnityEngine;
using UnityEngine.UI;
/// <summary>
/// Records audio from the default microphone and sends it to the Baidu speech
/// REST endpoint to obtain the recognized text.
/// An access token is requested in <see cref="Awake"/>; call <see cref="StartMic"/>
/// to begin recording and <see cref="EndMic"/> to stop, optionally save the
/// recording as a WAV file, and start recognition.
/// </summary>
public class ToWord : MonoBehaviour
{
    /// <summary>Size in bytes of the WAV header written by <see cref="WriteHeader"/>.</summary>
    private const int WavHeaderSize = 44;

    /// <summary>Scale factor used to convert a float sample to a signed 16-bit sample.</summary>
    private const int RescaleFactor = 32767;

    /// <summary>Recording length (seconds) used when the caller does not specify one.</summary>
    private const int DefaultDurationSeconds = 10;

    private string token = ""; // access_token returned by the token endpoint
    private string cuid = "11"; // user identifier sent with each recognition request
    private string format = "wav"; // audio format declared in the request
    private int rate = 8000; // sample rate (Hz) used for recording and declared in the request
    private int channel = 1; // channel count declared in the request
    private string speech; // audio data, base64 encoded
    private int len; // byte length of the audio data before base64 encoding
    private string lan = "zh"; // language code (not written into the request by this class)
    private string grant_Type = "client_credentials";
    private string client_ID = "这里输入百度的appkey,自己到官网申请填入这里"; // Baidu app key
    private string client_Secret = "这里输入百度secretkey,自己到官网申请填写"; // Baidu secret key
    private string baiduAPI = "http://vop.baidu.com/server_api";
    private string getTokenAPIPath = "https://openapi.baidu.com/oauth/2.0/token";
    private byte[] clipByte;

    /// <summary>
    /// Text produced by the most recent recognition request
    /// (empty string when the request failed).
    /// </summary>
    public static string audioToString;

    /// <summary>Audio source whose clip receives the microphone recording.</summary>
    public AudioSource aud;

    private int audioLength; // length of the last recording, in seconds
    private int requestedDuration = DefaultDurationSeconds; // duration passed to the last StartMic call

    /// <summary>Callback invoked with the recognized text once recognition finishes.</summary>
    public delegate void CallBack(string name);
    public delegate string ds();

    private static ToWord _toWord;
    public MicroPhoneManager m;

    /// <summary>Optional UI text that mirrors <see cref="audioToString"/> every frame.</summary>
    public Text debugText;

    /// <summary>
    /// Registers this component as the shared instance and starts fetching the access token.
    /// </summary>
    private void Awake()
    {
        _toWord = this;
        StartCoroutine(GetToken(getTokenAPIPath));
    }

    /// <summary>
    /// Returns the instance registered in <see cref="Awake"/>.
    /// </summary>
    public static ToWord GetInstance()
    {
        return _toWord;
    }

    /// <summary>
    /// Requests the Baidu access token and stores it in <c>token</c>.
    /// </summary>
    /// <param name="url">URL of the token endpoint.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetToken(string url)
    {
        WWWForm form = new WWWForm();
        form.AddField("grant_type", grant_Type);
        form.AddField("client_id", client_ID);
        form.AddField("client_secret", client_Secret);

        using (WWW request = new WWW(url, form))
        {
            yield return request;

            // Treat both null and empty as "no error".
            if (!string.IsNullOrEmpty(request.error))
            {
                Debug.Log("error:" + request.error);
                yield break;
            }

            try
            {
                token = JsonMapper.ToObject(request.text)["access_token"].ToString();
                Debug.Log("获取百度用户令牌 初始化完成");
            }
            catch (Exception e)
            {
                // Malformed JSON or a response without "access_token": report it
                // instead of letting the coroutine die with an unhandled exception.
                Debug.Log("error:" + e.Message);
            }
        }
    }

    /// <summary>
    /// Starts recording from the default microphone into <see cref="aud"/>'s clip.
    /// Does nothing when no microphone is present or <see cref="aud"/> is not assigned.
    /// </summary>
    /// <param name="durationTime">Maximum recording length in seconds.</param>
    public void StartMic(int durationTime = DefaultDurationSeconds)
    {
        if (Microphone.devices.Length == 0)
        {
            return;
        }

        if (aud == null)
        {
            Debug.Log("error: AudioSource 'aud' is not assigned");
            return;
        }

        Microphone.End(null);
        Debug.Log("Start");
        requestedDuration = durationTime;
        aud.clip = Microphone.Start(null, false, durationTime, rate);
    }

    /// <summary>
    /// Stops recording, optionally saves the clip as a WAV file, and starts the
    /// recognition request.
    /// </summary>
    /// <param name="cb">Invoked with the recognized text (empty string on failure); may be null.</param>
    /// <param name="info">
    /// When non-null, the recording is saved as "&lt;audio data path&gt;/&lt;info.ID&gt;_1.wav".
    /// When null, no file is written.
    /// </param>
    public void EndMic(CallBack cb, BtnInfo info = null)
    {
        // Read the position before ending the recording.
        int lastPos = Microphone.GetPosition(null);
        if (Microphone.IsRecording(null))
        {
            audioLength = lastPos / rate;
        }
        else
        {
            // Recording already stopped on its own; use the duration requested in StartMic.
            audioLength = requestedDuration;
        }

        Debug.Log("录音结束");
        Microphone.End(null);

        clipByte = GetClipData();
        if (clipByte == null)
        {
            // Nothing was recorded: report an empty result instead of dereferencing null.
            len = 0;
            speech = null;
            audioToString = "";
            if (cb != null)
            {
                cb(audioToString);
            }
            return;
        }

        len = clipByte.Length;
        speech = Convert.ToBase64String(clipByte);

        if (info != null)
        {
            SaveClipAsWav(info);
        }

        StartCoroutine(GetAudioString(baiduAPI, cb));
    }

    // Unused placeholder with the CallBack signature; kept from the original class.
    void aaa(string str) { }

    /// <summary>
    /// Writes the current clip to "&lt;audio data path&gt;/&lt;info.ID&gt;_1.wav".
    /// I/O failures are logged and do not prevent the recognition request.
    /// </summary>
    private void SaveClipAsWav(BtnInfo info)
    {
        string path = Utils.GetAudioDataPath() + "/" + info.ID + "_1.wav";
        try
        {
            using (FileStream fs = CreateEmpty(path))
            {
                // Sample data first, then seek back and fill in the header,
                // because the header needs the final stream length.
                ConvertAndWrite(fs, aud.clip);
                WriteHeader(fs, aud.clip);
            }
            Debug.Log("保存成功");
        }
        catch (IOException e)
        {
            Debug.Log("error:" + e.Message);
        }
        catch (UnauthorizedAccessException e)
        {
            Debug.Log("error:" + e.Message);
        }
    }

    /// <summary>
    /// Overwrites the first 44 bytes of <paramref name="stream"/> with a 16-bit PCM
    /// WAV header describing <paramref name="clip"/>. Must be called after the sample
    /// data has been written, since the RIFF chunk size is taken from the stream length.
    /// </summary>
    private void WriteHeader(FileStream stream, AudioClip clip)
    {
        int hz = clip.frequency;
        int channels = clip.channels;
        int samples = clip.samples;

        stream.Seek(0, SeekOrigin.Begin);

        WriteTag(stream, "RIFF");
        WriteInt32(stream, (int)(stream.Length - 8)); // RIFF chunk size
        WriteTag(stream, "WAVE");
        WriteTag(stream, "fmt ");
        WriteInt32(stream, 16);                       // fmt sub-chunk size
        WriteInt16(stream, 1);                        // audio format field
        WriteInt16(stream, channels);
        WriteInt32(stream, hz);
        WriteInt32(stream, hz * channels * 2);        // byte rate: sample rate * channels * bytes per sample
        WriteInt16(stream, channels * 2);             // block align
        WriteInt16(stream, 16);                       // bits per sample
        WriteTag(stream, "data");
        WriteInt32(stream, samples * channels * 2);   // data sub-chunk size
    }

    /// <summary>Writes a four-character chunk tag.</summary>
    private static void WriteTag(Stream stream, string tag)
    {
        byte[] bytes = Encoding.UTF8.GetBytes(tag);
        stream.Write(bytes, 0, 4);
    }

    /// <summary>Writes the low 32 bits of <paramref name="value"/>, least significant byte first.</summary>
    private static void WriteInt32(Stream stream, int value)
    {
        stream.WriteByte((byte)(value & 0xFF));
        stream.WriteByte((byte)((value >> 8) & 0xFF));
        stream.WriteByte((byte)((value >> 16) & 0xFF));
        stream.WriteByte((byte)((value >> 24) & 0xFF));
    }

    /// <summary>Writes the low 16 bits of <paramref name="value"/>, least significant byte first.</summary>
    private static void WriteInt16(Stream stream, int value)
    {
        stream.WriteByte((byte)(value & 0xFF));
        stream.WriteByte((byte)((value >> 8) & 0xFF));
    }

    /// <summary>
    /// Creates (or truncates) the file at <paramref name="filepath"/> and reserves
    /// 44 zero bytes for the header that <see cref="WriteHeader"/> fills in later.
    /// The caller owns the returned stream.
    /// </summary>
    private FileStream CreateEmpty(string filepath)
    {
        FileStream fileStream = new FileStream(filepath, FileMode.Create);
        fileStream.Write(new byte[WavHeaderSize], 0, WavHeaderSize);
        return fileStream;
    }

    /// <summary>
    /// Converts the clip's samples to 16-bit PCM and writes them at the stream's
    /// current position.
    /// </summary>
    private void ConvertAndWrite(FileStream fileStream, AudioClip clip)
    {
        byte[] pcm = ToPcm16(clip);
        fileStream.Write(pcm, 0, pcm.Length);
    }

    /// <summary>
    /// Reads every sample of <paramref name="clip"/> and converts it to 16-bit PCM,
    /// least significant byte first.
    /// </summary>
    private static byte[] ToPcm16(AudioClip clip)
    {
        // clip.samples * clip.channels floats, so the byte count matches the data
        // size WriteHeader declares (samples * channels * 2).
        float[] samples = new float[clip.samples * clip.channels];
        clip.GetData(samples, 0);

        byte[] pcm = new byte[samples.Length * 2];
        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp so an out-of-range sample cannot overflow the 16-bit conversion.
            short value = (short)(Mathf.Clamp(samples[i], -1f, 1f) * RescaleFactor);
            pcm[i * 2] = (byte)(value & 0xFF);
            pcm[i * 2 + 1] = (byte)((value >> 8) & 0xFF);
        }
        return pcm;
    }

    /// <summary>
    /// Sends the recorded audio to the recognition endpoint and stores the
    /// recognized text in <see cref="audioToString"/>.
    /// </summary>
    /// <param name="url">URL of the recognition endpoint.</param>
    /// <param name="cb">Invoked with the result (empty string on any failure); may be null.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetAudioString(string url, CallBack cb)
    {
        // Clear the previous result so a failed request never reports stale text.
        audioToString = "";

        if (string.IsNullOrEmpty(token))
        {
            // The token request has not completed (or failed); the service would reject the call.
            Debug.Log("error: access token is not available yet");
            if (cb != null)
            {
                cb(audioToString);
            }
            yield break;
        }

        byte[] body = Encoding.UTF8.GetBytes(BuildRecognitionRequest());
        using (WWW request = new WWW(url, body))
        {
            yield return request;

            if (string.IsNullOrEmpty(request.error))
            {
                audioToString = ParseRecognitionResult(request.text);
            }
            else
            {
                Debug.Log("error:" + request.error);
            }
        }

        Debug.Log("此次语音文字为:" + audioToString);
        if (cb != null)
        {
            cb(audioToString);
        }
    }

    /// <summary>Builds the JSON body of the recognition request from the current fields.</summary>
    private string BuildRecognitionRequest()
    {
        JsonWriter jw = new JsonWriter();
        jw.WriteObjectStart();
        jw.WritePropertyName("format");
        jw.Write(format);
        jw.WritePropertyName("rate");
        jw.Write(rate);
        jw.WritePropertyName("channel");
        jw.Write(channel);
        jw.WritePropertyName("token");
        jw.Write(token);
        jw.WritePropertyName("cuid");
        jw.Write(cuid);
        jw.WritePropertyName("len");
        jw.Write(len);
        jw.WritePropertyName("speech");
        jw.Write(speech);
        jw.WriteObjectEnd();
        return jw.ToString();
    }

    /// <summary>
    /// Extracts the first recognition result from the response JSON.
    /// Returns an empty string when the response reports an error or cannot be parsed.
    /// </summary>
    private static string ParseRecognitionResult(string json)
    {
        try
        {
            JsonData response = JsonMapper.ToObject(json);
            string status = response["err_msg"].ToString();
            if (status != "success.")
            {
                Debug.Log("error:" + status);
                return "";
            }

            string text = response["result"][0].ToString();

            // Drop a single trailing comma. The full-width form is also accepted:
            // NOTE(review): the service appears to return a full-width comma — confirm.
            if (text.EndsWith(",", StringComparison.Ordinal) ||
                text.EndsWith("，", StringComparison.Ordinal))
            {
                text = text.Substring(0, text.Length - 1);
            }
            return text;
        }
        catch (Exception e)
        {
            // Missing keys or malformed JSON: report and fall back to an empty result.
            Debug.Log("error:" + e.Message);
            return "";
        }
    }

    /// <summary>
    /// Converts the current recording to 16-bit PCM bytes.
    /// </summary>
    /// <returns>The PCM data, or null when there is no clip or the clip is empty.</returns>
    public byte[] GetClipData()
    {
        if (aud == null || aud.clip == null)
        {
            Debug.Log("录音数据为空");
            return null;
        }

        byte[] outData = ToPcm16(aud.clip);
        if (outData.Length <= 0)
        {
            Debug.Log("录音数据为空");
            return null;
        }

        return outData;
    }

    /// <summary>
    /// Debug UI: "Start" begins a recording of the default length, "End" stops it
    /// and runs recognition without a callback or saved file.
    /// </summary>
    private void OnGUI()
    {
        if (GUILayout.Button("Start"))
        {
            StartMic();
        }

        if (GUILayout.Button("End"))
        {
            EndMic(null);
        }
    }

    /// <summary>Mirrors the latest recognized text into <see cref="debugText"/>, when assigned.</summary>
    private void Update()
    {
        if (debugText != null)
        {
            debugText.text = audioToString;
        }
    }
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
- 99
- 100
- 101
- 102
- 103
- 104
- 105
- 106
- 107
- 108
- 109
- 110
- 111
- 112
- 113
- 114
- 115
- 116
- 117
- 118
- 119
- 120
- 121
- 122
- 123
- 124
- 125
- 126
- 127
- 128
- 129
- 130
- 131
- 132
- 133
- 134
- 135
- 136
- 137
- 138
- 139
- 140
- 141
- 142
- 143
- 144
- 145
- 146
- 147
- 148
- 149
- 150
- 151
- 152
- 153
- 154
- 155
- 156
- 157
- 158
- 159
- 160
- 161
- 162
- 163
- 164
- 165
- 166
- 167
- 168
- 169
- 170
- 171
- 172
- 173
- 174
- 175
- 176
- 177
- 178
- 179
- 180
- 181
- 182
- 183
- 184
- 185
- 186
- 187
- 188
- 189
- 190
- 191
- 192
- 193
- 194
- 195
- 196
- 197
- 198
- 199
- 200
- 201
- 202
- 203
- 204
- 205
- 206
- 207
- 208
- 209
- 210
- 211
- 212
- 213
- 214
- 215
- 216
- 217
- 218
- 219
- 220
- 221
- 222
- 223
- 224
- 225
- 226
- 227
- 228
- 229
- 230
- 231
- 232
- 233
- 234
- 235
- 236
- 237
- 238
- 239
- 240
- 241
- 242
- 243
- 244
- 245
- 246
- 247
- 248
- 249
- 250
- 251
- 252
- 253
- 254
- 255
- 256
- 257
- 258
- 259
- 260
- 261
- 262
- 263
- 264
- 265
- 266
- 267
- 268
- 269
- 270
- 271
- 272
- 273
- 274
- 275
- 276
- 277
- 278
- 279
- 280
- 281
- 282
- 283
- 284
- 285
- 286
- 287
- 288
- 289
- 290
- 291
- 292
- 293
- 294
- 295
- 296
- 297
- 298
- 299
- 300
- 301
- 302
- 303
- 304
- 305
- 306
- 307
- 308
- 309
- 310
- 311
- 312
- 313
- 314
百度语音是通过 HTTP 请求的方式来进行识别的，最大的优点是全平台通用，不过识别率没有讯飞的高。