在上一章中已详细说明如何读取本地音频文件,并调用Google Cloud Speech API将其转换为文字。从中可以看出,该方式严格限制音频文件的播放时长必须在60s以内。针对此限制,Google提供的方案是先将音频文件上传到Cloud Storage的Bucket中,再进行文字转换,从而大大放宽了对音频播放时长的限制。关于在Google Cloud Platform中的一系列操作,以及将文件上传到Cloud Storage Bucket的方式,请查看GoogleCloudSpeechAPI 的调用注意事项 。
读取Cloud Storage Bucket中的音频文件进行文字转换
注:如下Demo是Windows应用程序,所有方法均为static。
1.创建类型为CloudSpeechAPIService的方法,目的是通过环境变量获取Google的凭证,连接在云平台建立的项目。PS:如果此方法出现异常,请查看上一章的前提条件二。
/// <summary>
/// Creates a <see cref="CloudSpeechAPIService"/> client authorized with the
/// application default Google credential.
/// </summary>
/// <returns>A Speech API service client bound to the resolved credential.</returns>
public static CloudSpeechAPIService CreateAuthorizedClient()
{
    // Application default credential (per the original note, resolved from the
    // GOOGLE_APPLICATION_CREDENTIALS environment variable).
    GoogleCredential defaultCredential = GoogleCredential.GetApplicationDefaultAsync().Result;

    // Only scope the credential when the credential type requires it.
    GoogleCredential effectiveCredential = defaultCredential.IsCreateScopedRequired
        ? defaultCredential.CreateScoped(new[] { CloudSpeechAPIService.Scope.CloudPlatform })
        : defaultCredential;

    var initializer = new BaseClientService.Initializer();
    initializer.HttpClientInitializer = effectiveCredential;
    initializer.ApplicationName = "DotNet Google Cloud Platform Speech Sample";

    return new CloudSpeechAPIService(initializer);
}
2.选择Cloud Storage Bucket中的音频文件,调用该API进行文字转换。PS:音频文件格式最好为单声道PCM,播放时长应小于80分钟,否则转换容易出错。
/// <summary>
/// Transcribes an audio file stored in a Cloud Storage bucket through the
/// asynchronous recognize endpoint, prints the transcript to the console and
/// saves it (plus timing information) to "&lt;audio name&gt;_result.txt" in the
/// current directory. Blocks until the server-side operation is done, then
/// waits for a key press.
/// </summary>
private static void SpeechTotextByUri()
{
    // gs:// URI of the audio object to transcribe, e.g. the sample provided by
    // Google: "gs://cloud-samples-tests/speech/brooklyn.flac".
    string audioUri = "gs://Storage中的Bucket名称/该Bucket中的音频文件";

    // Base name of the result file. The original code referenced an undeclared
    // `filename`; derive it from the audio object's name instead.
    string filename = Path.GetFileNameWithoutExtension(audioUri);

    var service = CreateAuthorizedClient(); // obtain the Google Cloud authorization
    var request = new Google.Apis.CloudSpeechAPI.v1beta1.Data.AsyncRecognizeRequest()
    {
        Config = new Google.Apis.CloudSpeechAPI.v1beta1.Data.RecognitionConfig()
        {
            Encoding = "LINEAR16",   // PCM encoding
            SampleRate = 16000,      // 16000 Hz sample rate
            LanguageCode = "en-US"   // English audio; use "cmn-Hans-CN" for Chinese audio
        },
        Audio = new Google.Apis.CloudSpeechAPI.v1beta1.Data.RecognitionAudio()
        {
            Uri = audioUri
        }
    };

    DateTime startTime = DateTime.Now;
    Console.WriteLine("Sarte Time :" + startTime);

    // Accumulates everything that is written to the result file.
    StringBuilder sb = new StringBuilder();
    try
    {
        var asyncResponse = service.Speech.Asyncrecognize(request).Execute();
        var name = asyncResponse.Name;

        // Poll the long-running operation once per second until it reports done.
        Google.Apis.CloudSpeechAPI.v1beta1.Data.Operation op;
        do
        {
            Console.WriteLine("Waiting for server processing...");
            Thread.Sleep(1000);
            op = service.Operations.Get(name).Execute();
        } while (!op.Done.GetValueOrDefault());

        if (op.Error != null)
        {
            // A failed operation carries no usable response; report the server
            // error instead of dereferencing op.Response.
            if (op.Error.Message != null)
            {
                Console.WriteLine(op.Error.Message);
            }
        }
        else
        {
            // "results" may be missing from the response (looked up defensively
            // so a missing key does not throw).
            object rawResults = null;
            if (op.Response != null && op.Response.TryGetValue("results", out rawResults) && rawResults != null)
            {
                dynamic results = rawResults;
                foreach (var result in results)
                {
                    foreach (var alternative in result.alternatives)
                    {
                        sb.Append(alternative.transcript);
                    }
                }
            }

            Console.WriteLine("The result:");
            Console.WriteLine();
            Console.WriteLine(sb.ToString());
            Console.WriteLine();
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }

    DateTime endTime = DateTime.Now;
    var timeTaken = endTime - startTime;
    Console.WriteLine("End Time:" + endTime + "\t" + "Time-taken:" + (timeTaken));
    sb.Append("\r\nEnd Time:" + endTime + "\t" + "Time-taken:" + (timeTaken));

    try
    {
        // Save the transcript locally; `using` closes the writer even if Write throws.
        string resultPath = Path.Combine(Directory.GetCurrentDirectory(), $"{filename}_result.txt");
        using (StreamWriter sw = new StreamWriter(resultPath))
        {
            sw.Write(sb.ToString());
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Write file failed!\t" + ex.Message);
    }

    Console.WriteLine();
    Console.WriteLine("Enter any key to exit the program!");
    Console.ReadKey();
}
PS:使用的命名空间有:
using System;
using Google.Apis.CloudSpeechAPI.v1beta1;
using Google.Apis.Auth.OAuth2;
using Google.Apis.Services;
using System.IO;
using System.Text;
using System.Threading;
using Google.Cloud.Storage.V1;
结果如下: