- 项目需要把语音转换成文字显示,于是去找语音识别的sdk。最后去看了百度和讯飞的语音识别。最后选了百度的语音识别,当然是因为免费?
- 【注:
- 1.在线语音识别支持识别任意词,离线语音识别仅支持命令词识别(语法模式)。
- 注:首次使用离线,SDK将会后台下载离线授权文件,成功后,授权文件有效期(三年)内无需联网。有效期即将结束后SDK将自动多次尝试联网更新证书。
- 没有纯离线识别。只能离线识别固定短语
- 离线识别目前不支持任意语句。您可以预先定义好,下载bsg文件 http://yuyin.baidu.com/asr
- 把 bds_easr_gramm.dat 文件的内容替换成自己定义的 bsg 文件的内容
- 自定义短语越多效果越差,建议不超过100行
- 】
- 一、首先在百度语音开放平台注册,创建应用,生成API_KEY,SECRET_KEY和APP_ID
- 创建应用时的包名填工程的 Bundle identifier
- 二,下载SDK,先打开官方demo运行看看, 替换创建应用生成的API_KEY,SECRET_KEY和APP_ID
- 三,然后在自己项目集成,开发。 官方文档地址http://ai.baidu.com/docs#/ASR-iOS-SDK/top
- 1.导入所需资源 BDSClientLib BDSClientResource BDSClientHeaders
- 将开发包中BDSClientResource/ASR/BDSClientResources目录以“create folder references”方式添加到工程的资源Group中,注意使用"create groups"方式添加不能生效。
- 将开发包中BDSClientResource/ASR/BDSClientEASRResources目录以"create groups"方式添加到工程目录下即可,
- 2.添加framework
- libiconv.2.4.0.tbd libsqlite3.0.tbd 也要添加,亲试不添加会报错
- plist 访问麦克风权限设置NSMicrophoneUsageDescription -> 麦克风
- 3.导入头文件
- //如果只需要使用识别功能,只需要引入如下头文件
- #import "BDSEventManager.h" #import "BDSASRDefines.h" #import "BDSASRParameters.h"
- @property(nonatomic, strong) BDSEventManager *asrEventManager;
- // 创建语音识别对象
- self.asrEventManager = [BDSEventManager createEventManagerWithName:BDS_ASR_NAME];
- // 设置语音识别代理
- [self.asrEventManager setDelegate:self];
- // 参数配置:在线身份验证
- [self.asrEventManager setParameter:@[API_KEY, SECRET_KEY] forKey:BDS_ASR_API_SECRET_KEYS];
- //离线引擎身份验证 设置 APPID 离线授权所需APPCODE(APPID), 如使用该方式进行正式授权,请移除临时授权文件
- [self.asrEventManager setParameter:APP_ID forKey:BDS_ASR_OFFLINE_APP_CODE];
- //识别策略 @0 : @"在线识别", @4 : @"离在线并行"
- [self.asrEventManager setParameter:@(EVR_STRATEGY_ONLINE) forKey:BDS_ASR_STRATEGY];
- //识别语言 @0 : @"普通话", @1 : @"粤语", @2 : @"英文", @3 : @"四川话"
- [self.asrEventManager setParameter:@(EVoiceRecognitionLanguageChinese) forKey:BDS_ASR_LANGUAGE];
- //采样率 @"自适应", @"8K", @"16K"
- [self.asrEventManager setParameter:@(EVoiceRecognitionRecordSampleRateAuto) forKey:BDS_ASR_SAMPLE_RATE];
- //设置DEBUG_LOG的级别
- //[self.asrEventManager setParameter:@(EVRDebugLogLevelTrace) forKey:BDS_ASR_DEBUG_LOG_LEVEL]
- //是否启用长语音识别
- [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_LONG_SPEECH];
- //开启提示音 @0 : @"关闭", @(EVRPlayToneAll) : @"开启"}
- //使用长语音必须关闭提示音
- [self.asrEventManager setParameter:@(0) forKey:BDS_ASR_PLAY_TONE];
- //开启端点检测 {@NO : @"关闭", @YES : @"开启"} 使用长语音必须开启本地VAD
- //端点检测,即自动检测音频输入的起始点和结束点。SDK默认开启VAD,检测到静音后自动停止识别。
- //如果需要自行控制识别结束需关闭VAD,请同时关闭服务端VAD与端上VAD
- //[self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_LOCAL_VAD];
- // 关闭服务端VAD
- [self.asrEventManager setParameter:@(NO) forKey:BDS_ASR_ENABLE_EARLY_RETURN];
- // 关闭本地VAD
- [self.asrEventManager setParameter:@(NO) forKey:BDS_ASR_ENABLE_LOCAL_VAD];
- //打开的话配置端点检测(二选一)
- //[self configModelVAD];//ModelVAD
- //[self configDNNMFE];//DNNMFE
- // ModelVAD端点检测方式 检测更加精准,抗噪能力强,响应速度较慢
// ModelVAD endpoint detection: more accurate and noise-robust, but slower to respond.
- (void)configModelVAD {
    // Resource file required by ModelVAD; bundled as bds_easr_basic_model.dat.
    NSString *vadResourcePath = [[NSBundle mainBundle] pathForResource:@"bds_easr_basic_model"
                                                                ofType:@"dat"];
    [self.asrEventManager setParameter:vadResourcePath forKey:BDS_ASR_MODEL_VAD_DAT_FILE];
}
- //DNNMFE端点检测方式 提供基础检测功能,性能高,响应速度快
// DNNMFE endpoint detection: basic detection capability, high performance, fast response.
- (void)configDNNMFE {
    // MFE DNN model file.
    NSString *mfeModelPath = [[NSBundle mainBundle] pathForResource:@"bds_easr_mfe_dnn"
                                                             ofType:@"dat"];
    [self.asrEventManager setParameter:mfeModelPath forKey:BDS_ASR_MFE_DNN_DAT_FILE];
    // MFE CMVN (feature normalization) file.
    // FIX: the original line was missing its terminating semicolon and did not compile.
    NSString *cmvnPath = [[NSBundle mainBundle] pathForResource:@"bds_easr_mfe_cmvn"
                                                         ofType:@"dat"];
    [self.asrEventManager setParameter:cmvnPath forKey:BDS_ASR_MFE_CMVN_DAT_FILE];
    // Keep ModelVAD disabled here; enabling it requires its resource file to be configured.
    [self.asrEventManager setParameter:@(NO) forKey:BDS_ASR_ENABLE_MODEL_VAD];
    // MFE supports a custom silence duration (presumably milliseconds — TODO confirm against SDK docs):
    // [self.asrEventManager setParameter:@(500.f) forKey:BDS_ASR_MFE_MAX_SPEECH_PAUSE];
    // [self.asrEventManager setParameter:@(500.f) forKey:BDS_ASR_MFE_MAX_WAIT_DURATION];
}
- // 发送指令:启动识别
- [self.asrEventManager sendCommand:BDS_ASR_CMD_START];
- 【离在线并行
- //离在线并行识别
- // 参数设置:识别策略为离在线并行
- [self.asrEventManager setParameter:@(EVR_STRATEGY_BOTH) forKey:BDS_ASR_STRATEGY];
- // 参数设置:离线识别引擎类型 EVR_OFFLINE_ENGINE_INPUT 输入法模式 EVR_OFFLINE_ENGINE_GRAMMER 离线引擎语法模式
- //离线语音识别仅支持命令词识别(语法模式)。
- //[self.asrEventManager setParameter:@(EVR_OFFLINE_ENGINE_INPUT) forKey:BDS_ASR_OFFLINE_ENGINE_TYPE];
- [self.asrEventManager setParameter:@(EVR_OFFLINE_ENGINE_GRAMMER) forKey:BDS_ASR_OFFLINE_ENGINE_TYPE];
- //并生成bsg文件。下载语法文件后,设置BDS_ASR_OFFLINE_ENGINE_GRAMMER_FILE_PATH参数
- NSString* gramm_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_gramm" ofType:@"dat"];
- // 请在 (官网)[http://speech.baidu.com/asr] 参考模板定义语法,下载语法文件后,替换BDS_ASR_OFFLINE_ENGINE_GRAMMER_FILE_PATH参数
- [self.asrEventManager setParameter:gramm_filepath forKey:BDS_ASR_OFFLINE_ENGINE_GRAMMER_FILE_PATH];
- //离线识别资源文件路径
- NSString* lm_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_basic_model" ofType:@"dat"];
- [self.asrEventManager setParameter:lm_filepath forKey:BDS_ASR_OFFLINE_ENGINE_DAT_FILE_PATH];
- //加载离线引擎
- [self.asrEventManager sendCommand:BDS_ASR_CMD_LOAD_ENGINE];
- 】
- //识别功能代理 BDSClientASRDelegate
- #pragma mark -- 语音识别状态、录音数据等回调均在此代理中发生
// BDSClientASRDelegate callback: every recognition status change, recorded-audio
// chunk and result payload arrives here. workStatus selects the event type and
// aObj carries the event-specific payload (NSData, NSDictionary, NSError, ...).
// FIX: the original contained three bare prose lines inside the switch (not
// commented out), which did not compile; they are now proper comments.
- (void)VoiceRecognitionClientWorkStatus:(int)workStatus obj:(id)aObj {
    switch (workStatus) {
        case EVoiceRecognitionClientWorkStatusNewRecordData: {
            // Raw recorded audio chunk; append it to the capture file.
            [self.fileHandler writeData:(NSData *)aObj];
            NSLog(@"录音数据回调");
            break;
        }
        case EVoiceRecognitionClientWorkStatusStartWorkIng: {
            // Recognition engine started capturing and processing audio.
            NSLog(@"识别工作开始开始采集及处理数据");
            NSDictionary *logDic = [self parseLogToDic:aObj];
            [self printLogTextView:[NSString stringWithFormat:@"开始识别-log: %@\n", logDic]];
            break;
        }
        case EVoiceRecognitionClientWorkStatusStart: {
            // Voice activity detected: the user started speaking.
            NSLog(@"检测到用户开始说话");
            [self printLogTextView:@"检测到用户开始说话.\n"];
            break;
        }
        case EVoiceRecognitionClientWorkStatusEnd: {
            // The user finished speaking; the server has not returned a result yet.
            NSLog(@"用户说话完成,但服务器尚未返回结果");
            [self printLogTextView:@"用户说话完成,但服务器尚未返回结果.\n"];
            self.contentTextView.text = @"无识别结果";
            break;
        }
        case EVoiceRecognitionClientWorkStatusFlushData: {
            // Partial (intermediate) result from the server. To show a "continuous
            // on-screen typing" effect, clear the display area on every such message
            // and render the latest partial text, so segments are not duplicated.
            NSLog(@"逐句显示");
            // FIX: log string was garbled ("中间结" -> "中间结果").
            [self printLogTextView:[NSString stringWithFormat:@"服务器返回了中间结果 - %@.\n\n", [self getDescriptionForDic:aObj]]];
            self.contentTextView.text = @"";
            NSArray *contentArr = aObj[@"results_recognition"];
            // FIX: use firstObject (nil-safe) instead of contentArr[0], which
            // would throw on an empty result array.
            NSString *contentStr = contentArr.firstObject;
            self.contentTextView.text = contentStr;
            break;
        }
        case EVoiceRecognitionClientWorkStatusFinish: {
            // Final result from the recognition server; the results are delivered
            // as an array inside aObj. Clear the display area before rendering to
            // avoid duplicated text. Example payload:
            /*
             "origin_result" = {
                 "corpus_no" = 6643061564690340286;
                 "err_no" = 0;
                 result = {
                     word = (
                         "\U597d\U7684"
                     );
                 };
                 sn = "5EEAC770-DDD2-4D35-8ABF-F407276A7934";
                 "voice_energy" = "29160.45703125";
             };
             "results_recognition" = (
                 "\U597d\U7684"
             );
             */
            NSLog(@"返回了最终结果");
            [self printLogTextView:[NSString stringWithFormat:@"最终结果 - %@.\n", [self getDescriptionForDic:aObj]]];
            if (aObj) {
                self.contentTextView.text = [self getDescriptionForDic:aObj];
            }
            break;
        }
        case EVoiceRecognitionClientWorkStatusMeterLevel: {
            // Current input volume level callback.
            NSLog(@"当前音量回调");
            break;
        }
        case EVoiceRecognitionClientWorkStatusCancel: {
            // Recognition was cancelled by the user.
            NSLog(@"用户主动取消");
            [self printLogTextView:@"用户主动取消.\n"];
            break;
        }
        case EVoiceRecognitionClientWorkStatusError: {
            // Error state (e.g. no speech input). Offline-engine error codes below
            // come from the Baidu SDK; aObj is an NSError.
            NSLog(@"错误状态");
            NSError *error = (NSError *)aObj;
            if (error.code == 2228236) {
                // Offline engine: recognition failed. In grammar mode this usually
                // means the utterance is not covered by the custom grammar rules.
                [self printLogTextView:[NSString stringWithFormat:@"错误状态 -语法模式下,可能为语音不在自定义的语法规则之下\n %@.\n", (NSError *)aObj]];
            } else if (error.code == 2228230) {
                // Offline engine: .dat model file unusable.
                [self printLogTextView:[NSString stringWithFormat:@"错误状态 -dat模型文件不可用,请设置 BDS_ASR_OFFLINE_ENGINE_DAT_FILE_PATH\n %@.\n", (NSError *)aObj]];
            } else if (error.code == 2228231) {
                // Offline engine: grammar file invalid.
                [self printLogTextView:[NSString stringWithFormat:@"错误状态 -grammar文件无效,请设置 BDS_ASR_OFFLINE_ENGINE_GRAMMER_FILE_PATH\n %@.\n", (NSError *)aObj]];
            } else if (error.code == 2225219) {
                // Audio quality too low to recognize.
                [self printLogTextView:[NSString stringWithFormat:@"错误状态 -音频质量过低,无法识别\n %@.\n", (NSError *)aObj]];
            } else {
                [self printLogTextView:[NSString stringWithFormat:@"错误状态 - %@.\n", (NSError *)aObj]];
            }
            break;
        }
        case EVoiceRecognitionClientWorkStatusLoaded: {
            // Offline engine finished loading.
            NSLog(@"离线引擎加载完成");
            [self printLogTextView:@"离线引擎加载完成.\n"];
            break;
        }
        case EVoiceRecognitionClientWorkStatusUnLoaded: {
            // Offline engine finished unloading.
            NSLog(@"离线引擎卸载完成");
            [self printLogTextView:@"离线引擎卸载完成.\n"];
            break;
        }
        case EVoiceRecognitionClientWorkStatusChunkThirdData: {
            // Third-party data embedded in the recognition result (raw NSData).
            NSLog(@"识别结果中的第三方数据");
            [self printLogTextView:[NSString stringWithFormat:@"识别结果中的第三方数据: %lu\n", (unsigned long)[(NSData *)aObj length]]];
            break;
        }
        case EVoiceRecognitionClientWorkStatusChunkNlu: {
            // Semantic (NLU) result embedded in the recognition result, UTF-8 encoded.
            // FIX: log string was garbled ("别结果" -> "识别结果").
            NSLog(@"识别结果中的语义结果");
            NSString *nlu = [[NSString alloc] initWithData:(NSData *)aObj encoding:NSUTF8StringEncoding];
            [self printLogTextView:[NSString stringWithFormat:@"识别结果中的语义结果: %@\n", nlu]];
            NSLog(@"%@", nlu);
            break;
        }
        case EVoiceRecognitionClientWorkStatusChunkEnd: {
            // Recognition session finished; aObj is the session sn.
            NSLog(@"识别过程结束");
            [self printLogTextView:[NSString stringWithFormat:@"识别过程结束, sn: %@.\n", aObj]];
            break;
        }
        case EVoiceRecognitionClientWorkStatusFeedback: {
            // Instrumentation/metrics data reported during recognition.
            NSLog(@"识别过程反馈的打点数据");
            NSDictionary *logDic = [self parseLogToDic:aObj];
            [self printLogTextView:[NSString stringWithFormat:@"识别过程反馈的打点数据: %@\n", logDic]];
            break;
        }
        case EVoiceRecognitionClientWorkStatusRecorderEnd: {
            // Recorder closed. Watch for this event before navigating away, to
            // avoid the status-bar recording indicator lingering (iOS).
            NSLog(@"录音机关闭");
            [self printLogTextView:@"录音机关闭.\n"];
            break;
        }
        case EVoiceRecognitionClientWorkStatusLongSpeechEnd: {
            // Long-speech session ended.
            NSLog(@"长语音结束状态");
            [self printLogTextView:@"长语音结束状态.\n"];
            break;
        }
        default:
            break;
    }
}
- 语音合成的集成步骤与上述语音识别类似
- 如果同时需要语音识别和语音合成需要将两个.a文件其中的一个换个名字重新导入即可
- 1.导入语音合成所需头文件#import "BDSSpeechSynthesizer.h" #import <AVFoundation/AVFoundation.h>
- // 设置apiKey和secretKey
- [[BDSSpeechSynthesizer sharedInstance] setApiKey:API_KEY withSecretKey:SECRET_KEY];
- [BDSSpeechSynthesizer setLogLevel:BDS_PUBLIC_LOG_VERBOSE];
- // 设置委托对象
- [[BDSSpeechSynthesizer sharedInstance] setSynthesizerDelegate:self];
- [[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryPlayback error:nil];
- // //configureOfflineTTS
- // //声学模型数据文件路径
- // NSString *ChineseSpeechData = [[NSBundle mainBundle] pathForResource:@"Chinese_Speech_Female" ofType:@"dat"];
- // //NSString *EnglishSpeechData = [[NSBundle mainBundle] pathForResource:@"English_Speech_Female" ofType:@"dat"];
- // //文本分析数据文件路径
- // NSString *ChineseTextData = [[NSBundle mainBundle] pathForResource:@"Chinese_text" ofType:@"dat"];
- // // NSString *EnglishTextData = [[NSBundle mainBundle] pathForResource:@"English_text" ofType:@"dat"];
- // //授权文件路径,如果没有本地授权可传入nil
- // NSString *LicenseData = [[NSBundle mainBundle] pathForResource:@"bdtts_license" ofType:@"dat"];
- //
- // //启动合成引擎
- // BDSErrEngine loadErr = [[BDSSpeechSynthesizer sharedInstance] startTTSEngine:ChineseTextData speechDataPath:ChineseSpeechData licenseFilePath:LicenseData withAppCode:APP_ID];
- //
- // if(loadErr != 0)
- // {
- // // 处理出错状况
- // }
- //批量开始文本合成但不朗读或添加文本至当前合成过程
- //(NSInteger)synthesizeSentence:(NSString*)sentence withError:
- // 开始合成并播放
- // NSError* speakError = nil;
- // if([[BDSSpeechSynthesizer sharedInstance] speakSentence:@"您好添加文本至当前合成过程" withError:&speakError] == -1){
- // // 错误
- // NSLog(@"错误: %ld, %@", (long)speakError.code, speakError.localizedDescription);
- // }
- // if([[BDSSpeechSynthesizer sharedInstance] speakSentence:@"今天天气真不错" withError:&speakError] == -1){
- // // 错误
- // NSLog(@"错误: %ld, %@", (long)speakError.code, speakError.localizedDescription);
- // }
- // if([[BDSSpeechSynthesizer sharedInstance] speakSentence:@"Today's weather is really good!" withError:&speakError] == -1){
- // // 错误
- // NSLog(@"错误: %ld, %@", (long)speakError.code, speakError.localizedDescription);
- // }
- // //设置播放器音量
- // //[[BDSSpeechSynthesizer sharedInstance] setPlayerVolume:5];
- #pragma mark -- BDSSpeechSynthesizerDelegate
// Delegate callback: the synthesizer began processing the given sentence ID.
- (void)synthesizerStartWorkingSentence:(NSInteger)sentenceID {
    NSLog(@"合成器开始工作 %ld", (long)sentenceID);
}
// Delegate callback: the synthesizer finished processing the given sentence ID.
- (void)synthesizerFinishWorkingSentence:(NSInteger)sentenceID {
    NSLog(@"合成器结束工作 %ld", (long)sentenceID);
}
// Delegate callback: playback of the given sentence ID started.
- (void)synthesizerSpeechStartSentence:(NSInteger)sentenceID {
    NSLog(@"开始朗读 %ld", (long)sentenceID);
}
// Delegate callback: playback of the given sentence ID finished.
- (void)synthesizerSpeechEndSentence:(NSInteger)sentenceID {
    NSLog(@"结束朗读 %ld", (long)sentenceID);
}
// Synthesize a fixed sample sentence and play it immediately.
- (void)speak {
    NSError *error = nil;
    NSInteger result = [[BDSSpeechSynthesizer sharedInstance] speakSentence:@"您好添加文本至当前合成过程"
                                                                  withError:&error];
    if (result == -1) {
        // Synthesis failed to start; log code and description.
        NSLog(@"错误: %ld, %@", (long)error.code, error.localizedDescription);
    }
}
// Abort the current synthesis and stop any ongoing playback.
- (void)cancel {
    BDSSpeechSynthesizer *synthesizer = [BDSSpeechSynthesizer sharedInstance];
    [synthesizer cancel];
}
// Toggle playback: resume when paused, pause when speaking, otherwise do nothing.
- (void)pause:(UIButton *)btn {
    BDSSpeechSynthesizer *synthesizer = [BDSSpeechSynthesizer sharedInstance];
    switch ([synthesizer synthesizerStatus]) {
        case BDS_SYNTHESIZER_STATUS_PAUSED:
            // Currently paused — continue synthesis and playback.
            [synthesizer resume];
            break;
        case BDS_SYNTHESIZER_STATUS_WORKING:
            // Currently speaking — pause synthesis and playback.
            [synthesizer pause];
            break;
        default:
            // Idle or other states: nothing to toggle.
            break;
    }
}
iOS 百度语音识别(在线识别和离在线并行识别) 和语音合成
最新推荐文章于 2024-04-18 17:07:41 发布