最近在项目开发中,需要实现将语音识别并转换成文本的功能。于是研究了一下科大讯飞的语音SDK,附上Demo分享给大家。
研发前先得做一些准备。
1、注册科大讯飞开发者帐号(http://www.xfyun.cn)
2、下载开发平台(iOS、或Android,或其他)所需要的SDK(SDK包含:说明文档、SDK即iflyMSC.framework、Demo)
3、项目中添加SDK(添加时,先将SDK复制粘贴到项目文件,再通过addframe的方法添加到项目引用),及相关联的framework
添加方法:TARGETS-Build Phases-Link Binary With Libraries-"+"-Choose frameworks and libraries to add-add other,或选择对应的framework-add
4、使用时要添加对应的头文件
特别说明:
1、使用SDK关联的APPID存在于下载的Demo中,如果SDK有替换的话APPID应该跟着一起替换。
2、在使用前,务必在AppDelegate的方法"- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {}"中进行初始化操作。
3、需要有网络的情况下才能使用。
如图
下载的科大讯飞SDK文件
Demo中的APPID
添加SDK
添加关联framework
语音转文本实现代码
- <span style="font-size:14px;">.h文件
- #import <Foundation/Foundation.h>
- // 导入头文件
- #import "iflyMSC.framework/Headers/IFlyMSC.h"
- #import "iflyMSC.framework/Headers/IFlySpeechUtility.h"
- #import "iflyMSC/IFlySpeechConstant.h"
- #pragma mark - 初始化参数类
- /**************************************************************************/
- /// Shared settings for iFlytek dictation (IAT) sessions.
- /// Values are kept as strings because they are handed to the recognizer through
- /// -setParameter:forKey:.
- @interface IATConfig : NSObject
- /// Returns the shared configuration instance.
- + (IATConfig *)sharedInstance;
- /// Accent value for Mandarin (@"mandarin").
- + (NSString *)mandarin;
- /// Accent value for Cantonese (@"cantonese").
- + (NSString *)cantonese;
- /// Accent value for the Henan dialect (@"henanese").
- + (NSString *)henanese;
- /// Language value for Chinese (@"zh_cn").
- + (NSString *)chinese;
- /// Language value for English (@"en_us").
- + (NSString *)english;
- /// Low sample-rate value (@"8000").
- + (NSString *)lowSampleRate;
- /// High sample-rate value (@"16000").
- + (NSString *)highSampleRate;
- /// Punctuation switch value meaning "return punctuation" (@"1").
- + (NSString *)isDot;
- /// Punctuation switch value meaning "no punctuation" (@"0").
- + (NSString *)noDot;
- /**
-  The following parameters must be applied through IFlySpeechRecognizer.
-  They are declared `copy` so that a caller-supplied mutable string cannot be
-  changed behind the configuration's back.
-  */
- /// Maximum recording duration; defaults to @"30000" (presumably milliseconds — confirm against the SDK docs).
- @property (nonatomic, copy) NSString *speechTimeout;
- /// Trailing (end-of-speech) endpoint value; defaults to @"3000".
- @property (nonatomic, copy) NSString *vadEos;
- /// Leading (begin-of-speech) endpoint value; defaults to @"3000".
- @property (nonatomic, copy) NSString *vadBos;
- /// Recognition language; defaults to +chinese.
- @property (nonatomic, copy) NSString *language;
- /// Recognition accent; defaults to +mandarin.
- @property (nonatomic, copy) NSString *accent;
- /// Punctuation switch, see +isDot / +noDot; defaults to @"1".
- @property (nonatomic, copy) NSString *dot;
- /// Audio sample rate, see +lowSampleRate / +highSampleRate; defaults to @"16000".
- @property (nonatomic, copy) NSString *sampleRate;
- /**
-  The following properties need no configuration; callers can ignore them.
-  */
- /// Defaults to NO (no recognition UI by default, per the original author's note).
- @property (nonatomic, assign) BOOL haveView;
- /// NOTE(review): never assigned by the implementation visible in this file.
- @property (nonatomic, copy) NSArray *accentIdentifer;
- /// Display names for the supported accents/languages.
- @property (nonatomic, copy) NSArray *accentNickName;
- @end
- /**************************************************************************/
- #pragma mark - 语音听写类
- /// Block-based wrapper around the iFlytek speech recognizer for dictation.
- @interface VoiceConversion : NSObject
- /// Performs the SDK start-up work (logging setup and app id registration).
- + (void)VoiceInitialize;
- /// Starts recording and recognition.
- /// @param startListening Called once with the result of starting the recognizer (NO when starting failed).
- /// @param begin Called when the recognizer reports the beginning of speech.
- /// @param end Called when the recognizer reports the end of speech.
- /// @param error Called when the session ends; isSuccess is YES when the error code is 0.
- /// @param result Called for every recognition result with the text accumulated so far.
- /// @param volume Called whenever the recognizer reports a volume change.
- - (void)voiceStart:(void (^)(BOOL isStart))startListening
-        speechBegin:(void (^)(void))begin
-          speechEnd:(void (^)(void))end
-        speechError:(void (^)(BOOL isSuccess))error
-       speechResult:(void (^)(NSString *text))result
-       speechVolume:(void (^)(int volume))volume;
- /// Cancels the current recognition session.
- - (void)voiceCancel;
- /// Stops recording.
- - (void)voiceStop;
- @end
- </span>
- <span style="font-size:14px;">.m文件
- #import "VoiceConversion.h"
- #pragma mark - 初始化参数类
- /**************************************************************************/
- // Accent value for Mandarin; returned by +[IATConfig mandarin] and used as the default accent.
- static NSString *const PUTONGHUA = @"mandarin";
- // Accent value for Cantonese; returned by +[IATConfig cantonese].
- static NSString *const YUEYU = @"cantonese";
- // Accent value for the Henan dialect; returned by +[IATConfig henanese].
- static NSString *const HENANHUA = @"henanese";
- // Language value for English; returned by +[IATConfig english].
- static NSString *const ENGLISH = @"en_us";
- // Language value for Chinese; returned by +[IATConfig chinese] and used as the default language.
- static NSString *const CHINESE = @"zh_cn";
- @implementation IATConfig
- /// Creates a configuration populated with the default dictation settings.
- - (instancetype)init
- {
-     self = [super init];
-     if (self)
-     {
-         [self defaultSetting];
-     }
-     return self;
- }
- /// Returns the shared configuration, creating it on first use.
- + (IATConfig *)sharedInstance
- {
-     static IATConfig *instance = nil;
-     static dispatch_once_t onceToken;
-     dispatch_once(&onceToken, ^{
-         instance = [[IATConfig alloc] init];
-     });
-     return instance;
- }
- /// Resets every setting to its default value.
- - (void)defaultSetting
- {
-     _speechTimeout = @"30000";
-     _vadEos = @"3000";
-     _vadBos = @"3000";
-     _dot = @"1";
-     _sampleRate = @"16000";
-     _language = CHINESE;
-     _accent = PUTONGHUA;
-     // No recognition UI by default.
-     _haveView = NO;
-     // The original list ended in "nil nil", which does not compile; an array
-     // literal needs no terminator at all.
-     _accentNickName = @[@"粤语", @"普通话", @"河南话", @"英文"];
- }
- /// Accent value for Mandarin.
- + (NSString *)mandarin
- {
-     return PUTONGHUA;
- }
- /// Accent value for Cantonese.
- + (NSString *)cantonese
- {
-     return YUEYU;
- }
- /// Accent value for the Henan dialect.
- + (NSString *)henanese
- {
-     return HENANHUA;
- }
- /// Language value for Chinese.
- + (NSString *)chinese
- {
-     return CHINESE;
- }
- /// Language value for English.
- + (NSString *)english
- {
-     return ENGLISH;
- }
- /// Low sample-rate value.
- + (NSString *)lowSampleRate
- {
-     return @"8000";
- }
- /// High sample-rate value.
- + (NSString *)highSampleRate
- {
-     return @"16000";
- }
- /// Punctuation switch value: punctuation is returned.
- + (NSString *)isDot
- {
-     return @"1";
- }
- /// Punctuation switch value: punctuation is not returned.
- + (NSString *)noDot
- {
-     return @"0";
- }
- @end
- /**************************************************************************/
- #pragma mark - 语音听写类
- // iFlytek application id handed to the SDK in +VoiceInitialize. The article notes that the
- // id belongs to the downloaded SDK, so it has to be replaced together with the SDK.
- // NOTE(review): the credential is hard-coded in source — consider moving it to build configuration.
- static NSString *const VoiceAPPID = @"572016e4";
- // Presumably the network wait time used for the recognizer's NET_TIMEOUT parameter
- // (likely milliseconds) — confirm against the SDK documentation.
- static NSString *const VoiceTimeOut = @"20000";
- // Private state: the recognizer, the text accumulated for the current session, and the
- // caller-supplied callbacks stored by -voiceStart:... for use from the delegate methods.
- @interface VoiceConversion () <IFlySpeechRecognizerDelegate>
- // Text accumulated from the recognition results of the current session (created lazily).
- @property (nonatomic, strong) NSMutableString *resultText;
- // Recognizer obtained lazily from +[IFlySpeechRecognizer sharedInstance].
- @property (nonatomic, strong) IFlySpeechRecognizer *iFlySpeechRecognizer;
- // Invoked from -onBeginOfSpeech.
- @property (nonatomic, copy) void (^beginSpeech)(void);
- // Invoked from -onEndOfSpeech.
- @property (nonatomic, copy) void (^endSpeech)(void);
- // Invoked from -onError:; isSuccess is YES when the error code is 0.
- @property (nonatomic, copy) void (^errorSpeech)(BOOL isSuccess);
- // Invoked from -onResults:isLast: with the accumulated text.
- @property (nonatomic, copy) void (^resultSpeech)(NSString *text);
- // Invoked from -onVolumeChanged:.
- @property (nonatomic, copy) void (^volumeSpeech)(int volume);
- @end
- @implementation VoiceConversion
- #pragma mark 初始化------------
- /// Configures SDK logging and registers the application id with the iFlytek SDK.
- + (void)VoiceInitialize
- {
-     // Log at every level; the log file is stored under the working path set below.
-     [IFlySetting setLogFile:LVL_ALL];
-     // Also print SDK logs to the console.
-     [IFlySetting showLogcat:YES];
-     // Use the user's Caches directory as the SDK working path.
-     NSArray *cacheDirectories = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
-     [IFlySetting setLogFilePath:cacheDirectories[0]];
-     // The app id uniquely identifies the application and must be supplied at initialisation.
-     // Initialisation is asynchronous and can run at app launch (see MSCAppDelegate.m in the
-     // SDK demo). Using the service before initialisation usually yields error code 10111.
-     NSString *utilityParameters = [NSString stringWithFormat:@"appid=%@", VoiceAPPID];
-     [IFlySpeechUtility createUtility:utilityParameters];
- }
- #pragma mark 实例化------------
- /// Cancels the session of the recognizer this instance acquired (if any) and detaches from it.
- - (void)dealloc
- {
-     // Read the ivar instead of the lazy accessor: the accessor would fetch and reconfigure
-     // the shared recognizer during teardown even when this instance never used it.
-     IFlySpeechRecognizer *recognizer = _iFlySpeechRecognizer;
-     [recognizer cancel];
-     // The recognizer is a shared object that outlives this instance, so stop it from
-     // calling back into a deallocated delegate.
-     // NOTE(review): this matters if the SDK declares `delegate` as assign rather than weak — confirm.
-     if (recognizer.delegate == self)
-     {
-         recognizer.delegate = nil;
-     }
- }
- /// Buffer holding the text recognized so far; created on first access.
- - (NSMutableString *)resultText
- {
-     if (_resultText != nil)
-     {
-         return _resultText;
-     }
-     _resultText = [NSMutableString string];
-     return _resultText;
- }
- /// Returns the recognizer, fetching the shared instance and switching it to dictation on first access.
- - (IFlySpeechRecognizer *)iFlySpeechRecognizer
- {
-     if (_iFlySpeechRecognizer != nil)
-     {
-         return _iFlySpeechRecognizer;
-     }
-     IFlySpeechRecognizer *recognizer = [IFlySpeechRecognizer sharedInstance];
-     // Presumably resets any previously configured parameters — confirm against the SDK docs.
-     [recognizer setParameter:@"" forKey:[IFlySpeechConstant PARAMS]];
-     // Select dictation ("iat") mode.
-     [recognizer setParameter:@"iat" forKey:[IFlySpeechConstant IFLY_DOMAIN]];
-     _iFlySpeechRecognizer = recognizer;
-     return _iFlySpeechRecognizer;
- }
- /// Makes this object the recognizer's delegate and applies the shared IATConfig settings.
- - (void)initializeVoice
- {
-     IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
-     recognizer.delegate = self;
-     IATConfig *config = [IATConfig sharedInstance];
-     // Maximum recording duration.
-     [recognizer setParameter:config.speechTimeout forKey:[IFlySpeechConstant SPEECH_TIMEOUT]];
-     // Trailing (end-of-speech) endpoint.
-     [recognizer setParameter:config.vadEos forKey:[IFlySpeechConstant VAD_EOS]];
-     // Leading (begin-of-speech) endpoint.
-     [recognizer setParameter:config.vadBos forKey:[IFlySpeechConstant VAD_BOS]];
-     // Network wait time: use the file-level constant instead of repeating its literal value.
-     [recognizer setParameter:VoiceTimeOut forKey:[IFlySpeechConstant NET_TIMEOUT]];
-     // Sample rate; 16K is recommended.
-     [recognizer setParameter:config.sampleRate forKey:[IFlySpeechConstant SAMPLE_RATE]];
-     BOOL isChinese = [config.language isEqualToString:[IATConfig chinese]];
-     BOOL isEnglish = [config.language isEqualToString:[IATConfig english]];
-     // Only the two known languages are forwarded; any other value leaves the recognizer untouched.
-     if (isChinese || isEnglish)
-     {
-         [recognizer setParameter:config.language forKey:[IFlySpeechConstant LANGUAGE]];
-     }
-     // The accent (dialect) is only set for Chinese.
-     if (isChinese)
-     {
-         [recognizer setParameter:config.accent forKey:[IFlySpeechConstant ACCENT]];
-     }
-     // Whether punctuation is returned.
-     [recognizer setParameter:config.dot forKey:[IFlySpeechConstant ASR_PTT]];
- }
- #pragma mark 语音听写方法------------
- /// Starts recording: stores the callbacks, configures the recognizer and begins listening.
- - (void)voiceStart:(void (^)(BOOL isStart))startListening speechBegin:(void (^)(void))begin speechEnd:(void (^)(void))end speechError:(void (^)(BOOL isSuccess))error speechResult:(void (^)(NSString *text))result speechVolume:(void (^)(int volume))volume
- {
-     // Start every session with an empty text buffer.
-     [self.resultText setString:@""];
-     // Keep the callbacks for the delegate methods; the properties are declared `copy`.
-     self.beginSpeech = begin;
-     self.endSpeech = end;
-     self.errorSpeech = error;
-     self.resultSpeech = result;
-     self.volumeSpeech = volume;
-     // Apply the shared configuration, then cancel on the recognizer before starting.
-     [self initializeVoice];
-     IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
-     [recognizer cancel];
-     // Take audio from the microphone.
-     [recognizer setParameter:IFLY_AUDIO_SOURCE_MIC forKey:@"audio_source"];
-     // Ask for results in JSON format.
-     [recognizer setParameter:@"json" forKey:[IFlySpeechConstant RESULT_TYPE]];
-     // Save the recording in the SDK working path (library/cache when no working path is set).
-     [recognizer setParameter:@"asr.pcm" forKey:[IFlySpeechConstant ASR_AUDIO_PATH]];
-     BOOL didStart = [recognizer startListening];
-     if (!startListening)
-     {
-         return;
-     }
-     // A NO result may mean the previous request has not finished; concurrent sessions are not supported.
-     startListening(didStart);
- }
- /// Cancels dictation by sending -cancel to the recognizer.
- - (void)voiceCancel
- {
-     IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
-     [recognizer cancel];
- }
- /// Stops recording by sending -stopListening to the recognizer.
- - (void)voiceStop
- {
-     IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
-     [recognizer stopListening];
- }
- #pragma mark IFlySpeechRecognizerDelegate------------
- /**
-  Recognition result delegate callback.
-  Joins the keys of the first result dictionary into a JSON string, extracts its text,
-  appends the text to the session buffer and reports the accumulated text.
-  @param results Recognition results; only the first element is read.
-  @param isLast  Whether this is the final result (not used here).
-  */
- - (void)onResults:(NSArray *)results isLast:(BOOL)isLast
- {
-     // -firstObject returns nil for an empty array, where results[0] would throw.
-     NSDictionary *fragments = results.firstObject;
-     NSMutableString *json = [NSMutableString string];
-     if ([fragments isKindOfClass:[NSDictionary class]])
-     {
-         for (NSString *key in fragments)
-         {
-             [json appendFormat:@"%@", key];
-         }
-     }
-     NSString *recognized = [[self class] stringFromJson:json];
-     if (recognized.length > 0)
-     {
-         [self.resultText appendString:recognized];
-     }
-     if (self.resultSpeech)
-     {
-         // Hand out an immutable snapshot so later results cannot change text the caller kept.
-         self.resultSpeech([self.resultText copy]);
-     }
- }
- /**
-  Session-finished delegate callback.
-  @param error Result of the session; an errorCode of 0 means it ended normally,
-               any other value means an error occurred.
-  */
- - (void)onError:(IFlySpeechError *)error
- {
-     void (^handler)(BOOL) = self.errorSpeech;
-     if (!handler)
-     {
-         return;
-     }
-     handler(error.errorCode == 0);
- }
- /**
-  End-of-recording delegate callback; forwards to the stored endSpeech block.
-  */
- - (void)onEndOfSpeech
- {
-     void (^handler)(void) = self.endSpeech;
-     if (!handler)
-     {
-         return;
-     }
-     handler();
- }
- /**
-  Start-of-recognition delegate callback; forwards to the stored beginSpeech block.
-  */
- - (void)onBeginOfSpeech
- {
-     void (^handler)(void) = self.beginSpeech;
-     if (!handler)
-     {
-         return;
-     }
-     handler();
- }
- /**
-  Volume delegate callback; forwards the value to the stored volumeSpeech block.
-  @param volume Current volume (0-30 according to the original author's note).
-  */
- - (void)onVolumeChanged:(int)volume
- {
-     void (^handler)(int) = self.volumeSpeech;
-     if (!handler)
-     {
-         return;
-     }
-     handler(volume);
- }
- #pragma mark 解析方法------------
- /**************************************************************************/
- /**
-  Parses the result text returned for command-word recognition.
-  Each line that contains "confidence=", " grammar=" and "input=" contributes one output
-  line of the form "<input> 置信度<confidence>\n". An id of "nomatch" makes the whole
-  result an empty string.
-  @param params Newline-separated result lines.
-  @return The formatted text; empty when no line could be parsed.
-  */
- + (NSString *)stringFromAsr:(NSString *)params
- {
-     NSMutableString *resultString = [[NSMutableString alloc] init];
-     NSCharacterSet *whitespace = [NSCharacterSet whitespaceAndNewlineCharacterSet];
-     for (NSString *line in [params componentsSeparatedByString:@"\n"])
-     {
-         NSRange idRange = [line rangeOfString:@"id="];
-         NSRange nameRange = [line rangeOfString:@"name="];
-         NSRange confidenceRange = [line rangeOfString:@"confidence="];
-         NSRange grammarRange = [line rangeOfString:@" grammar="];
-         NSRange inputRange = [line rangeOfString:@"input="];
-         if (confidenceRange.length == 0 || grammarRange.length == 0 || inputRange.length == 0)
-         {
-             continue;
-         }
-         // Check for "nomatch".
-         if (idRange.length != 0)
-         {
-             NSUInteger idStart = NSMaxRange(idRange);
-             // The id value runs up to "name=" when that follows it; otherwise up to the next
-             // whitespace (or the end of the line). Subtracting blindly would underflow and
-             // make -substringWithRange: throw when "name=" is missing or precedes "id=".
-             NSUInteger idEnd = line.length;
-             if (nameRange.location != NSNotFound && nameRange.location >= idStart)
-             {
-                 idEnd = nameRange.location;
-             }
-             else
-             {
-                 NSRange tail = NSMakeRange(idStart, line.length - idStart);
-                 NSRange gap = [line rangeOfCharacterFromSet:whitespace options:0 range:tail];
-                 if (gap.location != NSNotFound)
-                 {
-                     idEnd = gap.location;
-                 }
-             }
-             NSString *rawId = [line substringWithRange:NSMakeRange(idStart, idEnd - idStart)];
-             NSString *idValue = [rawId stringByTrimmingCharactersInSet:whitespace];
-             if ([idValue isEqualToString:@"nomatch"])
-             {
-                 return @"";
-             }
-         }
-         // The confidence value sits between "confidence=" and " grammar="; skip lines where
-         // the fields appear in a different order instead of building an invalid range.
-         NSUInteger confidenceStart = NSMaxRange(confidenceRange);
-         if (grammarRange.location < confidenceStart)
-         {
-             continue;
-         }
-         NSRange scoreRange = NSMakeRange(confidenceStart, grammarRange.location - confidenceStart);
-         NSString *score = [line substringWithRange:scoreRange];
-         // The input text is everything after "input=".
-         NSString *inputString = [line substringFromIndex:NSMaxRange(inputRange)];
-         [resultString appendFormat:@"%@ 置信度%@\n", inputString, score];
-     }
-     return resultString;
- }
- /**
-  Extracts the recognized text from a dictation result in JSON format by concatenating
-  every "w" value found under "ws" -> "cw".
-  Example input:
-  {"sn":1,"ls":true,"bg":0,"ed":0,"ws":[{"bg":0,"cw":[{"w":"白日","sc":0}]},{"bg":0,"cw":[{"w":"依山","sc":0}]},{"bg":0,"cw":[{"w":"尽","sc":0}]},{"bg":0,"cw":[{"w":"黄河入海流","sc":0}]},{"bg":0,"cw":[{"w":"。","sc":0}]}]}
-  @param params The JSON text.
-  @return nil when params is nil; otherwise the concatenated words, which is an empty
-          string when the input is not valid JSON of the expected shape.
-  */
- + (NSString *)stringFromJson:(NSString *)params
- {
-     if (params == nil)
-     {
-         return nil;
-     }
-     NSMutableString *text = [[NSMutableString alloc] init];
-     // The data must be UTF-8 encoded.
-     NSData *jsonData = [params dataUsingEncoding:NSUTF8StringEncoding];
-     if (jsonData == nil)
-     {
-         // NSJSONSerialization throws on nil data.
-         return text;
-     }
-     id root = [NSJSONSerialization JSONObjectWithData:jsonData options:kNilOptions error:nil];
-     // Valid JSON may still have an unexpected shape; check every level before using it so a
-     // malformed response cannot crash with an unrecognized selector or a nil append.
-     if (![root isKindOfClass:[NSDictionary class]])
-     {
-         return text;
-     }
-     NSArray *segments = root[@"ws"];
-     if (![segments isKindOfClass:[NSArray class]])
-     {
-         return text;
-     }
-     for (NSDictionary *segment in segments)
-     {
-         if (![segment isKindOfClass:[NSDictionary class]])
-         {
-             continue;
-         }
-         NSArray *candidates = segment[@"cw"];
-         if (![candidates isKindOfClass:[NSArray class]])
-         {
-             continue;
-         }
-         for (NSDictionary *candidate in candidates)
-         {
-             if (![candidate isKindOfClass:[NSDictionary class]])
-             {
-                 continue;
-             }
-             NSString *word = candidate[@"w"];
-             if ([word isKindOfClass:[NSString class]])
-             {
-                 [text appendString:word];
-             }
-         }
-     }
-     return text;
- }
- /**
-  Parses the JSON result of grammar (ABNF) recognition.
-  Every entry found under "ws" -> "cw" contributes one line of the form
-  "<w> 置信度:<sc>\n".
-  @param params The JSON text.
-  @return nil when params is nil; otherwise the formatted text, which is an empty string
-          when the input is not valid JSON of the expected shape.
-  */
- + (NSString *)stringFromABNFJson:(NSString *)params
- {
-     if (params == nil)
-     {
-         return nil;
-     }
-     NSMutableString *text = [[NSMutableString alloc] init];
-     NSData *jsonData = [params dataUsingEncoding:NSUTF8StringEncoding];
-     if (jsonData == nil)
-     {
-         // NSJSONSerialization throws on nil data.
-         return text;
-     }
-     id root = [NSJSONSerialization JSONObjectWithData:jsonData options:kNilOptions error:nil];
-     // Check the shape at every level so a malformed response cannot crash the parser.
-     if (![root isKindOfClass:[NSDictionary class]])
-     {
-         return text;
-     }
-     NSArray *segments = root[@"ws"];
-     if (![segments isKindOfClass:[NSArray class]])
-     {
-         return text;
-     }
-     for (NSDictionary *segment in segments)
-     {
-         if (![segment isKindOfClass:[NSDictionary class]])
-         {
-             continue;
-         }
-         NSArray *candidates = segment[@"cw"];
-         if (![candidates isKindOfClass:[NSArray class]])
-         {
-             continue;
-         }
-         for (NSDictionary *candidate in candidates)
-         {
-             if (![candidate isKindOfClass:[NSDictionary class]])
-             {
-                 continue;
-             }
-             NSString *word = candidate[@"w"];
-             if (![word isKindOfClass:[NSString class]])
-             {
-                 // -appendString: throws on nil, so entries without a word are skipped.
-                 continue;
-             }
-             // "sc" is formatted with %@, so it may be a number or a string.
-             id score = candidate[@"sc"];
-             [text appendString:word];
-             [text appendFormat:@" 置信度:%@", score];
-             [text appendString:@"\n"];
-         }
-     }
-     return text;
- }
- /**************************************************************************/
- @end
- </span>
使用
- <span style="font-size:14px;">初始化方法
- /// Configures SDK logging and registers the application id with the iFlytek SDK.
- + (void)VoiceInitialize
- {
-     // Log at every level; the log file is stored under the working path set below.
-     [IFlySetting setLogFile:LVL_ALL];
-     // Also print SDK logs to the console.
-     [IFlySetting showLogcat:YES];
-     // Use the user's Caches directory as the SDK working path.
-     NSArray *cacheDirectories = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
-     [IFlySetting setLogFilePath:cacheDirectories[0]];
-     // The app id uniquely identifies the application and must be supplied at initialisation.
-     // Initialisation is asynchronous and can run at app launch (see MSCAppDelegate.m in the
-     // SDK demo). Using the service before initialisation usually yields error code 10111.
-     NSString *utilityParameters = [NSString stringWithFormat:@"appid=%@", VoiceAPPID];
-     [IFlySpeechUtility createUtility:utilityParameters];
- }
- 初始化调用
- // Application launch hook: runs the speech SDK initialisation at start-up, as the article
- // requires before the recognizer is used, and reports a successful launch.
- - (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions
- {
- // Override point for customization after application launch.
- [VoiceConversion VoiceInitialize];
- return YES;
- }</span>
- <span style="font-size:14px;">#import "VoiceConversion.h"
- // Private state of the demo screen.
- @interface ViewController ()
- // Speech-to-text helper, created lazily by its getter.
- @property (nonatomic, strong) VoiceConversion *voiceConversion;
- // Label that shows status messages, the recognized text and the current volume.
- @property (nonatomic, strong) UILabel *messageLabel;
- @end
- /// Demo screen: three navigation-bar buttons start, stop and cancel dictation, and a
- /// label shows the status, the recognized text and the volume.
- @implementation ViewController
- /// Installs the start/stop/cancel bar buttons, sets the title and builds the view.
- - (void)viewDidLoad
- {
-     [super viewDidLoad];
-     UIBarButtonItem *startItem = [[UIBarButtonItem alloc] initWithTitle:@"start" style:UIBarButtonItemStyleDone target:self action:@selector(startItemClick:)];
-     UIBarButtonItem *stopItem = [[UIBarButtonItem alloc] initWithTitle:@"stop" style:UIBarButtonItemStyleDone target:self action:@selector(stopItemClick:)];
-     UIBarButtonItem *cancelItem = [[UIBarButtonItem alloc] initWithTitle:@"cancel" style:UIBarButtonItemStyleDone target:self action:@selector(cancelItemClick:)];
-     self.navigationItem.rightBarButtonItems = @[startItem, stopItem, cancelItem];
-     self.title = @"科大讯飞语音";
-     [self setUI];
- }
- - (void)didReceiveMemoryWarning
- {
-     [super didReceiveMemoryWarning];
-     // Dispose of any resources that can be recreated.
- }
- #pragma mark - View
- /// Creates the message label and adds it to the view.
- - (void)setUI
- {
-     // Keep content from extending under the bars where the API is available.
-     if ([self respondsToSelector:@selector(setEdgesForExtendedLayout:)])
-     {
-         [self setEdgesForExtendedLayout:UIRectEdgeNone];
-     }
-     CGFloat margin = 10.0;
-     CGRect labelFrame = CGRectMake(margin, 10.0, CGRectGetWidth(self.view.bounds) - margin * 2, 40.0);
-     self.messageLabel = [[UILabel alloc] initWithFrame:labelFrame];
-     [self.view addSubview:self.messageLabel];
-     self.messageLabel.backgroundColor = [UIColor colorWithWhite:0.5 alpha:0.3];
-     self.messageLabel.textAlignment = NSTextAlignmentCenter;
- }
- #pragma mark - Actions
- /// Starts dictation and mirrors the session events into the message label.
- - (void)startItemClick:(UIBarButtonItem *)item
- {
-     __weak typeof(self) weakSelf = self;
-     [self.voiceConversion voiceStart:^(BOOL isStart) {
-         NSLog(@"1 start");
-         if (isStart)
-         {
-             weakSelf.messageLabel.text = @"正在录音";
-         }
-         else
-         {
-             weakSelf.messageLabel.text = @"启动识别服务失败,请稍后重试";
-         }
-     } speechBegin:^{
-         NSLog(@"2 begin");
-     } speechEnd:^{
-         NSLog(@"3 end");
-     } speechError:^(BOOL isSuccess) {
-         NSLog(@"4 error");
-         // This callback also fires on normal completion, so only a real failure is shown.
-         // Previously the flag was ignored and failures never reached the user.
-         if (!isSuccess)
-         {
-             weakSelf.messageLabel.text = @"识别失败,请稍后重试";
-         }
-     } speechResult:^(NSString *text) {
-         NSLog(@"5 result");
-         weakSelf.messageLabel.text = text;
-     } speechVolume:^(int volume) {
-         NSLog(@"6 volume");
-         NSString *volumeString = [NSString stringWithFormat:@"音量:%d", volume];
-         weakSelf.messageLabel.text = volumeString;
-     }];
- }
- /// Stops recording and shows the corresponding status.
- - (void)stopItemClick:(UIBarButtonItem *)item
- {
-     [self.voiceConversion voiceStop];
-     self.messageLabel.text = @"停止录音";
- }
- /// Cancels recognition and shows the corresponding status.
- - (void)cancelItemClick:(UIBarButtonItem *)item
- {
-     [self.voiceConversion voiceCancel];
-     self.messageLabel.text = @"取消识别";
- }
- #pragma mark - getter
- /// Lazily creates the speech-to-text helper.
- - (VoiceConversion *)voiceConversion
- {
-     if (!_voiceConversion)
-     {
-         _voiceConversion = [[VoiceConversion alloc] init];
-     }
-     return _voiceConversion;
- }
- @end
- </span>
-
顶
- 1