iOS 之简单语音听写功能

我用到的语音听写功能主要是集成讯飞的sdk完成功能,现在主要写一下步骤,以防自己以后会忘记

主要的步骤就是:在讯飞开发者网站注册一个账号——创建iOS应用——获取一个appID——下载SDK,http://www.xfyun.cn/doccenter/iOS
根据开发文档的描写添加辅助库  demo的下载地址:https://github.com/tuwanli/VoiceIdentify

注:添加 iflyMSC.framework 时,请注意工程 BuildSetting中的framwork path的设置,如果出现找不到framework的情况,可以将path 清空,在 xcode 中删除 framework,然后重新添加。
如果使用了离线识别,还需要增加 libc++.dylib。 

如果你出现找不到头文件的错误那大概就是因为sdk的路径出现了问题

看主要代码:
ViewController.h
<span style="font-size:18px;">//
//  ViewController.h
//  VoiceIdentify
//
//  Created by 涂婉丽 on 16/1/22.
//  Copyright © 2016年 涂婉丽. All rights reserved.
//

#import <UIKit/UIKit.h>
#import "iflyMSC/iflyMSC.h"
@class IFlyDataUploader;
@class IFlySpeechRecognizer;
@interface ViewController : UIViewController<IFlySpeechRecognizerDelegate,IFlyRecognizerViewDelegate,UIActionSheetDelegate>
@property (nonatomic, strong) NSString *pcmFilePath;//音频文件路径
@property (nonatomic, strong) IFlySpeechRecognizer *iFlySpeechRecognizer;//不带界面的识别对象
@property (nonatomic, strong) IFlyRecognizerView *iflyRecognizerView;//带界面的识别对象
@property (nonatomic, strong) IFlyDataUploader *uploader;//数据上传对象
- (IBAction)resignizeAction;
@property (unsafe_unretained, nonatomic) IBOutlet UITextView *textView;
@property (nonatomic, strong) NSString * result;
@end</span>

ViewController.m

//  ViewController.m
//  VoiceIdentify
//
//  Created by 涂婉丽 on 16/1/22.
//  Copyright © 2016年 涂婉丽. All rights reserved.
//

#import "ViewController.h"
#import <QuartzCore/QuartzCore.h>
#import "ISRDataHelper.h"
#define APPID_VALUE           @"56a1b495"
#define TIMEOUT_VALUE         @"20000"

#import "iflyMSC/IFlyMSC.h"
@interface ViewController ()

@end

@implementation ViewController

- (void)viewDidLoad {
    [super viewDidLoad];
    
    self.uploader = [[IFlyDataUploader alloc] init];
    
    //demo录音文件保存路径
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
    NSString *cachePath = [paths objectAtIndex:0];
    _pcmFilePath = [[NSString alloc] initWithFormat:@"%@",[cachePath stringByAppendingPathComponent:@"asr.pcm"]];
    _textView.clipsToBounds = YES;
    _textView.layer.cornerRadius = 5;
    _textView.layer.borderColor=[UIColor lightGrayColor].CGColor;
    _textView.layer.borderWidth = 0.5;
}
- (void)viewWillAppear:(BOOL)animated
{

    [super viewWillAppear:animated];
    [self initRecognizer ];
}
- (void)didReceiveMemoryWarning {
    [super didReceiveMemoryWarning];
    // Dispose of any resources that can be recreated.
}

- (IBAction)resignizeAction {
//    if(_iflyRecognizerView == nil)
//    {
//        [self initRecognizer ];
//    }
    
    [_textView setText:@""];
    [_textView resignFirstResponder];
    
    //设置音频来源为麦克风
    [_iflyRecognizerView setParameter:IFLY_AUDIO_SOURCE_MIC forKey:@"audio_source"];
    
    //设置听写结果格式为json
    [_iflyRecognizerView setParameter:@"plain" forKey:[IFlySpeechConstant RESULT_TYPE]];
    
    //保存录音文件,保存在sdk工作路径中,如未设置工作路径,则默认保存在library/cache下
    [_iflyRecognizerView setParameter:@"asr.pcm" forKey:[IFlySpeechConstant ASR_AUDIO_PATH]];
    
    [_iflyRecognizerView start];
}
- (void)initRecognizer
{
    NSLog(@"%s",__func__);
    
    _iflyRecognizerView = [[IFlyRecognizerView alloc] initWithCenter:self.view.center];
    _iflyRecognizerView.delegate = self;
    [_iflyRecognizerView setParameter: @"iat" forKey: [IFlySpeechConstant IFLY_DOMAIN]];
    //asr_audio_path保存录音文件名,如不再需要,设置value为nil表示取消,默认目录是documents
    [_iflyRecognizerView setParameter:@"asrview.pcm " forKey:[IFlySpeechConstant ASR_AUDIO_PATH]];
    
    _iflyRecognizerView.delegate = self;
    
    
    
    //设置最长录音时间
    [_iflyRecognizerView setParameter:@"30000" forKey:[IFlySpeechConstant SPEECH_TIMEOUT]];
    //设置后端点
    [_iflyRecognizerView setParameter:@"3000" forKey:[IFlySpeechConstant VAD_EOS]];
    //设置前端点
    [_iflyRecognizerView setParameter:@"3000" forKey:[IFlySpeechConstant VAD_BOS]];
    //网络等待时间
    [_iflyRecognizerView setParameter:@"20000" forKey:[IFlySpeechConstant NET_TIMEOUT]];
    
    //设置采样率,推荐使用16K
    [_iflyRecognizerView setParameter:@"16000" forKey:[IFlySpeechConstant SAMPLE_RATE]];
    //设置语言
    [_iflyRecognizerView setParameter:@"zh_cn" forKey:[IFlySpeechConstant LANGUAGE]];
    //设置是否返回标点符号
    [_iflyRecognizerView setParameter:@"1" forKey:[IFlySpeechConstant ASR_PTT]];
    
}
/**
 无界面,听写结果回调
 results:听写结果
 isLast:表示最后一次
 ****/
- (void) onResults:(NSArray *) results isLast:(BOOL)isLast
{
    
    NSMutableString *resultString = [[NSMutableString alloc] init];
    NSDictionary *dic = results[0];
    for (NSString *key in dic) {
        [resultString appendFormat:@"%@",key];
    }
    _result =[NSString stringWithFormat:@"%@%@", _textView.text,resultString];
    NSString * resultFromJson =  [ISRDataHelper stringFromJson:resultString];
    _textView.text = [NSString stringWithFormat:@"%@%@", _textView.text,resultFromJson];
    
    if (isLast){
        NSLog(@"听写结果(json):%@测试",  self.result);
    }
    NSLog(@"_result=%@",_result);
    NSLog(@"resultFromJson=%@",resultFromJson);
    NSLog(@"isLast=%d,_textView.text=%@",isLast,_textView.text);
}
/**
 有界面,听写结果回调
 resultArray:听写结果
 isLast:表示最后一次
 ****/
- (void)onResult:(NSArray *)resultArray isLast:(BOOL)isLast
{
    NSLog(@"---------------%@",resultArray);
    NSLog(@"bbbbbbbbbbbbb%d",isLast);
    NSMutableString *result = [[NSMutableString alloc] init];
    NSDictionary *dic = [resultArray objectAtIndex:0];
    NSLog(@"==============%@",dic);
    for (NSString *key in dic) {
        [result appendFormat:@"%@",key];
    }
    _textView.text = [NSString stringWithFormat:@"%@%@",_textView.text,result];
}
/**
 开始识别回调
 ****/
- (void) onBeginOfSpeech
{
    NSLog(@"onBeginOfSpeech");
}

/**
 停止录音回调
 ****/
- (void) onEndOfSpeech
{
    NSLog(@"onEndOfSpeech");
    
}
/**
 听写结束回调(注:无论听写是否正确都会回调)
 error.errorCode =
 0     听写正确
 other 听写出错
 ****/
- (void) onError:(IFlySpeechError *) error
{
    NSLog(@"%s",__func__);
    
    NSLog(@"errorCode:%d",[error errorCode]);
 
}
@end
</span>


ISRDataHelper.h

<span style="font-size:18px;">#import <Foundation/Foundation.h>

@interface ISRDataHelper : NSObject


// 解析命令词返回的结果
+ (NSString*)stringFromAsr:(NSString*)params;

/**
 解析JSON数据
 ****/
+ (NSString *)stringFromJson:(NSString*)params;//


/**
 解析语法识别返回的结果
 ****/
+ (NSString *)stringFromABNFJson:(NSString*)params;

@end

ISRDataHelper.m

#import "ISRDataHelper.h"

@implementation ISRDataHelper

/**
 解析命令词返回的结果
 ****/
+ (NSString*)stringFromAsr:(NSString*)params;
{
    NSMutableString * resultString = [[NSMutableString alloc]init];
    NSString *inputString = nil;
    
    NSArray *array = [params componentsSeparatedByString:@"\n"];
    
    for (int  index = 0; index < array.count; index++)
    {
        NSRange range;
        NSString *line = [array objectAtIndex:index];
        
        NSRange idRange = [line rangeOfString:@"id="];
        NSRange nameRange = [line rangeOfString:@"name="];
        NSRange confidenceRange = [line rangeOfString:@"confidence="];
        NSRange grammarRange = [line rangeOfString:@" grammar="];
        
        NSRange inputRange = [line rangeOfString:@"input="];
        
        if (confidenceRange.length == 0 || grammarRange.length == 0 || inputRange.length == 0 )
        {
            continue;
        }
        
        //check nomatch
        if (idRange.length!=0) {
            NSUInteger idPosX = idRange.location + idRange.length;
            NSUInteger idLength = nameRange.location - idPosX;
            range = NSMakeRange(idPosX,idLength);
            NSString *idValue = [[line substringWithRange:range]
                                 stringByTrimmingCharactersInSet: [NSCharacterSet whitespaceAndNewlineCharacterSet] ];
            if ([idValue isEqualToString:@"nomatch"]) {
                return @"";
            }
        }
        
        //Get Confidence Value
        NSUInteger confidencePosX = confidenceRange.location + confidenceRange.length;
        NSUInteger confidenceLength = grammarRange.location - confidencePosX;
        range = NSMakeRange(confidencePosX,confidenceLength);
        
        
        NSString *score = [line substringWithRange:range];
        
        NSUInteger inputStringPosX = inputRange.location + inputRange.length;
        NSUInteger inputStringLength = line.length - inputStringPosX;
        
        range = NSMakeRange(inputStringPosX , inputStringLength);
        inputString = [line substringWithRange:range];
        
        [resultString appendFormat:@"%@ 置信度%@\n",inputString, score];
    }
    
    return resultString;
    
}

/**
 解析听写json格式的数据
 params例如:
 {"sn":1,"ls":true,"bg":0,"ed":0,"ws":[{"bg":0,"cw":[{"w":"白日","sc":0}]},{"bg":0,"cw":[{"w":"依山","sc":0}]},{"bg":0,"cw":[{"w":"尽","sc":0}]},{"bg":0,"cw":[{"w":"黄河入海流","sc":0}]},{"bg":0,"cw":[{"w":"。","sc":0}]}]}
 ****/
+ (NSString *)stringFromJson:(NSString*)params
{
    if (params == NULL) {
        return nil;
    }
    
    NSMutableString *tempStr = [[NSMutableString alloc] init];
    NSDictionary *resultDic  = [NSJSONSerialization JSONObjectWithData:    //返回的格式必须为utf8的,否则发生未知错误
                                [params dataUsingEncoding:NSUTF8StringEncoding] options:kNilOptions error:nil];

    if (resultDic!= nil) {
        NSArray *wordArray = [resultDic objectForKey:@"ws"];
        
        for (int i = 0; i < [wordArray count]; i++) {
            NSDictionary *wsDic = [wordArray objectAtIndex: i];
            NSArray *cwArray = [wsDic objectForKey:@"cw"];
            
            for (int j = 0; j < [cwArray count]; j++) {
                NSDictionary *wDic = [cwArray objectAtIndex:j];
                NSString *str = [wDic objectForKey:@"w"];
                [tempStr appendString: str];
            }
        }
    }
    return tempStr;
}


/**
 解析语法识别返回的结果
 ****/
+ (NSString *)stringFromABNFJson:(NSString*)params
{
    if (params == NULL) {
        return nil;
    }
    NSMutableString *tempStr = [[NSMutableString alloc] init];
    NSDictionary *resultDic  = [NSJSONSerialization JSONObjectWithData:
                                [params dataUsingEncoding:NSUTF8StringEncoding] options:kNilOptions error:nil];
    
    NSArray *wordArray = [resultDic objectForKey:@"ws"];
        for (int i = 0; i < [wordArray count]; i++) {
            NSDictionary *wsDic = [wordArray objectAtIndex: i];
            NSArray *cwArray = [wsDic objectForKey:@"cw"];
            
            for (int j = 0; j < [cwArray count]; j++) {
                NSDictionary *wDic = [cwArray objectAtIndex:j];
                NSString *str = [wDic objectForKey:@"w"];
                NSString *score = [wDic objectForKey:@"sc"];
                [tempStr appendString: str];
                [tempStr appendFormat:@" 置信度:%@",score];
                [tempStr appendString: @"\n"];
            }
        }
    return tempStr;
}

@end


好想睡觉-----------睡觉


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
2 预备工作 2.1 创建iOS工程 在XCode中建立你的工程,或者打开已经建立的工程。 2.2 添加静态库 将开发工具包中lib目录下的iflyMSC.framework添加到新建工程中(如下图所示)。 提交 图一 图二 提交 图三 2.3 添加framework 按下图添加SDK所需要的iOS库,请注意libz.dylib,CoreTelephoney.framework不要遗漏。 提交 图四 注:如果使用的是离线识别,还需要增加libc++.dylib。 2.4 确认SDK的路径 提交 图五 请确认上图红色部分的路径能够找到iflyMSC.framework。为了支持多人开发,建议双击红色部分,把路径改为相对路径,例如像下图所示。 提交 图六 注意:请把不必要的路径删除。例如更新了SDK后,新的SDK与旧的SDK不在同一路径,请把旧的路径删除,避免引用到旧的库。对应集成SDK后发现编译失败,提示找不到头文件,请先检查这个路径是否正确。 2.5 导入头文件 在你需要使用MSC服务的文件中导入相应的头文件 例如: C/C++ Code //带界面的语音识别控件 #import “iflyMSC/IFlyRecognizerViewDelegate.h” #import “iflyMSC/IFlyRecognizerView.h” C/C++ Code //不带界面的语音识别控件 #import “iflyMSC/IFlySpeechRecognizerDelegate.h” #import “iflyMSC/IFlySpeechRecognizer.h” C/C++ Code //不带界面的语音合成控件 #import “iflyMSC/IFlySpeechSynthesizerDelegate.h” #import “iflyMSC/IFlySpeechSynthesizer.h” 2.6 集成帮助文档到Xcode 打开终端(termainl或iterm),cd 到压缩包的doc 目录,执行以下命令: 注:不同的xcode版本,对应的docset路径可能有变化,需要根据实际路径来操作。 C/C++ Code cp -R -f -a com.iflytek.documentation.IFlyMSC.docset ~/Library/Developer/Shared/Documentation/DocSets/ 然后执行命令 C/C++ Code open ~/Library/Developer/Shared/Documentation/DocSets/ 请核对文档的版本为最新下载的版本 提交 图七 打开Xcode的帮助文档就可以看到已经集成的文档 提交 图八 2.7 初始化 必须在初始化后才可以使用语音服务,初始化是异步过程,推荐在程序入口处调用。 Appid是应用的身份信息,具有唯一性,初始化时必须要传入Appid。可以从demo的Definition.h APPID_VALUE中查看此信息。Demo和SDK申请地址:http://xfyun.cn C/C++ Code //将“12345678”替换成您申请的APPID。 NSString *initString = [[NSString alloc] initWithFormat:@"appid=%@",@” 12345678”]; [IFlySpeechUtility createUtility:initString]; 3 语音听写 使用示例如下: C/C++ Code //头文件定义 //需要实现IFlyRecognizerViewDelegate,为识别会话的服务代理 @interface RecognizerViewController : UIViewController<IFlyRecognizerViewDelegate> { IFlyRecognizerView *_iflyRecognizerView; } //初始化语音识别控件 _iflyRecognizerView = [[IFlyRecognizerView alloc] initWithCenter:self.view.center]; _iflyRecognizerView.delegate = self; [_iflyRecognizerView setParameter: @"iat" forKey: [IFlySpeechConstant IFLY_DOMAIN]]; //asr_audio_path保存录音文件名,如不再需要,设置value为nil表示取消,默认目录是documents [_iflyRecognizerView setParameter:@"asrview.pcm " forKey:[IFlySpeechConstant ASR_AUDIO_PATH]]; //启动识别服务 [_iflyRecognizerView start]; /*识别结果返回代理 @param resultArray 识别结果 @ param isLast 表示是否最后一次结果 */ - (void)onResult: (NSArray *)resultArray isLast:(BOOL) isLast { } /*识别会话错误返回代理 @ param error 错误码 */ - (void)onError: (IFlySpeechError *) error { } 4 语音识别 4.1 在线语音识别 上传联系人,使用示例如下: C/C++ Code //创建上传对象 _uploader = [[IFlyDataUploader alloc] init]; //获取联系人集合 IFlyContact *iFlyContact = [[IFlyContact alloc] init]; NSString *contactList = [iFlyContact contact]; //设置参数 [_uploader setParameter:@"uup" forKey:@"subject"]; [_uploader setParameter:@"contact" forKey:@"dtt"]; //启动上传 [_uploader uploadDataWithCompletionHandler:^(NSString * grammerID, IFlySpeechError *error) { //接受返回的grammerID和error [self onUploadFinished:grammerID error:error]; }name:@"contact" data: contactList]; 上传用户词表,使用示例如下: C/C++ Code //创建上传对象 _uploader = [[IFlyDataUploader alloc] init]; //生成用户词表对象 //用户词表 #define USERWORDS @"{\"userword\":[{\"name\":\"iflytek\",\"words\":[\"德国盐猪手\",\"1912酒吧街\",\"清蒸鲈鱼\",\"挪威三文鱼\",\"黄埔军校\",\"横沙牌坊\",\"科大讯飞\"]}]}" IFlyUserWords *iFlyUserWords = [[IFlyUserWords alloc] initWithJson:USERWORDS ]; #define NAME @"userwords" //设置参数 [_uploader setParameter:@"iat" forKey:@"sub"]; [_uploader setParameter:@"userword" forKey:@"dtt"]; //上传词表 [_uploader uploadDataWithCompletionHandler:^(NSString * grammerID, IFlySpeechError *error) { //接受返回的grammerID和error [self onUploadFinished:grammerID error:error]; } name:NAME data:[iFlyUserWords toString]]; abnf语法上传,示例如下: C/C++ Code // ABNF语法示例,可以说”北京到上海” #define ABNFPARAM @”sub=asr,dtt=abnf” #define ABNFDATA = “#ABNF 1.0 gb2312; language zh-CN; mode voice; root $main; $main = $place1 到$place2 ; $place1 = 北京 | 武汉 | 南京 | 天津 | 天京 | 东京; $place2 = 上海 | 合肥;” //创建上传对象 _uploader = [[IFlyDataUploader alloc] init]; //设置参数 [_uploader setParameter:@"asr" forKey:@"sub"]; [_uploader setParameter:@"abnf" forKey:@"dtt"]; //上传abnf语法 [_uploader uploadDataWithCompletionHandler:^(NSString * grammerID, IFlySpeechError *error) { //接受返回的grammerID和error [self setGrammerId:grammerID]; }name:ABNFNAME data:ABNFDATA]; 4.2 本线语音识别 1) 创建识别对象(注:如果使用的是离线识别,还需要增加libc++.dylib) C/C++ Code //此方法为demo中封装,具体实现请参照demo。 self.iFlySpeechRecognizer = [RecognizerFactory CreateRecognizer:self Domain:@"asr"]; 2)设置参数 C/C++ Code //开启候选结果 [_iflySpeechRecognizer setParameter:@"1" forKey:@"asr_wbest"]; //设置引擎类型,clound或者local [_iflySpeechRecognizer setParameter:@”local” forKey:[IFlySpeechConstant ENGINE_TYPE]]; //设置字符编码为utf-8 [_iflySpeechRecognizer setParameter:@"utf-8" forKey:[IFlySpeechConstant TEXT_ENCODING]]; //语法类型,本地是bnf,在线识别是abnf [_iflySpeechRecognizer setParameter:@”bnf” forKey:[IFlyResourceUtil GRAMMARTYPE]]; //启动asr识别引擎 [[IFlySpeechUtility getUtility] setParameter:@"asr" forKey:[IFlyResourceUtil ENGINE_START]]; //设置服务类型为asr识别 [_iflySpeechRecognizer setParameter:@"asr" forKey:[IFlySpeechConstant IFLY_DOMAIN]]; //设置语法构建路径,该路径为sandbox下的目录,请确保目录存在 [_iflySpeechRecognizer setParameter:_grammBuildPath forKey:[IFlyResourceUtil GRM_BUILD_PATH]]; //设置引擎资源文件路径,如demo中的aitalkResource中的common.mp3 [_iflySpeechRecognizer setParameter:_aitalkResourcePath forKey:[IFlyResourceUtil ASR_RES_PATH]]; 3)编译语法文本
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值