Wrapping iOS's Built-in Speech Recognition (the Speech Framework)

1. Add the following keys to your Info.plist:

<dict>
    <key>NSSpeechRecognitionUsageDescription</key>
    <string>Speech recognition</string>
    <key>NSMicrophoneUsageDescription</key>
    <string>Microphone access</string>
</dict>
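
Besides these Info.plist keys, the Speech framework also requires a runtime authorization prompt before recognition can run; the manager below only handles the microphone permission. A minimal sketch of requesting speech-recognition authorization (this call is not part of the manager below; place it wherever fits your app's flow):

SFSpeechRecognizer.requestAuthorization { status in
    DispatchQueue.main.async {
        switch status {
        case .authorized:
            print("Speech recognition authorized")
        case .denied, .restricted, .notDetermined:
            print("Speech recognition is not available")
        @unknown default:
            break
        }
    }
}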

2. The singleton manager:

//
//  SpeechManager.swift
//  ChatgptClient
//
//  Created by 柯木超 on 2023/5/16.
//

import UIKit
import Speech
import AVFoundation

class SpeechManager: NSObject {
    
    static let shared = SpeechManager()
    
    // References to the objects used for speech recognition
    private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "zh-CN"))! // Recognizer bound to a specific locale
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest? // The live-audio recognition request
    private var recognitionTask: SFSpeechRecognitionTask? // The in-flight recognition task
    private let audioEngine = AVAudioEngine() // Audio engine used for microphone input
    private var inputNode: AVAudioInputNode!

    private override init() { // Private so the singleton is the only instance
        super.init()
        do {
            try initConfig()
        } catch {
            print("Recording is unavailable!")
        }
    }
    
    func initConfig() throws {
        // Configure the app's audio session
        let audioSession = AVAudioSession.sharedInstance() // Manages the allocation of audio hardware resources
        try audioSession.setCategory(.playAndRecord, options: [.mixWithOthers]) // Set the session's category, mode, and options
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation) // Activate the audio session
        inputNode = audioEngine.inputNode // inputNode/outputNode correspond to the hardware microphone and speaker
    }
    
    // Request microphone permission
    func openAudioSession() {
        let permissionStatus = AVAudioSession.sharedInstance().recordPermission
        if permissionStatus == AVAudioSession.RecordPermission.undetermined {
            AVAudioSession.sharedInstance().requestRecordPermission { granted in
                // Inspect the permission state here and react accordingly, e.g. update a button's state
                if !granted {
                    DispatchQueue.main.async {
                        ToastUtils.showMessage("Voice chat is unavailable without microphone permission!")
                    }
                }
            }
        }
    }
    
    func isAudioSessionAuthorized() -> Bool {
        let authStatus = AVCaptureDevice.authorizationStatus(for: AVMediaType.audio)
        return authStatus != .restricted && authStatus != .denied
    }

    func stopRecording() {
        self.audioEngine.stop()
        self.recognitionRequest?.endAudio() // Signals that audio input is finished; the final result follows
    }
    
    // Start recording and recognizing
    func startRecording(completion: @escaping (_ resultStr: String?) -> Void) throws {
        if !isAudioSessionAuthorized() {
            openAudioSession()
            return
        }
        // Cancel the previous recognition task, if any
        recognitionTask?.cancel()
        self.recognitionTask = nil

        // Create and configure the speech recognition request
        recognitionRequest = SFSpeechAudioBufferRecognitionRequest() // A request that recognizes speech from captured audio, such as the device microphone
        guard let recognitionRequest = recognitionRequest else { fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object") }

        if #available(iOS 16, *) {
            // iOS 16 can add punctuation automatically, so you no longer need to speak it aloud
            recognitionRequest.addsPunctuation = true
        }

        // Report results before recording finishes:
        // each partial result is returned as soon as it is produced
        recognitionRequest.shouldReportPartialResults = true

        // Keep the speech recognition data on the device.
        // Setting this to true prevents the request from sending audio over the network;
        // on-device requests can be less accurate, and the flag should only be set
        // when the recognizer actually supports on-device recognition.
        if #available(iOS 13, *) {
            if speechRecognizer.supportsOnDeviceRecognition {
                recognitionRequest.requiresOnDeviceRecognition = true
            }
        }
        
        // Create a recognition task for the speech recognition session.
        // Keep a reference to the task so it can be cancelled later.
        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
            var isFinal = false

            if let result = result {
                isFinal = result.isFinal
                print("[Recognized] \(result.bestTranscription.formattedString)")
            }

            if error != nil || isFinal {
                // Stop recognizing on error or once the final result arrives
                self.audioEngine.stop()
                self.inputNode.removeTap(onBus: 0)
                self.recognitionRequest = nil
                self.recognitionTask = nil

                completion(validString(result?.bestTranscription.formattedString))
            }
        }

        // Configure the microphone input
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.removeTap(onBus: 0) // Avoid crashing if a tap is still installed from a previous session
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            self.recognitionRequest?.append(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()
    }
}
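
One thing the manager above takes for granted is that the recognizer is actually usable: SFSpeechRecognizer(locale:) is a failable initializer that returns nil for unsupported locales, and isAvailable can change at runtime (for example with network conditions). A hedged sketch of a guard you could add at the top of startRecording; this check is an addition, not part of the original code:

guard let recognizer = SFSpeechRecognizer(locale: Locale(identifier: "zh-CN")),
      recognizer.isAvailable else {
    print("The speech recognizer is unavailable for this locale right now")
    return
}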

3. Usage:

(1) Start recognition:

do {
    try SpeechManager.shared.startRecording { resultStr in
        if validString(resultStr) != "" {
            // Handle the recognized text here
        } else {
            print("chat returned empty data")
        }
    }
} catch {
    print("Failed to start voice input")
}

(2) Stop recognition:

SpeechManager.shared.stopRecording()
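
For context, here is a minimal sketch of wiring both calls to a hold-to-talk button in a view controller. The VoiceInputViewController name and the two IBActions are assumptions for illustration, not part of the original post; validString is the same helper the manager uses:

import UIKit

class VoiceInputViewController: UIViewController {

    // Hold the button down to record...
    @IBAction func recordButtonTouchedDown(_ sender: UIButton) {
        do {
            try SpeechManager.shared.startRecording { resultStr in
                guard let text = resultStr, !text.isEmpty else { return }
                DispatchQueue.main.async {
                    // Use the recognized text, e.g. put it into a text field
                    print("Recognized: \(text)")
                }
            }
        } catch {
            print("Failed to start voice input")
        }
    }

    // ...and release it to finish; the final result arrives via the completion above.
    @IBAction func recordButtonTouchedUp(_ sender: UIButton) {
        SpeechManager.shared.stopRecording()
    }
}

The completion closure may be invoked on a background queue by the recognition task, hence the hop to the main queue before touching UI.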
