一、项目 Info.plist 配置
<!-- Usage description shown when the app requests speech-recognition authorization. -->
<key>NSSpeechRecognitionUsageDescription</key>
<string>我们需要使用语音识别功能将您的语音转换为文字,以便进行实时记录。</string>
<!-- Usage description shown when the app requests microphone access. -->
<key>NSMicrophoneUsageDescription</key>
<string>我们需要访问麦克风以录制您的语音用于识别。</string>
在Objective-C中实现语音实时转文字可依托苹果原生框架完成,核心方案如下:
1. 核心依赖框架
使用iOS系统自带的Speech框架,无需额外引入第三方SDK,通过SFSpeechRecognizer、SFSpeechAudioBufferRecognitionRequest等核心类即可实现实时语音转写,适配iOS 10及以上系统;其中离线(设备端)识别需要iOS 13及以上,且仅在识别器的supportsOnDeviceRecognition为YES时可用,否则识别过程需要联网。
2. 关键实现步骤
- 提前申请麦克风权限与语音识别权限,避免权限缺失导致功能异常
- 初始化SFSpeechRecognizer实例,指定目标识别语言(如简体中文)
- 配置音频捕获会话,通过SFSpeechAudioBufferRecognitionRequest持续接收音频流
- 实时回调获取SFSpeechRecognitionResult,提取bestTranscription字段得到实时转写文本
3. 原生方案优势
相比第三方工具,该方案无需集成额外SDK,在启用设备端识别时无网络依赖、延迟更低,可实现系统级的实时字幕、会议实时记录等功能,同时能更好地保障用户语音数据隐私。
二、逻辑实现
1.Speech2TextManager.h
//
//  Speech2TextManager.h
//  QGB_IM2
//
//  Created by carbonzhao on 2026/7/1.
//

#import <Foundation/Foundation.h>
#import <Speech/Speech.h>
#import <AVFoundation/AVFoundation.h>

NS_ASSUME_NONNULL_BEGIN

/// Singleton that captures microphone audio and streams it to the Speech framework,
/// reporting partial and final transcriptions through block callbacks.
@interface Speech2TextManager : NSObject

/// Recognizer used for all sessions. The implementation creates it for the "zh-CN" locale.
/// NOTE(review): SFSpeechRecognizer's initWithLocale: can return nil for an unsupported
/// locale, so treat this as possibly nil at runtime even though it is annotated nonnull.
@property (nonatomic, strong, readonly) SFSpeechRecognizer *speechRecognizer;

/// YES while the underlying audio engine is running.
@property (nonatomic, assign, readonly) BOOL isRecording;

/// Called with each transcription update. `isFinal` is NO for partial (hypothesized) text
/// and YES for the final result of a recognition task. May be left nil.
@property (nonatomic, copy, nullable) void (^Speech2TextManagerDidReceiveTranscriptionBlock)(Speech2TextManager *manager, NSString *text, BOOL isFinal);

/// Called when starting a session fails, when a recognition task ends with an error,
/// or when the recognizer becomes unavailable. May be left nil.
@property (nonatomic, copy, nullable) void (^Speech2TextManagerDidFailWithErrorBlock)(Speech2TextManager *manager, NSError * _Nonnull error);

/// Returns the process-wide shared manager.
+ (instancetype)sharedInstance;

/// Requests speech-recognition authorization and, if that is granted, microphone permission.
/// @param completion Receives YES only when both permissions were granted. Callers should
///        not assume a particular queue; hop to the main queue before touching UI.
- (void)requestPermissionsWithCompletion:(void(^)(BOOL granted))completion;

/// Starts capturing audio and recognizing speech. Does nothing if already recording.
/// Failures are reported through Speech2TextManagerDidFailWithErrorBlock.
- (void)startRecording;

/// Stops audio capture and signals end of audio to the recognizer. Does nothing if not recording.
- (void)stopRecording;

@end

NS_ASSUME_NONNULL_END
2.Speech2TextManager.m
#import "Speech2TextManager.h"

/// Domain of the errors synthesized by this class (string unchanged for callers that match on it).
static NSString * const kSpeech2TextManagerErrorDomain = @"Speech2TextManagerError";

@interface Speech2TextManager () <SFSpeechRecognitionTaskDelegate, SFSpeechRecognizerDelegate>

@property (nonatomic, strong) AVAudioEngine *audioEngine;
@property (nonatomic, strong) SFSpeechAudioBufferRecognitionRequest *recognitionRequest;
@property (nonatomic, strong) SFSpeechRecognitionTask *recognitionTask;
/// Serial queue created in init; no code path in this file currently schedules work on it.
@property (nonatomic, strong) NSOperationQueue *operationQueue;
/// Task that was cancelled by this class itself; its completion error is not reported again.
@property (nonatomic, weak) SFSpeechRecognitionTask *cancelledTask;

@end

@implementation Speech2TextManager

+ (instancetype)sharedInstance {
    static Speech2TextManager *instance = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        instance = [[self alloc] init];
    });
    return instance;
}

- (instancetype)init {
    self = [super init];
    if (self) {
        // Simplified-Chinese recognizer. initWithLocale: may return nil for an unsupported
        // locale; startRecording then reports "service unavailable" because messaging nil
        // yields isAvailable == NO.
        NSLocale *locale = [[NSLocale alloc] initWithLocaleIdentifier:@"zh-CN"];
        _speechRecognizer = [[SFSpeechRecognizer alloc] initWithLocale:locale];
        _speechRecognizer.delegate = self;

        _audioEngine = [[AVAudioEngine alloc] init];

        _operationQueue = [[NSOperationQueue alloc] init];
        _operationQueue.maxConcurrentOperationCount = 1;
    }
    return self;
}

#pragma mark - Permissions

/// Requests speech-recognition authorization first and microphone permission second.
/// `completion` is always delivered on the main queue and receives YES only if both were granted.
- (void)requestPermissionsWithCompletion:(void(^)(BOOL granted))completion {
    [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus status) {
        dispatch_async(dispatch_get_main_queue(), ^{
            if (status != SFSpeechRecognizerAuthorizationStatusAuthorized) {
                if (completion) {
                    completion(NO);
                }
                return;
            }

            [[AVAudioSession sharedInstance] requestRecordPermission:^(BOOL granted) {
                if (!completion) {
                    return;
                }
                // The permission handler may run on an arbitrary queue; hop to main so both
                // outcomes are delivered on the same queue.
                dispatch_async(dispatch_get_main_queue(), ^{
                    completion(granted);
                });
            }];
        });
    }];
}

#pragma mark - Recording Control

/// Configures the audio session, starts a recognition task and begins feeding it microphone
/// buffers. Every failure is reported through the error block and leaves the manager idle.
- (void)startRecording {
    if (self.isRecording) {
        return;
    }

    // Recognizer availability (also covers a nil recognizer).
    if (!self.speechRecognizer.isAvailable) {
        [self notifyError:[self errorWithCode:-1 message:@"语音识别服务不可用"]];
        return;
    }

    // Audio session: decide on the BOOL results, not on whether the NSError was written.
    AVAudioSession *session = [AVAudioSession sharedInstance];
    NSError *sessionError = nil;
    BOOL sessionReady = [session setCategory:AVAudioSessionCategoryRecord
                                        mode:AVAudioSessionModeMeasurement
                                     options:AVAudioSessionCategoryOptionDuckOthers
                                       error:&sessionError]
                     && [session setActive:YES error:&sessionError];
    if (!sessionReady) {
        [self notifyError:sessionError];
        return;
    }

    // Installing a tap with a 0 Hz / 0 channel format raises an exception, so bail out early
    // when no usable input is present.
    AVAudioInputNode *inputNode = self.audioEngine.inputNode;
    AVAudioFormat *recordingFormat = [inputNode outputFormatForBus:0];
    if (recordingFormat.sampleRate <= 0 || recordingFormat.channelCount == 0) {
        [self teardownPipelineCancellingTask:NO];
        [self notifyError:[self errorWithCode:-3 message:@"音频输入不可用"]];
        return;
    }

    // Recognition request with live partial results.
    SFSpeechAudioBufferRecognitionRequest *request = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
    request.shouldReportPartialResults = YES;
    self.recognitionRequest = request;

    self.recognitionTask = [self.speechRecognizer recognitionTaskWithRequest:request delegate:self];

    // Drop any stale tap from an earlier failed start; installing a second tap on the same bus
    // would raise.
    [inputNode removeTapOnBus:0];
    // The block captures the request directly so the audio thread never reads a property that
    // stopRecording may be clearing concurrently.
    [inputNode installTapOnBus:0
                    bufferSize:1024
                        format:recordingFormat
                         block:^(AVAudioPCMBuffer * _Nonnull buffer, AVAudioTime * _Nonnull when) {
        [request appendAudioPCMBuffer:buffer];
    }];

    [self.audioEngine prepare];
    NSError *engineError = nil;
    if (![self.audioEngine startAndReturnError:&engineError]) {
        // The engine never ran, so stopRecording would be a no-op; clean up explicitly.
        [self teardownPipelineCancellingTask:YES];
        [self notifyError:engineError];
    }
}

/// Stops capture and tells the recognizer that no more audio will arrive. The final result,
/// if any, is still delivered through the transcription block afterwards.
- (void)stopRecording {
    if (!self.isRecording) {
        return;
    }
    [self teardownPipelineCancellingTask:NO];
}

- (BOOL)isRecording {
    return self.audioEngine.isRunning;
}

#pragma mark - Private

/// Removes the tap, stops the engine, ends (or cancels) recognition and releases the session.
/// Safe to call when nothing is running.
- (void)teardownPipelineCancellingTask:(BOOL)cancelTask {
    [self.audioEngine.inputNode removeTapOnBus:0];
    if (self.audioEngine.isRunning) {
        [self.audioEngine stop];
    }

    [self.recognitionRequest endAudio];
    if (cancelTask && self.recognitionTask) {
        self.cancelledTask = self.recognitionTask;
        [self.recognitionTask cancel];
    }
    self.recognitionTask = nil;
    self.recognitionRequest = nil;

    // Best effort: let other apps' audio recover from ducking. A failure here is not actionable.
    [[AVAudioSession sharedInstance] setActive:NO
                                   withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation
                                         error:nil];
}

/// Builds an error in this class's domain with a localized description.
- (NSError *)errorWithCode:(NSInteger)code message:(NSString *)message {
    return [NSError errorWithDomain:kSpeech2TextManagerErrorDomain
                               code:code
                           userInfo:@{NSLocalizedDescriptionKey: message}];
}

/// Forwards an error to the failure block, if one is set. Invoking a nil block crashes, so
/// every error path goes through here.
- (void)notifyError:(NSError *)error {
    void (^handler)(Speech2TextManager *, NSError *) = self.Speech2TextManagerDidFailWithErrorBlock;
    if (!handler) {
        return;
    }
    if (!error) {
        // A failing API did not populate its NSError; still hand the caller a non-nil error.
        error = [self errorWithCode:-4 message:@"未知错误"];
    }
    handler(self, error);
}

#pragma mark - SFSpeechRecognitionTaskDelegate

- (void)speechRecognitionTask:(SFSpeechRecognitionTask *)task didHypothesizeTranscription:(SFTranscription *)transcription {
    // Live partial result.
    NSString *text = transcription.formattedString;
    if (self.Speech2TextManagerDidReceiveTranscriptionBlock) {
        self.Speech2TextManagerDidReceiveTranscriptionBlock(self, text, NO);
    }
}

- (void)speechRecognitionTask:(SFSpeechRecognitionTask *)task didFinishRecognition:(SFSpeechRecognitionResult *)recognitionResult {
    // Final result.
    NSString *text = recognitionResult.bestTranscription.formattedString;
    if (self.Speech2TextManagerDidReceiveTranscriptionBlock) {
        self.Speech2TextManagerDidReceiveTranscriptionBlock(self, text, YES);
    }
}

- (void)speechRecognitionTaskFinishedReadingAudio:(SFSpeechRecognitionTask *)task {
    // Audio has been fully read; the last results may still be in flight.
}

- (void)speechRecognitionTask:(SFSpeechRecognitionTask *)task didFinishSuccessfully:(BOOL)successfully {
    if (task == self.cancelledTask) {
        // Cancelled by a failed start, whose error has already been reported.
        self.cancelledTask = nil;
        return;
    }

    if (task == self.recognitionTask) {
        // The task ended on its own while still capturing. Stop the engine so isRecording
        // becomes NO and a later startRecording is not silently ignored.
        [self teardownPipelineCancellingTask:NO];
    }

    if (!successfully && task.error) {
        [self notifyError:task.error];
    }
}

#pragma mark - SFSpeechRecognizerDelegate

- (void)speechRecognizer:(SFSpeechRecognizer *)speechRecognizer availabilityDidChange:(BOOL)available {
    if (!available) {
        [self notifyError:[self errorWithCode:-2 message:@"语音识别服务暂时不可用"]];
    }
}

@end
交互UI在此就不贴了,请自行实现。