鸿蒙Next使用AudioCapturer实现音频录制和AI语音转文字

上篇介绍了使用AVRecorder录制音频，这篇记录一下使用AudioCapturer 实现录制音频，并且实现在录制的同时，使用AI实现语音转文字的功能。

效果演示：

录制实现步骤：

1.配置音频采集参数AudioCapturerOptions：AudioStreamInfo音频流信息和AudioCapturerInfo音频采集器信息

typescript 复制代码

// Capture format. The speech recognizer used later in this article only accepts
// 16 kHz / single-channel / 16-bit PCM, so the capturer must produce exactly that
// format for its buffers to be passed to writeAudio() unchanged.
let audioStreamInfo: audio.AudioStreamInfo = {
  samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, // Sampling rate.
  channels: audio.AudioChannel.CHANNEL_1, // Channel count (mono).
  sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, // Sample format.
  encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW // Encoding type.
};
// Capturer source settings.
let audioCapturerInfo: audio.AudioCapturerInfo = {
  source: audio.SourceType.SOURCE_TYPE_MIC, // Source type: microphone. Choose per use case; see SourceType.
  capturerFlags: 0 // Audio capturer flags.
};
// Combined options object passed to audio.createAudioCapturer().
let audioCapturerOptions: audio.AudioCapturerOptions = {
  streamInfo: audioStreamInfo,
  capturerInfo: audioCapturerInfo
};

2.创建AudioCapturer实例,并添加回调函数

javascript 复制代码

// Create the output .pcm file. A single Date instance is used so that every
// part of the file name comes from the same instant.
const createdAt = new Date();
this.fileName = `${createdAt.getFullYear()}_${createdAt.getMonth() + 1}_${createdAt.getDate()}_${createdAt.getTime()}.pcm`;
this.audioFile = fs.openSync(`${context.filesDir}/${this.fileName}`, fs.OpenMode.READ_WRITE | fs.OpenMode.CREATE);

// Create the capturer. The instance only exists once the callback fires, so the
// listeners are registered inside the success path rather than after the call.
audio.createAudioCapturer(audioCapturerOptions, (err, capturer) => {
  if (err) {
    console.error(`Invoke createAudioCapturer failed, code is ${err.code}, message is ${err.message}`);
    return;
  }
  console.info('Invoke createAudioCapturer succeeded.');
  this.audioCapturer = capturer; // Keep the instance for start/stop/release.

  // Audio data callback: append each captured buffer to the .pcm file.
  capturer.on('readData', (buffer: ArrayBuffer) => {
    const file = this.audioFile;
    if (file !== undefined) {
      // bufferSize is the running byte count and doubles as the write offset.
      const options: Options = {
        offset: this.bufferSize,
        length: buffer.byteLength
      };
      fs.writeSync(file.fd, buffer, options);
    }
    this.bufferSize += buffer.byteLength;
  });

  // State callback: fired whenever the AudioCapturer state changes.
  capturer.on('stateChange', (state: audio.AudioState) => {
    this.currentState = state;
  });
});

AudioState状态如下：

名称	值	说明
STATE_INVALID	-1	无效状态。
STATE_NEW	0	创建新实例状态。
STATE_PREPARED	1	准备状态。
STATE_RUNNING	2	运行状态。
STATE_STOPPED	3	停止状态。
STATE_RELEASED	4	释放状态。
STATE_PAUSED	5	暂停状态。
3.开始录制、结束录制、释放资源
AudioCapturer状态变化示意图

语音识别实现步骤：

语音识别（SpeechRecognizer）将一段中文音频信息（中文、中文语境下的英文；短语音模式不超过60s，长语音模式不超过8h）转换为文本，音频信息可以为pcm音频文件或者实时语音。

1.调用createEngine方法，对引擎进行初始化

typescript 复制代码

// Engine creation parameters.
    let extraParam: Record<string, Object> = {
      "locate": "CN",   // Region; only "CN" is currently supported.
      "recognizerMode": "long", // Recognition mode; "short" and "long" are currently supported.
      "sysGeneralLexicon":['鸿蒙',]  // Hot words that improve the recognition rate.
    };
    let initParamsInfo: speechRecognizer.CreateEngineParams = {
      language: 'zh-CN', // Language; only "zh-CN" (Chinese) is currently supported.
      online: 1,         // 1 = offline; only offline mode is currently supported.
      extraParams: extraParam
    };

    // Create the engine; the outcome is delivered through the callback.
    speechRecognizer.createEngine(initParamsInfo, (err: BusinessError, speechRecognitionEngine:
      speechRecognizer.SpeechRecognitionEngine) => {
      if (err) {
        // 1002200001: creation failed (unsupported language or mode, initialization timeout, missing resources, ...).
        // 1002200006: the engine is busy, typically because several apps are using speech recognition at once.
        // 1002200008: the engine has already been destroyed.
        console.error( `Failed to create engine. Code: ${err.code}, message: ${err.message}.`);
        return;
      }
      console.info('Succeeded in creating engine.');
      // Keep the engine instance, then attach the recognition callbacks.
      this.asrEngine = speechRecognitionEngine;
      this.setListener()
    });

2.初始化成功之后，设置监听回调

typescript 复制代码

// Build the callback object that receives recognition events.
   let recognitionListener: speechRecognizer.RecognitionListener = {
      // Called when recognition has started successfully.
      onStart(sessionId: string, eventMessage: string) {
        console.info( `onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
      },
      // Called for engine events.
      onEvent(sessionId: string, eventCode: number, eventMessage: string) {
        console.info( `onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`);
      },
      // Called with recognition results, both intermediate and final.
      onResult(sessionId: string, result: speechRecognizer.SpeechRecognitionResult) {
        console.info( `onResult, sessionId: ${sessionId} result: ${JSON.stringify(result)}`);
        // Publish the text and its isFinal flag on the 'updateMessage' event.
        context.eventHub.emit('updateMessage', result.result,result.isFinal);
      },
      // Called when recognition has completed.
      onComplete(sessionId: string, eventMessage: string) {
        console.info( `onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
      },
      // Called on failure; the error code is delivered here.
      // 1002200002: failed to start recognition, raised when startListening is called again while already started.
      // See the error code reference for the full list.
      onError(sessionId: string, errorCode: number, errorMessage: string) {
        // Log instead of silently dropping the error so failures are visible.
        console.error( `onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`);
      },
    }
    // Register the callbacks on the engine.
    this.asrEngine!.setListener(recognitionListener);

识别结果在onResult回调中返回，SpeechRecognitionResult信息如下：

名称	说明
isFinal	是否为本条子句最终结果的判断
isLast	是否为最后一条句子的判断。
result	最优识别结果

3.设置麦克风转文字功能开始识别的相关参数

typescript 复制代码

// Audio format of the stream handed to the recognizer.
    let audioParam: speechRecognizer.AudioInfo = {
      audioType: 'pcm', // Audio type; only "pcm" is currently supported.
      sampleRate: 16000,  // Sampling rate; only 16000 is currently supported.
      soundChannel: 1,  // Channel count; only 1 channel is currently supported.
      sampleBit: 16   // Bits per sample; only 16-bit is currently supported.
    }
    // Extra parameters for starting recognition.
    let extraParam: Record<string, Object> = {
      // 0: the engine records from the microphone itself (the app needs the recording permission).
      // 1: real-time audio-to-text; the audio stream must be supplied through writeAudio().
      // This article feeds the AudioCapturer buffers through writeAudio(), so mode 1 is required.
      "recognitionMode": 1,
      "vadBegin": 2000,
      "vadEnd": 3000,
      "maxAudioDuration": 8 * 60 * 60 * 1000, // Maximum supported audio duration.
      // Short mode accepts [20000 - 60000] ms and defaults to 20000 ms when omitted.
      // Long mode accepts [20000 - 8 * 60 * 60 * 1000] ms.
    }
    // Parameters passed to startListening().
    let recognizerParams: speechRecognizer.StartParams = {
      sessionId: this.sessionId,
      audioInfo: audioParam,
      extraParams: extraParam
    }

4.开始识别

typescript 复制代码

// Start a recognition session: log the attempt, reset both text fields to
// empty strings, then hand the prepared start parameters to the engine.
console.info('startListening start');
const blank = '';
this.speechCurrent = blank;
this.speechResult = blank;
const engine = this.asrEngine!;
engine.startListening(recognizerParams);

5.传入音频流：在audioCapturer的'readData'回调函数中，将麦克风获取的音频流传给语音识别引擎

javascript 复制代码

// Forward the captured buffer to the speech recognizer, but only when an engine
// exists and the capturer state recorded by the stateChange listener is
// STATE_RUNNING (value 2).
const engine = this.asrEngine;
if (engine !== undefined && this.currentState === audio.AudioState.STATE_RUNNING) {
  engine.writeAudio(this.sessionId, new Uint8Array(buffer));
}

6.结束录制之后停止语音识别

关注私信，获取完整源码