使用recorder-core
第三方插件实现音频转纯文本的功能。
工具类文件
recorder.ts
typescript
import Recorder from 'recorder-core'
import 'recorder-core/src/engine/wav'
import 'recorder-core/src/extensions/lib.fft.js'
import 'recorder-core/src/extensions/frequency.histogram.view'
// Options bag forwarded verbatim into Recorder(); unknown keys pass through.
interface RecorderConfig {
  // Realtime PCM callback invoked by recorder-core while recording.
  // BUGFIX: the original type was `Promise<any> | Function` — a promise makes
  // no sense for a callback, and bare `Function` defeats type checking.
  onProcess?: (...args: any[]) => void
  // Any other recorder-core option (type, sampleRate, bitRate, …).
  [keyname: string]: any
}
// Options bag forwarded verbatim into Recorder.FrequencyHistogramView().
interface FrequencyHistogramViewConfig {
  [keyname: string]: any
}
// Module-level singleton; createRecorder() creates it once and reuses it.
let recorderInstance: any = null
// Raw recorder-core constructor re-export. NOTE(review): "Contructor" is a
// typo, but the name is part of the public API so it is kept as-is.
export const RecorderContructor = Recorder
/**
 * Lazily create (or return the cached) recorder-core instance.
 * Defaults: wav container, 16 kHz sample rate, 16-bit depth. Any key in
 * `config` overrides a default and is passed straight through to Recorder();
 * note that once an instance exists, later configs are ignored.
 */
export const createRecorder = (config?: RecorderConfig) => {
  if (!recorderInstance) {
    const options = {
      type: 'wav', // container format; recorder-core supports others too
      sampleRate: 16000, // Hz — higher means more audio detail
      bitRate: 16, // bits per sample — higher means better quality
      ...(config || {})
    }
    recorderInstance = Recorder(options)
  }
  return recorderInstance
}
/**
 * Tear down the cached recorder instance and release recorder-core's
 * global resources. Safe to call when no instance was ever created.
 * (The exported name keeps the original "destory" spelling — it is public API.)
 */
export const destoryRecorder = () => {
  if (!recorderInstance) return
  recorderInstance.close()
  recorderInstance = null
  Recorder.Destroy()
}
/**
 * Build a frequency-histogram waveform view bound to `el`.
 * Every default below can be overridden via `config`, which is spread last
 * into the options handed to Recorder.FrequencyHistogramView.
 */
export const createRecorderWithWaveView = (el: HTMLElement, config?: FrequencyHistogramViewConfig) => {
  const defaults = {
    elem: el,
    lineCount: 30,
    position: 0,
    minHeight: 1,
    fallDuration: 400,
    stripeEnable: false,
    mirrorEnable: true,
    linear: [0, '#fff', 1, '#fff'] // flat white gradient
  }
  return Recorder.FrequencyHistogramView({ ...defaults, ...(config || {}) })
}
组件文件
AudioInput.vue
html
<template>
<!-- Voice-input modal; teleported to <body> so it overlays the whole app. -->
<div v-if="visibleModal" class="custom-Modal-container">
<Teleport to="body">
<div class="modal_box" ref="modalRef">
<!-- Semi-transparent backdrop; clicking it dismisses the modal. -->
<div class="modal_mask" @click.stop="closeModal"></div>
<div class="modal_content">
<div class="audio_box">
<!-- Header: status title on the left, close icon on the right. -->
<div class="audio_header">
<span class="audio_title_text">
<span v-if="audioStatus == 'input'">收音中</span>
<span v-else-if="audioStatus == 'transform' || audioStatus == 'end'">识别中</span>
<!-- NOTE(review): the trailing "..." below is a stray text node rendered in every state — confirm whether it is intentional -->
<span v-else-if="audioStatus == 'unknown'">停止收音</span>...
</span>
<!-- NOTE(review): close icon has no @click handler — verify it should call closeModal -->
<svg-icon class="close_icon" iconFileName="关闭" />
</div>
<div class="audio_content">
<!-- Status-dependent hint / recognised text. -->
<div class="input_content_box">
<div class="input_content">
<span v-if="audioStatus == 'input'">请说,我在聆听...</span>
<span v-else-if="audioStatus == 'transform' || audioStatus == 'end'">
{{ audioContentText }}
</span>
<span class="unknow_tip_text" v-else-if="audioStatus == 'unknown'">未能识别,请点击图标重试</span>
</div>
<div v-if="audioStatus == 'input'" class="input_tip_text">您可以说出您需要搜索的内容关键词</div>
<div v-if="audioStatus == 'unknown'" class="input_tip_text">说出您需要搜索的内容关键词</div>
</div>
<!-- One mic icon per state, each bound to that state's action. -->
<div class="audio_icon_box" :class="audioStatus">
<i v-if="audioStatus == 'unknown'" class="img_box input_audio" @click="reStartRecorderHandle"></i>
<i v-if="audioStatus == 'end'" class="img_box input_audio" @click="confirmSearchHandle"></i>
<i v-if="audioStatus == 'input'" class="img_box input_audio" @click="finishRecorderHandle"></i>
<i v-if="audioStatus == 'transform'" class="img_box input_audio" @click="closeModal"></i>
</div>
<!-- Mount point for the frequency-histogram waveform view. -->
<div ref="recorderWaveRef" class=""></div>
</div>
</div>
</div>
</div>
</Teleport>
</div>
</template>
<script setup lang="ts">
import { onMounted, onUnmounted, ref, watch } from 'vue'
import { v4 as uuidv4 } from 'uuid'
// 语音输入工具
import { createRecorder, createRecorderWithWaveView, destoryRecorder } from './recorder'
// api
import { getVoiceToText } from '@/services/common'
// type interface
// Modal lifecycle states: ready → input (recording) → transform (speech-to-
// text in flight) → end (text available), with 'unknown' for failure paths.
type AudioInputStatus = 'ready' | 'input' | 'transform' | 'end' | 'unknown'
// v-model binding that controls modal visibility.
const visibleModal = defineModel<boolean>()
const emit = defineEmits(['close', 'complete'])
const audioStatus = ref<AudioInputStatus>('ready')
const modalRef = ref<any>(null)
// Captured audio. NOTE(review): stopRecorderHandle assigns a Blob here, so
// the ref<string> generic looks wrong — confirm and retype as Blob | ''.
const audioContentBlobData = ref<string>('')
// Text recognised from the recorded audio.
const audioContentText = ref<string>('')
// recorder
const recorderIntance = ref<any>(null)
const recorderWaveInstance = ref<any>(null)
const recorderWaveRef = ref<any>(null)
// Guards button handlers against double taps while open/stop is in flight.
const isLoadingRecorder = ref<boolean>(false)
/** *************** method ************** **/
/**
 * Create (or reuse) the shared recorder and wire its realtime PCM callback
 * to the waveform view, once one has been created.
 */
const initRecorder = () => {
  // Called by recorder-core roughly 12 times per second while recording;
  // unused parameters are kept so the callback signature stays intact.
  const onProcess = (buffers: any[], powerLevel: any, bufferDuration, bufferSampleRate: any, newBufferIdx, asyncEnd) => {
    const wave = recorderWaveInstance.value
    if (wave) {
      // Feed only the newest buffer into the visualisation.
      wave.input(buffers[buffers.length - 1], powerLevel, bufferSampleRate)
    }
  }
  recorderIntance.value = createRecorder({ onProcess })
}
// Start recording.
/**
 * Open the microphone, attach the waveform view, then start recording.
 * On any failure (e.g. the user denies microphone access) the modal moves
 * to the 'unknown' state and the loading flag is cleared.
 */
const startRecorder = async () => {
  audioStatus.value = 'input'
  audioContentBlobData.value = ''
  audioContentText.value = ''
  isLoadingRecorder.value = true
  try {
    // Wrap recorder-core's callback-style open() in a promise.
    await new Promise<void>((resolve, reject) => {
      recorderIntance.value.open(
        () => {
          console.log('录音已打开')
          resolve()
        },
        (msg: string, isUserNotAllow: boolean) => {
          console.error('打开录音出错:' + msg, 'isUserNotAllow: ', isUserNotAllow)
          reject(new Error(msg)) // BUGFIX: reject with an Error, not `false`
        }
      )
    })
  } catch {
    // BUGFIX: the original never caught this rejection, leaking an unhandled
    // promise rejection and leaving the UI stuck in 'input' with the loading
    // flag still set when the user denied microphone access.
    audioStatus.value = 'unknown'
    isLoadingRecorder.value = false
    return
  }
  try {
    if (recorderWaveRef.value) {
      // Create the audio-visualisation drawing object.
      recorderWaveInstance.value = createRecorderWithWaveView(recorderWaveRef.value)
    }
  } catch (err) {
    // Visualisation is cosmetic — log and keep recording.
    console.error('音频可视化图形绘制出错', err)
  }
  try {
    console.log('尝试录音打开')
    // BUGFIX: removed the premature `isLoadingRecorder.value = false` here —
    // the flag should stay up until start() settles; `finally` clears it.
    await recorderIntance.value.start()
    console.log('录音已打开')
  } catch {
    console.error('打开录音出错')
    audioStatus.value = 'unknown'
  } finally {
    isLoadingRecorder.value = false
  }
}
// Stop recording.
/**
 * Stop the active recording, close the recorder, and stash the resulting
 * audio Blob for transcription. On failure the modal falls back to 'input';
 * this function never rejects.
 */
const stopRecorderHandle = async () => {
  audioStatus.value = 'transform'
  try {
    console.log('尝试终止录音')
    // BUGFIX: type the wrapper promise so `blob`/`duration` are not `unknown`
    // under strict TS, and type the error callback's parameter (implicit any).
    const { blob, duration } = await new Promise<{ blob: Blob; duration: number }>((resolve, reject) => {
      recorderIntance.value.stop(
        (blob: Blob, duration: number) => {
          resolve({ blob, duration })
        },
        (err: string) => {
          console.error('终止录音出错:' + err)
          recorderIntance.value.close()
          reject(new Error(err)) // BUGFIX: reject with an Error, not a bare object
        }
      )
    })
    // A local playback URL could be produced via URL.createObjectURL(blob)
    // if preview/download were ever needed.
    console.log('时长:' + duration + 'ms')
    await recorderIntance.value.close()
    audioContentBlobData.value = blob
  } catch {
    // Keep the original fallback: return to the recording state.
    audioStatus.value = 'input'
  }
}
// Reset input.
/**
 * Restart voice input after an 'unknown' failure: stop any leftover
 * recording session, then open a fresh one.
 */
const reStartRecorderHandle = async () => {
  // Ignore taps while another open/stop is in flight.
  if (isLoadingRecorder.value) return
  // BUGFIX: removed the dead `isLoadingRecorder.value = false` — the guard
  // above already guarantees the flag is false at this point.
  await stopRecorderHandle().catch(err => err) // defensive: never abort the restart
  await startRecorder()
}
// 完成录音
const finishRecorderHandle = async () => {
if (isLoadingRecorder.value) return
isLoadingRecorder.value = true
try {
await stopRecorderHandle()
// 获取语音转文本并返回文案
await fetchVoiceToText()
audioStatus.value = 'end'
} catch {
audioStatus.value = 'unknown'
} finally {
isLoadingRecorder.value = false
}
}
// Convert the recording into text.
/**
 * Send the recorded audio to the speech-to-text endpoint and store the
 * recognised text. NOTE(review): non-200 responses are silently ignored,
 * so the caller still transitions to 'end' — confirm whether that is wanted.
 */
const fetchVoiceToText = async () => {
  const voice_data = audioContentBlobData.value
  const formData = new FormData()
  formData.append('voice_data', voice_data)
  // BUGFIX: FormData.append accepts string | Blob — the original passed raw
  // numbers, a type error under strict TS (identical bytes on the wire).
  formData.append('seq', '0')
  formData.append('end', '1')
  formData.append('voice_id', uuidv4())
  formData.append('voice_format', '12')
  const { code, data } = await getVoiceToText(formData)
  if (code === 200) {
    console.log(data)
    const { text } = data
    audioContentText.value = text
  }
}
/**
 * Accept the transcription: hand the text to the parent via 'complete',
 * then reset the modal back to its idle state.
 */
const confirmSearchHandle = () => {
  emit('complete', audioContentText.value)
  // Reset all transient state and hide the modal.
  audioStatus.value = 'ready'
  visibleModal.value = false
  audioContentBlobData.value = ''
  audioContentText.value = ''
}
/**
 * Dismiss the modal: make sure any in-flight recording is stopped, reset
 * every piece of transient state, then notify the parent via 'close'.
 */
const closeModal = async () => {
  await stopRecorderHandle() // never rejects; failures fall back internally
  audioStatus.value = 'ready'
  visibleModal.value = false
  audioContentBlobData.value = ''
  audioContentText.value = ''
  emit('close')
}
/** ***************** watch ************** **/
// Auto-start a recording session every time the modal is opened.
watch(visibleModal, async val => {
if (val) {
await startRecorder()
}
})
/** ******************* life cycle ******************* **/
// Create the shared recorder once the component is in the DOM.
onMounted(() => {
initRecorder()
})
// Drop our reference and release recorder-core's global resources on teardown.
onUnmounted(() => {
recorderIntance.value = null
destoryRecorder()
})
/** ************** component expose *********** **/
// Let parents close the modal programmatically or inspect visibility.
defineExpose({
closeModal,
visibleModal
})
</script>
<style lang="scss" scoped>
/* Full-viewport fixed layer hosting mask + content. */
.modal_box {
position: fixed;
width: 100%;
left: 0;
top: 0;
right: 0;
bottom: 0;
z-index: 2000;
}
/* Dimmed click-to-close backdrop. */
.modal_mask {
position: absolute;
left: 0;
bottom: 0;
width: 100%;
height: 100%;
background: rgba(0, 0, 0, 0.5);
}
/* Bottom-anchored card above the mask. */
.modal_content {
position: absolute;
bottom: 32px;
z-index: 1;
left: 0;
right: 0;
padding: 0 12px;
box-sizing: border-box;
* {
box-sizing: border-box;
}
.audio_box {
border-radius: 10px;
position: relative;
width: auto;
/* Gradient shows through as the header background. */
background-image: linear-gradient(91deg, #7d79ff 9%, #43e1ff 93%);
.audio_header {
height: 44px;
border-radius: 10px 10px 0 0;
color: #ffffff;
font-size: 14px;
font-weight: bold;
display: flex;
align-items: center;
justify-content: space-between;
padding-left: 22px;
padding-right: 18px;
}
.close_icon {
width: 12px;
height: 12px;
}
}
/* White body covering the gradient below the header. */
.audio_content {
height: 248px;
padding: 24px;
border-radius: 10px;
background-color: #ffffff;
}
.input_content_box {
height: 64px;
text-align: center;
overflow-y: auto;
}
.input_content {
text-align: center;
font-size: 16px;
color: $color-text;
font-weight: bold;
line-height: 22px;
}
.unknow_tip_text {
color: $color-danger;
}
.input_tip_text {
margin-top: 10px;
font-size: 12px;
line-height: 17px;
color: $color-text-light-3;
text-align: center;
}
/* Centered microphone icon; the .img_box background is the tap target. */
.audio_icon_box {
display: flex;
align-items: center;
justify-content: center;
.img_box {
width: 138px;
height: 138px;
display: block;
&.input_audio {
background: url('@/assets/images/audio_input_icon.png') no-repeat center center;
}
}
}
}
</style>