Vue3集成百度实时语音识别

示例

SpeechRecognitionModal.vue 组件

Vue 单文件组件(SFC)代码:
<template>
  <!-- Speech-input modal: status indicator, recognised text, and record/confirm controls. -->
  <transition name="modal-fade">
    <div v-if="isOpen" class="modal-overlay" @click.self="handleOverlayClick">
      <div class="modal-container">
        <div class="modal-header">
          <h2>语音输入</h2>
          <!-- aria-label gives the icon-only button an accessible name. -->
          <button class="close-button" aria-label="关闭" @click="closeModal">&times;</button>
        </div>

        <div class="modal-body">
          <div class="status-indicator">
            <!-- Animated microphone while recording, static microphone otherwise. -->
            <div v-if="isRecording" class="mic-animation">
              <div class="mic-icon">
                <svg viewBox="0 0 24 24">
                  <path
                    d="M12,2A3,3 0 0,1 15,5V11A3,3 0 0,1 12,14A3,3 0 0,1 9,11V5A3,3 0 0,1 12,2M19,11C19,14.53 16.39,17.44 13,17.93V21H11V17.93C7.61,17.44 5,14.53 5,11H7A5,5 0 0,0 12,16A5,5 0 0,0 17,11H19Z" />
                </svg>
              </div>
              <div class="sound-wave">
                <div class="wave"></div>
                <div class="wave"></div>
                <div class="wave"></div>
              </div>
            </div>
            <div v-else class="mic-ready">
              <svg viewBox="0 0 24 24">
                <path
                  d="M12,2A3,3 0 0,1 15,5V11A3,3 0 0,1 12,14A3,3 0 0,1 9,11V5A3,3 0 0,1 12,2M19,11C19,14.53 16.39,17.44 13,17.93V21H11V17.93C7.61,17.44 5,14.53 5,11H7A5,5 0 0,0 12,16A5,5 0 0,0 17,11H19Z" />
              </svg>
            </div>
            <p class="status-text">{{ statusText }}</p>
            <div v-if="recognitionError" class="error-message">
              <svg viewBox="0 0 24 24" class="error-icon">
                <path
                  d="M12,2C17.53,2 22,6.47 22,12C22,17.53 17.53,22 12,22C6.47,22 2,17.53 2,12C2,6.47 6.47,2 12,2M15.59,7L12,10.59L8.41,7L7,8.41L10.59,12L7,15.59L8.41,17L12,13.41L15.59,17L17,15.59L13.41,12L17,8.41L15.59,7Z" />
              </svg>
              <span>{{ friendlyErrorMessage }}</span>
            </div>
          </div>

          <div class="result-container">
            <!--
              displayText falls back to a placeholder and is therefore never empty,
              so "has-result" is keyed on the actual recognised text instead.
            -->
            <div class="result-content" :class="{ 'has-result': transcript || interimTranscript }">
              {{ displayText }}
            </div>
            <!-- Voice hint (shown only for volume/clarity related errors) -->
            <div v-if="friendlyErrorMessage &&
              [3301, 3305, 3312, '-3005', '-3006'].includes(recognitionError)" class="voice-hint">
              {{ friendlyErrorMessage }}
            </div>
          </div>
        </div>

        <div class="modal-footer">
          <button @click="toggleRecording" class="control-button" :class="{ 'listening': isRecording }"
            :disabled="!isBrowserSupported">
            {{ isRecording ? '停止录音' : '开始录音' }}
          </button>
          <button @click="confirmResult" class="confirm-button" :disabled="!transcript">
            使用内容
          </button>
        </div>
      </div>
    </div>
  </transition>
</template>

<script setup>
import { ref, computed, onMounted, onBeforeUnmount } from 'vue';

// Component props.
// - isOpen: controls whether the modal is rendered (bound to v-if in the template).
// - baiduConfig: spread into the START frame sent to the recognition service.
// NOTE(review): baiduConfig is declared `required: true` and also has a `default`;
// confirm which of the two is intended, since the usage example omits the prop.
const props = defineProps({
  isOpen: {
    type: Boolean,
    required: true
  },
  baiduConfig: {
    type: Object,
    required: true,
    default: () => ({
      appid: 0, // appid of the application in the Baidu Cloud console
      appkey: '', // api key
      dev_pid: 15372,
      format: 'pcm',
      sample: 16000
    })
  }
});

// Events: 'close' when the modal is dismissed, 'confirm' carrying the recognised text.
const emit = defineEmits(['close', 'confirm']);

// Reactive UI state.
const isRecording = ref(false);
const transcript = ref('');
const recognitionError = ref(null);
const isBrowserSupported = ref(true);
const interimTranscript = ref(''); // interim (not yet final) recognition result
// Non-reactive handles for the current capture session; null when idle.
let audioContext = null;
let mediaStream = null;
let processor = null;
let socket = null;

// 错误映射表
const errorMessageMap = {
  3300: '输入参数不正确',
  3301: '请提高音量并清晰发音',
  3302: '鉴权失败,请检查API密钥',
  3303: '服务器内部错误',
  3304: 'GPS信息获取失败',
  3305: '未检测到有效语音',
  3307: '识别引擎繁忙',
  3308: '请求超时',
  3309: '引擎错误',
  3310: '音频过长(超过60秒)',
  3311: '音频数据异常',
  3312: '发音不清晰',
  3313: '服务不可用',
  3314: '服务器过载',
  '-3005': '请提高音量并清晰发音', // 未检测到有效语音
  '-3006': '请提高音量并清晰发音', // 静音超时
  'no-speech': '未检测到语音,请靠近麦克风说话',
  'audio-capture': '无法访问麦克风',
  'not-allowed': '麦克风权限被拒绝',
  'network': '网络连接失败',
  'default': '识别服务异常'
};

// Computed: user-facing message for the current error ('' when there is none).
const friendlyErrorMessage = computed(() => {
  const code = recognitionError.value;
  if (!code) {
    return '';
  }

  // Volume/clarity related codes all share a single hint.
  const isVoiceIssue = [3301, 3305, 3312, '-3005', '-3006'].includes(code);
  if (isVoiceIssue) {
    return '请提高音量并清晰发音';
  }

  // Anything unknown falls back to the generic message.
  return errorMessageMap[code] || errorMessageMap['default'];
});

// Computed: one-line status shown under the microphone icon.
// An active error takes precedence over the recording state.
const statusText = computed(() => {
  if (recognitionError.value) {
    return '识别遇到问题';
  }
  if (isRecording.value) {
    return '正在聆听中...';
  }
  return '点击开始录音按钮';
});

// Computed: text shown in the result box. The interim result is appended after
// the final transcript (space-separated); a placeholder is shown when both are empty.
const displayText = computed(() => {
  const finalPart = transcript.value || '';
  const pendingPart = interimTranscript.value;

  const combined = [finalPart, pendingPart].filter(Boolean).join(' ');
  return combined || '请点击"开始录音"按钮并说话...';
});

// Builds a random lowercase base-36 identifier by concatenating two random chunks
// (each chunk is at most 13 characters long).
const generateRandomId = () => {
  const randomChunk = () => Math.random().toString(36).substring(2, 15);
  const head = randomChunk();
  const tail = randomChunk();
  return head + tail;
};

// True while initRecording is between its first statement and its completion.
// isRecording only flips to true at the very end of a successful start, so
// without this flag a second click during the permission prompt would start a
// second capture and overwrite (and thereby leak) the first media stream.
let isInitializing = false;

// Main flow: starts a recognition session.
// Resets the displayed state, requests the microphone, opens the WebSocket and
// wires a ScriptProcessor node that streams 16-bit PCM frames to the server.
// On any failure the error is mapped via handleError and all resources acquired
// so far are released through stopRecording. Calls made while a previous start
// is still in progress are ignored.
const initRecording = async () => {
  if (isInitializing) return;
  isInitializing = true;

  try {
    transcript.value = '';
    interimTranscript.value = '';
    recognitionError.value = null;

    // 1. Request microphone access
    mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: {
        sampleRate: 16000,
        channelCount: 1,
        echoCancellation: false,
        noiseSuppression: false,
        autoGainControl: false
      }
    });

    // 2. Create the audio context
    audioContext = new (window.AudioContext || window.webkitAudioContext)({
      sampleRate: 16000
    });

    // 3. Open the WebSocket connection
    initWebSocket();

    // 4. Create the audio processor
    const source = audioContext.createMediaStreamSource(mediaStream);
    processor = audioContext.createScriptProcessor(4096, 1, 1);

    processor.onaudioprocess = (e) => {
      // Drop audio until recording is active and the socket is open.
      if (!isRecording.value || !socket || socket.readyState !== WebSocket.OPEN) return;

      // Read the first channel and convert it to 16-bit PCM
      const audioData = e.inputBuffer.getChannelData(0);
      const pcmData = convertFloat32ToInt16(audioData);

      // Send the audio as a binary frame
      socket.send(pcmData);
    };

    source.connect(processor);
    processor.connect(audioContext.destination);

    isRecording.value = true;

  } catch (error) {
    console.error('初始化失败:', error);
    handleError(error);
    stopRecording();
  } finally {
    isInitializing = false;
  }
};

// Opens the WebSocket for a new recognition session and installs its handlers.
//
// Every handler is bound to the socket created by this call and does nothing
// once the module-level `socket` points elsewhere. stopRecording leaves the old
// connection open after FINISH, so without this a socket from a previous session
// could stop a newer recording from its onclose, or append stale text.
const initWebSocket = () => {
  const cuid = `web_${generateRandomId()}`;
  const sn = generateRandomId();

  // Retire a connection left over from a previous session. Handlers are detached
  // first so that closing it cannot surface a spurious error or stop event.
  if (socket) {
    const stale = socket;
    stale.onopen = null;
    stale.onmessage = null;
    stale.onclose = null;
    stale.onerror = null;
    stale.close();
  }

  const ws = new WebSocket(`wss://vop.baidu.com/realtime_asr?sn=${sn}`);
  socket = ws;

  const isCurrent = () => socket === ws;

  ws.onopen = () => {
    if (!isCurrent()) return;
    const startFrame = {
      type: "START",
      data: {
        ...props.baiduConfig,
        cuid: cuid
      }
    };
    ws.send(JSON.stringify(startFrame));
  };

  ws.onmessage = (event) => {
    if (!isCurrent()) return;
    try {
      const data = JSON.parse(event.data);

      // Clear the error state on success or on non-critical codes. err_no is
      // compared as a number: string entries such as '-3005' can never match
      // the numeric values that handleApiError tests for.
      if (data.err_no === 0 || [3301, 3305, 3312, -3005, -3006].includes(data.err_no)) {
        recognitionError.value = null;
      }

      // NOTE(review): the service is presumed to send keep-alive frames of type
      // HEARTBEAT that may lack err_no - confirm against the protocol docs.
      // They carry no result and must not be reported as an error.
      if (data.type === "HEARTBEAT") return;

      if (data.err_no !== 0) {
        handleApiError(data);
        return;
      }

      if (data.type === "MID_TEXT") {
        // Interim result: replaces the previous interim text
        interimTranscript.value = data.result;
      } else if (data.type === "FIN_TEXT") {
        // Final result: appended to the transcript, separated by '。'
        if (data.result) {
          transcript.value += transcript.value ? '。' + data.result : data.result;
        }
        interimTranscript.value = '';
      }
    } catch (e) {
      console.error('解析错误:', e);
    }
  };

  ws.onclose = () => {
    if (!isCurrent()) return;
    if (isRecording.value) stopRecording();
  };

  ws.onerror = () => {
    if (!isCurrent()) return;
    recognitionError.value = 'network';
    stopRecording();
  };
};

// Converts Float32 audio samples to signed 16-bit PCM.
//
// @param buffer  array-like of samples, nominally in [-1, 1]
// @returns       Int16Array of the same length
//
// Samples are clamped on BOTH sides before scaling. Clamping only the upper
// bound lets a value below -1 scale past the Int16 range, where the typed-array
// store wraps it around to an unrelated value instead of saturating.
const convertFloat32ToInt16 = (buffer) => {
  const length = buffer.length;
  const buf = new Int16Array(length);
  for (let i = 0; i < length; i++) {
    const clamped = Math.max(-1, Math.min(1, buffer[i]));
    buf[i] = clamped * 32767;
  }
  return buf;
};

// Error handling for frames the service reports with a non-zero err_no.
// Codes -3005 and -3006 are treated as non-critical and ignored; any other code
// is stored as the current error ('service-error' when err_no is falsy).
const handleApiError = (data) => {
  const ignorableCodes = [-3005, -3006];
  if (ignorableCodes.includes(data.err_no)) {
    return;
  }

  recognitionError.value = data.err_no || 'service-error';
};

// Maps an exception thrown during start-up to one of the symbolic error codes:
// 'not-allowed' for NotAllowedError, 'network' when the message mentions
// "network", and 'audio-capture' for everything else.
const handleError = (error) => {
  if (error.name === 'NotAllowedError') {
    recognitionError.value = 'not-allowed';
    return;
  }

  const mentionsNetwork = error.message.includes('network');
  recognitionError.value = mentionsNetwork ? 'network' : 'audio-capture';
};

// Tears down the capture pipeline: processor node, audio context, then the
// microphone tracks. Each handle is reset to null once released.
const releaseAudioPipeline = () => {
  if (processor) {
    processor.disconnect();
    processor = null;
  }

  if (audioContext) {
    audioContext.close().catch((err) => console.error(err));
    audioContext = null;
  }

  if (mediaStream) {
    for (const track of mediaStream.getTracks()) {
      track.stop();
    }
    mediaStream = null;
  }
};

// Control: stops the current recording.
// Clears the recording flag first, sends a FINISH frame if the socket is open
// (the socket itself is left open for the server to close), releases the audio
// resources, and finally folds any interim text into the transcript.
// NOTE(review): the interim text is appended with no leading separator and a
// trailing "\n", unlike FIN_TEXT results which are joined with '。'; a FIN_TEXT
// arriving after FINISH would also append the same sentence again - confirm intent.
const stopRecording = () => {
  isRecording.value = false;

  const socketIsOpen = socket && socket.readyState === WebSocket.OPEN;
  if (socketIsOpen) {
    const finishFrame = { type: "FINISH" };
    socket.send(JSON.stringify(finishFrame));
  }

  releaseAudioPipeline();

  const pendingText = interimTranscript.value;
  if (pendingText) {
    transcript.value += pendingText + "\n";
    interimTranscript.value = '';
  }
};

// Button handler: stops the recording when one is active, otherwise starts one.
const toggleRecording = () => {
  if (isRecording.value) {
    stopRecording();
  } else {
    initRecording();
  }
};

// Dismisses the modal: stops recording, closes and forgets the socket (if any),
// then emits 'close' to the parent.
const closeModal = () => {
  stopRecording();

  const openSocket = socket;
  if (openSocket) {
    openSocket.close();
    socket = null;
  }

  emit('close');
};

// Hands the current transcript to the parent via 'confirm', then closes the modal.
const confirmResult = () => {
  const recognizedText = transcript.value;
  emit('confirm', recognizedText);
  closeModal();
};

// Closes the modal only when the click landed on the overlay itself rather than
// on something inside it.
const handleOverlayClick = (event) => {
  const clickedOverlayItself = event.target === event.currentTarget;
  if (clickedOverlayItself) {
    closeModal();
  }
};

// Lifecycle: on mount, record whether the browser exposes both media devices
// and WebSocket; the record button is disabled when it does not.
onMounted(() => {
  const hasMediaDevices = Boolean(navigator.mediaDevices);
  const hasWebSocket = Boolean(window.WebSocket);
  isBrowserSupported.value = hasMediaDevices && hasWebSocket;
});

// Lifecycle: before unmount, stop recording and close any remaining socket.
onBeforeUnmount(() => {
  stopRecording();

  if (!socket) {
    return;
  }
  socket.close();
  socket = null;
});
</script>

<style scoped>
/* 保持原有样式不变 */
.modal-overlay {
  position: fixed;
  top: 0;
  left: 0;
  right: 0;
  bottom: 0;
  background-color: rgba(0, 0, 0, 0.5);
  display: flex;
  justify-content: center;
  align-items: center;
  z-index: 1000;
}

.modal-container {
  background-color: white;
  border-radius: 12px;
  box-shadow: 0 4px 20px rgba(0, 0, 0, 0.15);
  width: 90%;
  max-width: 500px;
  max-height: 90vh;
  display: flex;
  flex-direction: column;
  overflow: hidden;
}

.modal-header {
  padding: 16px 24px;
  border-bottom: 1px solid #eee;
  display: flex;
  justify-content: space-between;
  align-items: center;
}

.modal-header h2 {
  margin: 0;
  font-size: 1.25rem;
  color: #333;
}

.close-button {
  background: none;
  border: none;
  font-size: 1.5rem;
  cursor: pointer;
  color: #666;
  padding: 0;
  line-height: 1;
  outline: none;
}

.modal-body {
  padding: 24px;
  flex: 1;
  overflow-y: auto;
}

.status-indicator {
  display: flex;
  flex-direction: column;
  align-items: center;
  margin-bottom: 24px;
}

.mic-animation {
  display: flex;
  align-items: center;
  gap: 12px;
  margin-bottom: 8px;
}

.mic-icon svg,
.mic-ready svg {
  width: 36px;
  height: 36px;
  fill: #4a6cf7;
}

.sound-wave {
  display: flex;
  align-items: center;
  gap: 4px;
  height: 36px;
}

.wave {
  width: 6px;
  height: 16px;
  background-color: #4a6cf7;
  border-radius: 3px;
  animation: wave 1.2s infinite ease-in-out;
}

.wave:nth-child(1) {
  animation-delay: -0.6s;
}

.wave:nth-child(2) {
  animation-delay: -0.3s;
}

.wave:nth-child(3) {
  animation-delay: 0s;
}

@keyframes wave {

  0%,
  60%,
  100% {
    transform: scaleY(0.4);
  }

  30% {
    transform: scaleY(1);
  }
}

.mic-ready svg {
  opacity: 0.7;
}

/* Status line under the microphone icon.
   The earlier `color: #666` was overridden by `color: #333` further down, and
   `margin: 0` was followed by `margin-bottom: 4px`; both are collapsed into
   the values that actually took effect. */
.status-text {
  margin: 0 0 4px;
  color: #333;
  font-size: 0.9rem;
  text-align: center;
  font-weight: 500;
}

.result-container {
  background-color: #f8f9fa;
  border-radius: 8px;
  padding: 16px;
  min-height: 120px;
}

.voice-hint {
  display: flex;
  align-items: center;
  gap: 6px;
  margin-top: 8px;
  color: #ff9800;
  font-size: 0.85rem;
}

.result-content {
  color: #666;
  font-size: 0.95rem;
  line-height: 1.5;
}

.result-content.has-result {
  color: #333;
}

.info-message {
  display: flex;
  align-items: center;
  gap: 8px;
  margin-top: 8px;
  color: #666;
  font-size: 0.8rem;
}

.modal-footer {
  padding: 16px 24px;
  border-top: 1px solid #eee;
  display: flex;
  justify-content: flex-end;
  gap: 12px;
}

.control-button {
  padding: 8px 16px;
  background-color: #f0f2f5;
  border: none;
  border-radius: 6px;
  color: #333;
  cursor: pointer;
  font-weight: 500;
  transition: all 0.2s;
}

.control-button.listening {
  background-color: #ffebee;
  color: #f44336;
}

.control-button:hover {
  background-color: #e4e6eb;
}

.control-button:disabled {
  background-color: #e0e0e0;
  color: #9e9e9e;
  cursor: not-allowed;
}

.confirm-button {
  padding: 8px 16px;
  background-color: #4a6cf7;
  border: none;
  border-radius: 6px;
  color: white;
  cursor: pointer;
  font-weight: 500;
  transition: background-color 0.2s;
}

.confirm-button:hover {
  background-color: #3a5bd9;
}

.confirm-button:disabled {
  background-color: #cccccc;
  cursor: not-allowed;
}

.error-message {
  display: flex;
  align-items: center;
  justify-content: center;
  gap: 8px;
  margin-top: 12px;
  padding: 8px 12px;
  background-color: #ffebee;
  border-radius: 6px;
  color: #d32f2f;
  font-size: 0.9rem;
}

.error-icon {
  width: 18px;
  height: 18px;
  fill: #d32f2f;
}

.modal-fade-enter-active,
.modal-fade-leave-active {
  transition: opacity 0.3s ease;
}

.modal-fade-enter-from,
.modal-fade-leave-to {
  opacity: 0;
}
</style>

组件使用方法

Vue / TypeScript 代码:
<SpeechRecognitionModal v-if="showModal" :isOpen="showModal" :baiduConfig="baiduConfig" @close="showModal = false" @confirm="handleRecognitionResult" />

const inputText = ref('');
const showModal = ref(false);
// baiduConfig is a required prop of the component and is forwarded in the START
// frame; fill in the appid / appkey of your own Baidu Cloud application.
const baiduConfig = {
  appid: 0,
  appkey: '',
  dev_pid: 15372,
  format: 'pcm',
  sample: 16000
};
// The component emits the recognised transcript, which is a string.
const handleRecognitionResult = (text: string) => {
  inputText.value = text;
};
相关推荐
NAGNIP1 分钟前
一文搞懂深度学习中的通用逼近定理!
人工智能·算法·面试
冬奇Lab1 小时前
一天一个开源项目(第36篇):EverMemOS - 跨 LLM 与平台的长时记忆 OS,让 Agent 会记忆更会推理
人工智能·开源·资讯
冬奇Lab1 小时前
OpenClaw 源码深度解析(一):Gateway——为什么需要一个"中枢"
人工智能·开源·源码阅读
AngelPP5 小时前
OpenClaw 架构深度解析:如何把 AI 助手搬到你的个人设备上
人工智能
宅小年5 小时前
Claude Code 换成了Kimi K2.5后,我再也回不去了
人工智能·ai编程·claude
九狼5 小时前
Flutter URL Scheme 跨平台跳转
人工智能·flutter·github
ZFSS5 小时前
Kimi Chat Completion API 申请及使用
前端·人工智能
天翼云开发者社区7 小时前
春节复工福利就位!天翼云息壤2500万Tokens免费送,全品类大模型一键畅玩!
人工智能·算力服务·息壤
知识浅谈7 小时前
教你如何用 Gemini 将课本图片一键转为精美 PPT
人工智能
Ray Liang7 小时前
被低估的量化版模型,小身材也能干大事
人工智能·ai·ai助手·mindx