阿里云实时语音识别

前端(Vue 3 + Element Plus):用 MediaRecorder 采集麦克风音频(WebM/Opus),每 500ms 通过 WebSocket 发送给后端,并显示后端返回的识别文本。

代码(Vue 单文件组件,`<script setup>` 写法):
<script setup>
import { ref, onMounted, onBeforeUnmount } from 'vue'

// Reactive UI state: whether the microphone is being captured, and the text shown to the user.
const isRecording = ref(false)
const transcript = ref('')
// Non-reactive handles shared by the recording helpers in this component.
let mediaRecorder = null
let ws = null

/**
 * Extracts the text to display from one server message.
 *
 * The server sends JSON of the form `{ type, content }`; only `content` is
 * meant for the user. A payload that is not JSON is returned unchanged so a
 * plain-text server keeps working.
 *
 * @param {unknown} raw - `event.data` of a WebSocket message.
 * @returns {string|null} Text to show, or null when there is nothing to show.
 */
const readTranscriptText = (raw) => {
  if (typeof raw !== 'string') return null
  try {
    const message = JSON.parse(raw)
    if (message !== null && typeof message === 'object') {
      return typeof message.content === 'string' ? message.content : null
    }
  } catch {
    // Not JSON: fall through and treat the payload as plain text.
  }
  return raw
}

onMounted(() => {
  ws = new WebSocket('ws://localhost:3002')
  ws.onopen = () => {
    console.log('WebSocket连接成功')
  }
  ws.onerror = (event) => {
    console.error('WebSocket连接错误:', event)
  }
  ws.onmessage = (event) => {
    console.log("接收到消息:",event.data)
    const text = readTranscriptText(event.data)
    // Skip empty results so a content-less message does not blank the text already shown.
    if (text) transcript.value = text
  }
})

onBeforeUnmount(() => {
  // Release the microphone first; otherwise capture keeps running after the view is gone.
  stopRecording()
  if (ws) {
    ws.close()
    ws = null
  }
})

/**
 * Starts capturing the microphone and streams the audio to the server.
 *
 * Audio is recorded as WebM/Opus and delivered in 500 ms chunks over the
 * shared WebSocket. If capture cannot start (permission denied, unsupported
 * format, ...) the error is logged and the recording state is rolled back, so
 * the button returns to "start".
 *
 * @returns {Promise<void>} Resolves once recording has started or start-up has failed.
 */
const startRecording = async () => {
  if (isRecording.value) return
  isRecording.value = true
  transcript.value = ''
  // Drop any finished recorder so a stop issued while the permission prompt is
  // open cannot call stop() on an inactive recorder.
  mediaRecorder = null

  const mimeType = 'audio/webm;codecs=opus'
  let stream = null
  try {
    // Acquire the microphone stream.
    stream = await navigator.mediaDevices.getUserMedia({ audio: true })

    // The user may have pressed stop while the permission prompt was showing.
    if (!isRecording.value) {
      stream.getTracks().forEach(track => track.stop())
      return
    }

    if (!MediaRecorder.isTypeSupported(mimeType)) {
      throw new Error(`MediaRecorder does not support ${mimeType}`)
    }

    // Record as WebM/Opus; the server is responsible for any transcoding.
    const recorder = new MediaRecorder(stream, { mimeType })

    recorder.ondataavailable = async (e) => {
      if (e.data.size === 0) return
      try {
        const buffer = await e.data.arrayBuffer()
        // Check the socket at send time: it may have closed while the blob was being read.
        if (ws && ws.readyState === WebSocket.OPEN) {
          ws.send(buffer)
        }
      } catch (err) {
        console.error('发送音频数据失败:', err)
      }
    }
    recorder.onstop = () => {
      console.log("停止录音")
    }

    recorder.start(500) // emit a chunk every 500 ms
    mediaRecorder = recorder
  } catch (err) {
    console.error('启动录音失败:', err)
    // Roll back so the UI is not stuck in the "recording" state.
    stream?.getTracks().forEach(track => track.stop())
    mediaRecorder = null
    isRecording.value = false
  }
}

/**
 * Ends the current recording, if any: clears the recording flag, stops the
 * recorder and releases every track of its media stream. Does nothing when no
 * recording is in progress.
 */
const stopRecording = () => {
  const wasRecording = isRecording.value
  if (wasRecording) {
    isRecording.value = false
    const recorder = mediaRecorder
    if (recorder) {
      recorder.stop()
      // Stop the underlying tracks as well so the microphone is released.
      for (const track of recorder.stream.getTracks()) {
        track.stop()
      }
    }
  }
}
</script>

<template>
  <div style="padding: 40px; max-width: 600px; margin: auto;">
    <!-- Single toggle button: calls stopRecording() while recording, startRecording() otherwise. -->
    <el-button type="primary" @click="isRecording ? stopRecording() : startRecording()">
      {{ isRecording ? '停止录音' : '开始录音' }}
    </el-button>
    <div style="margin-top: 30px;">
      <el-card>
        <div>识别文本:</div>
        <!-- Shows the current value of `transcript`, which the WebSocket onmessage handler updates. -->
        <!-- NOTE(review): fixed 600px width sits inside a container capped at max-width 600px; it may overflow the card. Confirm intended. -->
        <div style="min-height: 40px; color: #333;width: 600px;">{{ transcript }}</div>
      </el-card>
    </div>
  </div>
</template>

<style scoped>
/*
 * NOTE(review): no element in this component's template carries the `logo`
 * class, so these scoped rules currently match nothing. They look like
 * leftovers from a project scaffold; confirm before removing.
 */
.logo {
  height: 6em;
  padding: 1.5em;
  will-change: filter;
  transition: filter 300ms;
}
/* Glow on hover. */
.logo:hover {
  filter: drop-shadow(0 0 2em #646cffaa);
}
/* Different glow colour for the `.vue` variant. */
.logo.vue:hover {
  filter: drop-shadow(0 0 2em #42b883aa);
}
</style>

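后端(Node.js):通过 WebSocket 接收前端的 WebM/Opus 音频,用 ffmpeg 转成 16kHz、单声道、16 位 PCM 后送入阿里云实时语音识别,并把识别结果以 JSON(`{ type, content }`)回传给前端。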

代码(Node.js,依赖 express、ws、alibabacloud-nls、fluent-ffmpeg、@ffmpeg-installer/ffmpeg):
const express = require('express');
const http = require('http');
const WebSocket = require('ws');
const Nls = require('alibabacloud-nls');
const { PassThrough } = require('stream');
const ffmpegPath = require('@ffmpeg-installer/ffmpeg').path;
const ffmpeg = require('fluent-ffmpeg');

// One HTTP server hosts both the Express app and the WebSocket endpoint.
const app = express();
const server = http.createServer(app);
const wss = new WebSocket.Server({ server });

// Alibaba Cloud NLS credentials. Prefer the environment so secrets stay out of
// the source; the empty-string fallbacks may still be replaced by hand.
const ALI_APP_KEY = process.env.ALI_APP_KEY || '';
const ALI_TOKEN = process.env.ALI_TOKEN || '';
if (!ALI_APP_KEY || !ALI_TOKEN) {
  // Fail loudly at boot instead of only when the first client starts talking.
  console.warn('ALI_APP_KEY / ALI_TOKEN 未配置,语音识别将无法启动');
}

// Use the ffmpeg binary bundled by @ffmpeg-installer/ffmpeg.
ffmpeg.setFfmpegPath(ffmpegPath);

/**
 * Pipelines that currently hold live resources (an ffmpeg process and an NLS
 * connection). Every WebSocket client owns its own pipeline; this replaces
 * the former module-level `transcriber` / `ffmpegStream` / `inputStream`
 * variables, which were shared by all clients and clobbered each other.
 * Membership in this set doubles as the "is this pipeline still alive" flag.
 */
const activePipelines = new Set();

/**
 * Releases the resources of a transcription pipeline.
 *
 * Safe to call repeatedly: a pipeline that has already been stopped is ignored.
 *
 * @param {object} [pipeline] - Pipeline returned by `startAliyunTranscription`.
 *   When omitted, every live pipeline is stopped.
 */
const stopAliyunTranscription = (pipeline) => {
  if (!pipeline) {
    [...activePipelines].forEach((live) => stopAliyunTranscription(live));
    return;
  }
  // `delete` returns false when the pipeline was already torn down.
  if (!activePipelines.delete(pipeline)) return;

  pipeline.inputStream.end();
  pipeline.ffmpegStream?.end();
  pipeline.transcriber.shutdown();
};

/**
 * Creates the transcoding + recognition pipeline for one client.
 *
 * WebM audio written to `inputStream` is transcoded by ffmpeg to 16 kHz mono
 * signed 16-bit PCM and, once the recognizer has started, forwarded to it.
 * Audio may be written immediately; output produced before the recognizer is
 * ready stays buffered in the streams.
 *
 * @param {(payload: {type: string, content: string}) => void} sendToClient -
 *   Delivers a recognition result to the client.
 * @param {(err: unknown) => void} onStartFailed - Called when the recognizer
 *   could not be started while the pipeline was still live.
 * @returns {{inputStream: PassThrough, ffmpegStream: object, transcriber: object}}
 */
const startAliyunTranscription = (sendToClient, onStartFailed) => {
  const inputStream = new PassThrough();
  // Instantiate the recognizer with the configured app key and token.
  const transcriber = new Nls.SpeechTranscription({
    url: 'wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1',
    appkey: ALI_APP_KEY,
    token: ALI_TOKEN,
  });
  const pipeline = { inputStream, ffmpegStream: null, transcriber };
  activePipelines.add(pipeline);

  const onDecoderError = (err) => {
    // Errors raised after teardown are fallout of closing the streams ourselves.
    if (!activePipelines.has(pipeline)) return;
    console.log('ffmpeg转码失败:', err);
    stopAliyunTranscription(pipeline);
  };

  // The previously repeated -f/-acodec/-ar/-ac output options duplicated what
  // the fluent setters below already emit, so they are not repeated here.
  const command = ffmpeg()
    .input(inputStream)
    .inputFormat('webm')
    .inputOptions('-fflags +genpts')
    .audioCodec('pcm_s16le')
    .audioChannels(1)
    .audioFrequency(16000)
    .format('s16le')
    // Listen on the command itself: an EventEmitter 'error' without a listener
    // is thrown and would take the whole server down.
    .on('error', onDecoderError);

  pipeline.ffmpegStream = command.pipe();
  pipeline.ffmpegStream.on('error', onDecoderError);
  pipeline.ffmpegStream.on('end', () => {
    stopAliyunTranscription(pipeline);
  });

  // Returns a listener that logs a recognizer event and relays its text.
  const forwardResult = (type) => (msg) => {
    const data = JSON.parse(msg);
    console.log(`${type}:`, data);
    sendToClient({ type, content: data.payload?.result || '' });
  };

  transcriber.on('started', (msg) => {
    console.log('开始识别:', msg);
  });
  transcriber.on('changed', forwardResult('changed'));
  transcriber.on('completed', forwardResult('completed'));
  transcriber.on('failed', (msg) => {
    console.log('识别失败:', msg);
    // Tear down so the next audio chunk builds a fresh pipeline instead of
    // feeding a dead recognizer forever.
    stopAliyunTranscription(pipeline);
  });
  transcriber.on('closed', () => {
    console.log('连接关闭');
  });

  // Start recognition. Audio keeps flowing into ffmpeg meanwhile; PCM is only
  // drained (by attaching the 'data' listener) once the recognizer is ready.
  transcriber.start(transcriber.defaultStartParams(), true, 6000).then(
    () => {
      if (!activePipelines.has(pipeline)) return; // torn down while connecting
      pipeline.ffmpegStream.on('data', (pcmChunk) => {
        if (activePipelines.has(pipeline)) {
          transcriber.sendAudio(pcmChunk);
        }
      });
    },
    (err) => {
      console.log(`[识别启动失败] ${err}`);
      if (!activePipelines.has(pipeline)) return; // torn down while connecting
      stopAliyunTranscription(pipeline);
      onStartFailed(err);
    },
  );

  return pipeline;
};

wss.on('connection', (ws) => {
  console.log('新用户连接WebSocket成功');

  // Pipeline owned by this connection; created lazily on the first audio chunk.
  let pipeline = null;

  const sendToClient = (payload) => {
    // Results can arrive after the client has gone away.
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify(payload));
    }
  };

  // Close the connection on start-up failure rather than retrying on every
  // incoming chunk (1011 = internal error).
  const onStartFailed = () => {
    ws.close(1011, 'speech transcription failed to start');
  };

  ws.on('message', (data) => {
    // Frames can still arrive while the socket is closing; ignore them.
    if (ws.readyState !== WebSocket.OPEN) return;

    if (!activePipelines.has(pipeline)) {
      pipeline = startAliyunTranscription(sendToClient, onStartFailed);
    }
    // Write synchronously so chunks reach ffmpeg in arrival order; the first
    // chunk carries the WebM header and must not be overtaken.
    pipeline.inputStream.write(Buffer.from(data));
  });

  ws.on('error', (err) => {
    console.log('WebSocket连接错误:', err);
  });

  ws.on('close', () => {
    stopAliyunTranscription(pipeline ?? undefined);
    pipeline = null;
  });
});
// Port shared by the HTTP server and the WebSocket endpoint attached to it.
const LISTEN_PORT = 3002;

/** Logs the address once the server is accepting connections. */
function announceListening() {
  console.log('WebSocket server running on ws://localhost:3002');
}

server.listen(LISTEN_PORT, announceListening);
相关推荐
白-胖-子1 小时前
深入剖析大模型在文本生成式 AI 产品架构中的核心地位
人工智能·架构
想要成为计算机高手3 小时前
11. isaacsim4.2教程-Transform 树与Odometry
人工智能·机器人·自动驾驶·ros·rviz·isaac sim·仿真环境
静心问道3 小时前
InstructBLIP:通过指令微调迈向通用视觉-语言模型
人工智能·多模态·ai技术应用
宇称不守恒4.04 小时前
2025暑期—06神经网络-常见网络2
网络·人工智能·神经网络
小楓12014 小时前
醫護行業在未來會被AI淘汰嗎?
人工智能·醫療·護理·職業
数据与人工智能律师4 小时前
数字迷雾中的安全锚点:解码匿名化与假名化的法律边界与商业价值
大数据·网络·人工智能·云计算·区块链
chenchihwen4 小时前
大模型应用班-第2课 DeepSeek使用与提示词工程课程重点 学习ollama 安装 用deepseek-r1:1.5b 分析PDF 内容
人工智能·学习
说私域5 小时前
公域流量向私域流量转化策略研究——基于开源AI智能客服、AI智能名片与S2B2C商城小程序的融合应用
人工智能·小程序
Java樱木5 小时前
AI 编程工具 Trae 重要的升级。。。
人工智能
凪卄12135 小时前
图像预处理 二
人工智能·python·深度学习·计算机视觉·pycharm