阿里云实时语音识别

前端:

javascript 复制代码
<script setup>
import { ref, onMounted, onBeforeUnmount } from 'vue'

// Reactive flag: true while a recording session is in progress (drives the button label).
const isRecording = ref(false)
// Text rendered in the result card; reset when recording starts and overwritten by each WebSocket message.
const transcript = ref('')
// MediaRecorder for the current recording; assigned in startRecording.
let mediaRecorder = null
// WebSocket connection to the recognition backend; opened in onMounted.
let ws = null

// Open the WebSocket to the recognition backend as soon as the component is mounted.
onMounted(() => {
  ws = new WebSocket('ws://localhost:3002')
  ws.onopen = () => {
    console.log('WebSocket连接成功')
  }
  ws.onerror = (event) => {
    console.error('WebSocket连接错误:', event)
  }
  ws.onmessage = (event) => {
    console.log("接收到消息:",event.data)

    // The backend sends JSON envelopes of the form
    // { type: 'changed' | 'completed', content: string }. Only `content` is
    // meant for display; showing event.data directly would print raw JSON.
    let message = null
    try {
      message = JSON.parse(event.data)
    } catch {
      // Not JSON: keep the previous behaviour and show the payload as-is.
      transcript.value = event.data
      return
    }

    if (typeof message?.content !== 'string') {
      // Valid JSON but not the expected envelope: show it unchanged.
      transcript.value = event.data
      return
    }

    // The backend substitutes '' when a message carries no result; skip those
    // so an empty envelope does not wipe the text already on screen.
    if (message.content !== '') {
      transcript.value = message.content
    }
  }
})

// Release everything the component acquired before it is destroyed.
onBeforeUnmount(() => {
  try {
    // If the user navigates away mid-recording the microphone would otherwise
    // stay open; stopRecording() is a no-op when nothing is being recorded.
    stopRecording()
  } finally {
    // Close the socket even if stopping the recorder threw.
    if (ws) ws.close()
  }
})

/**
 * Requests microphone access and streams the captured audio to the backend.
 *
 * Audio is recorded as WebM/Opus (not WAV) and emitted in 500 ms chunks; each
 * chunk is sent over the WebSocket as a binary frame. If microphone access is
 * denied or the recorder cannot be created, the recording state is rolled back
 * so the button can be used again.
 *
 * @returns {Promise<void>}
 */
const startRecording = async () => {
  if (isRecording.value) return
  isRecording.value = true
  transcript.value = ''

  let stream = null
  try {
    // Acquire the microphone stream (may show a permission prompt).
    stream = await navigator.mediaDevices.getUserMedia({ audio: true })

    // stopRecording() may have run while the permission prompt was open; in
    // that case release the microphone instead of starting an orphan recorder.
    if (!isRecording.value) {
      stream.getTracks().forEach(track => track.stop())
      return
    }

    // Capture as WebM/Opus, which is the container the backend feeds to ffmpeg.
    const recorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus' })

    recorder.ondataavailable = (e) => {
      if (e.data.size === 0) return
      e.data.arrayBuffer()
        .then(buffer => {
          // Check the socket at send time: it can close while the blob is read.
          if (ws && ws.readyState === WebSocket.OPEN) {
            ws.send(buffer)
          }
        })
        .catch(err => {
          console.error('音频数据发送失败:', err)
        })
    }
    recorder.onstop = () => {
      console.log("停止录音")
    }

    recorder.start(500) // emit a chunk every 500 ms
    mediaRecorder = recorder
  } catch (err) {
    console.error('无法开始录音:', err)
    // Release the microphone if it was acquired before the failure.
    if (stream) {
      stream.getTracks().forEach(track => track.stop())
    }
    isRecording.value = false
  }
}

/**
 * Stops the current recording and releases the microphone.
 * Does nothing when no recording is in progress.
 */
const stopRecording = () => {
  if (!isRecording.value) return
  isRecording.value = false
  if (!mediaRecorder) return

  // MediaRecorder.stop() throws InvalidStateError on an inactive recorder,
  // which can happen when the reference is left over from an earlier session.
  if (mediaRecorder.state !== 'inactive') {
    mediaRecorder.stop()
  }
  // Stopping the tracks turns off the browser's microphone indicator.
  mediaRecorder.stream.getTracks().forEach(track => track.stop())
  // Drop the reference so a stale recorder is never stopped twice.
  mediaRecorder = null
}
</script>

<template>
  <div style="padding: 40px; max-width: 600px; margin: auto;">
    <!-- Single toggle button: starts recording when idle, stops it while recording. -->
    <el-button type="primary" @click="isRecording ? stopRecording() : startRecording()">
      {{ isRecording ? '停止录音' : '开始录音' }}
    </el-button>
    <div style="margin-top: 30px;">
      <el-card>
        <div>识别文本:</div>
        <!-- Renders the current value of `transcript`. -->
        <!-- NOTE(review): fixed 600px width inside a container capped at max-width 600px with padding; may overflow the card - confirm intended. -->
        <div style="min-height: 40px; color: #333;width: 600px;">{{ transcript }}</div>
      </el-card>
    </div>
  </div>
</template>

<style scoped>
/*
 * NOTE(review): no element in this component's template carries the `logo`
 * class, so these scoped rules currently match nothing. They look like
 * leftovers from a project scaffold - confirm before removing.
 */
.logo {
  height: 6em;
  padding: 1.5em;
  will-change: filter;
  transition: filter 300ms;
}
/* Glow on hover. */
.logo:hover {
  filter: drop-shadow(0 0 2em #646cffaa);
}
/* Same glow in a different colour for elements that also have the `vue` class. */
.logo.vue:hover {
  filter: drop-shadow(0 0 2em #42b883aa);
}
</style>

后端:

javascript 复制代码
const express = require('express');
const http = require('http');
const WebSocket = require('ws');
const Nls = require('alibabacloud-nls');
const { PassThrough } = require('stream');
const ffmpegPath = require('@ffmpeg-installer/ffmpeg').path;
const ffmpeg = require('fluent-ffmpeg');

// HTTP server that hosts the WebSocket endpoint used by the browser client.
const app = express();
const server = http.createServer(app);
const wss = new WebSocket.Server({ server });

// Aliyun NLS credentials. They are read from the environment so that secrets
// do not have to be committed to source control; when the variables are unset
// the values fall back to empty strings, which must then be filled in manually.
const ALI_APP_KEY = process.env.ALI_APP_KEY ?? ''; // valid Aliyun appkey required
const ALI_TOKEN = process.env.ALI_TOKEN ?? ''; // valid Aliyun access token required

// Point fluent-ffmpeg at the binary path exported by @ffmpeg-installer/ffmpeg.
ffmpeg.setFfmpegPath(ffmpegPath);

// Every WebM stream starts with the EBML header element, whose ID is
// 0x1A45DFA3. The first chunk a MediaRecorder emits therefore begins with
// these bytes, which lets the server tell "a new recording started" apart
// from "another chunk of the current recording".
const EBML_MAGIC = Buffer.from([0x1a, 0x45, 0xdf, 0xa3]);

// Sessions that currently own an ffmpeg process and/or an Aliyun connection.
// State is kept per session (not in module-level variables) so that several
// clients can be served at the same time without sharing a pipeline.
const activeSessions = new Set();

/**
 * Tells whether a chunk is the beginning of a new WebM stream.
 *
 * @param {Buffer} chunk - Bytes received from the client.
 * @returns {boolean} True when the chunk starts with the EBML header ID.
 */
const isWebmStreamStart = (chunk) =>
  chunk.length >= EBML_MAGIC.length &&
  chunk.subarray(0, EBML_MAGIC.length).equals(EBML_MAGIC);

/**
 * Builds the recognition pipeline for ONE recording of ONE client:
 * WebM/Opus from the browser -> ffmpeg -> 16 kHz mono 16-bit PCM -> Aliyun
 * real-time transcription -> JSON results sent back over `ws`.
 *
 * The ffmpeg input stream exists as soon as this function returns, so chunks
 * can be written immediately and in arrival order while the connection to
 * Aliyun is still being established.
 *
 * @param {WebSocket} ws - Client socket that receives the recognition results.
 * @returns {{ write: (chunk: Buffer) => boolean, stop: () => void }}
 *   `write` queues audio for transcoding and returns false once the session
 *   has stopped; `stop` tears the pipeline down and is safe to call repeatedly.
 */
const createTranscriptionSession = (ws) => {
  // Raw WebM bytes are written here and consumed by ffmpeg.
  const inputStream = new PassThrough();
  let transcriber = null;
  let stopped = false;

  // Parses an SDK message and forwards its text to the client as { type, content }.
  const forwardResult = (type, msg) => {
    let content = '';
    try {
      const data = JSON.parse(msg);
      console.log(`${type}:`, data);
      content = data.payload?.result || '';
    } catch (err) {
      console.log('识别结果解析失败:', err);
      return;
    }
    // The client may already be gone when a late result arrives.
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({ type, content }));
    }
  };

  const command = ffmpeg()
    .input(inputStream)
    .inputFormat('webm')
    .inputOptions('-fflags +genpts')
    .audioCodec('pcm_s16le')
    .audioChannels(1)
    .audioFrequency(16000)
    .format('s16le');

  // Releases everything the session owns. Idempotent.
  const stop = () => {
    if (stopped) return;
    stopped = true;
    activeSessions.delete(session);
    inputStream.end();
    // Make sure the ffmpeg process does not outlive the session.
    command.kill('SIGKILL');
    if (transcriber) {
      try {
        transcriber.shutdown();
      } catch (err) {
        console.log('关闭识别连接失败:', err);
      }
      transcriber = null;
    }
  };

  // fluent-ffmpeg reports failures (including the kill above) on the command
  // object; an EventEmitter 'error' without a listener would crash the process.
  command.on('error', (err) => {
    if (!stopped) {
      console.log('ffmpeg转码失败:', err.message);
    }
    stop();
  });
  command.on('end', () => {
    stop();
  });

  // PCM output of ffmpeg. It stays paused (buffering) until a 'data' listener
  // is attached, i.e. until the Aliyun connection is ready.
  const pcmStream = command.pipe();
  pcmStream.on('error', (err) => {
    console.log('PCM输出流错误:', err.message);
    stop();
  });

  // Connects to Aliyun and, once connected, starts draining the PCM stream.
  const start = async () => {
    let client = null;
    try {
      // Instantiate the recognizer with the configured appkey and token.
      client = new Nls.SpeechTranscription({
        url: 'wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1',
        appkey: ALI_APP_KEY,
        token: ALI_TOKEN
      });
      transcriber = client;

      client.on('started', (msg) => {
        console.log('开始识别:',msg)
      });
      client.on('changed', (msg) => {
        forwardResult('changed', msg);
      });
      client.on('completed', (msg) => {
        forwardResult('completed', msg);
      });
      client.on('failed', (msg) => {
        console.log('识别失败:',msg)
        // Nothing useful can be sent after a failure; free ffmpeg as well.
        stop();
      });
      client.on('closed', () => {
        console.log('连接关闭')
      });

      await client.start(client.defaultStartParams(), true, 6000);
    } catch (err) {
      console.log('[识别启动失败] ' + err);
      // Do not leave a half-built pipeline behind.
      stop();
      return;
    }

    // The client may have disconnected while the connection was being set up.
    if (stopped) return;

    pcmStream.on('data', (pcmChunk) => {
      if (stopped) return;
      try {
        client.sendAudio(pcmChunk);
      } catch (err) {
        console.log('音频发送失败:', err);
        stop();
      }
    });
  };

  const session = {
    write(chunk) {
      if (stopped) return false;
      inputStream.write(chunk);
      return true;
    },
    stop,
  };

  activeSessions.add(session);
  start().catch((err) => {
    console.log('[识别启动失败] ' + err);
    stop();
  });
  return session;
};

wss.on('connection', (ws) => {
  console.log('新用户连接WebSocket成功')

  // Pipeline of the recording currently in progress on this socket, if any.
  let session = null;

  // The handler is synchronous on purpose: chunks must reach ffmpeg in exactly
  // the order they arrive, starting with the one that carries the WebM header.
  ws.on('message', (data) => {
    const chunk = Buffer.isBuffer(data) ? data : Buffer.from(data);

    if (isWebmStreamStart(chunk)) {
      // A new recording began on this socket: the previous ffmpeg process
      // cannot decode a second header, so start over with a fresh pipeline.
      session?.stop();
      session = createTranscriptionSession(ws);
    }

    if (!session?.write(chunk)) {
      // No live pipeline (it failed or ended) and this chunk has no header,
      // so it cannot be decoded on its own.
      console.log('没有可用的识别会话,丢弃音频数据');
    }
  });

  ws.on('error', (err) => {
    console.log('WebSocket错误:', err.message);
  });

  ws.on('close', () => {
    session?.stop();
    session = null;
  });
});

/**
 * Stops every active recognition session (ffmpeg process and Aliyun
 * connection). Individual sessions are normally stopped by their own socket's
 * lifecycle; this is the process-wide variant.
 */
const stopAliyunTranscription = () => {
  // Iterate over a copy: stop() removes the session from the set.
  for (const session of [...activeSessions]) {
    session.stop();
  }
}
// Bind the shared HTTP/WebSocket server to port 3002 and report the endpoint once it is listening.
const onListening = () => {
  console.log('WebSocket server running on ws://localhost:3002');
};
server.listen(3002, onListening);
相关推荐
星云数灵33 分钟前
信息系统项目的范围管理(12345智慧政务)
人工智能·信息系统项目管理·软考高项·软考高项优秀论文·论文写作得分技巧
智源研究院官方账号1 小时前
众智FlagOS 1.5发布:统一开源大模型系统软件栈,更全面、AI赋能更高效
人工智能·开源
小小测试开发1 小时前
给贾维斯加“手势控制”:从原理到落地,打造多模态交互的本地智能助
人工智能·python·交互
强盛小灵通专卖员1 小时前
airsim多无人机+无人车联合仿真辅导
人工智能·无人机·中文核心期刊·小论文·延毕·淘宝店铺-闪电科创
l12345sy1 小时前
Day31_【 NLP _1.文本预处理 _(2)文本张量表示方法】
人工智能·自然语言处理·word2vec·word embedding·cbow·skipgram
云卓SKYDROID1 小时前
无人机信号模块:技术要点与断联应对指南
人工智能·无人机·高科技·云卓科技
真智AI2 小时前
[特殊字符] AI时代依然不可或缺:精通后端开发的10个GitHub宝藏仓库
人工智能·github·系统设计·后端开发·github资源·编码实践
deepdata_cn2 小时前
大语言模型(LLM)的基本概念
人工智能·语言模型·自然语言处理
草莓熊Lotso2 小时前
从 “Hello AI” 到企业级应用:Spring AI 如何重塑 Java 生态的 AI 开发
java·人工智能·经验分享·后端·spring
大千AI助手3 小时前
BIG-Bench:大规模语言模型能力的全面评估与挑战
人工智能·语言模型·自然语言处理·大模型·图灵测试·big-bench·bbh