阿里云实时语音识别

前端:

Vue 单文件组件代码:
<script setup>
import { ref, onMounted, onBeforeUnmount } from 'vue'

// Reactive UI state rendered by the template.
const isRecording = ref(false)
const transcript = ref('')
// Non-reactive handles shared by the handlers in this component.
let mediaRecorder = null
let ws = null

/**
 * Extracts the text to display from one WebSocket payload.
 *
 * The server in this project sends JSON strings shaped like
 * `{ type: 'changed' | 'completed', content: string }`; the `content`
 * field is what should be shown, not the raw JSON.
 *
 * @param {unknown} raw - `event.data` of a WebSocket message event.
 * @returns {string|null} The `content` string of a JSON message, the raw
 *   text when the payload is not a JSON object, or `null` when nothing
 *   displayable is present (non-string payload or missing `content`).
 */
const extractTranscript = (raw) => {
  if (typeof raw !== 'string') return null
  let message
  try {
    message = JSON.parse(raw)
  } catch {
    // Not JSON: treat it as plain recognized text.
    return raw
  }
  if (message === null || typeof message !== 'object') return raw
  return typeof message.content === 'string' ? message.content : null
}

onMounted(() => {
  ws = new WebSocket('ws://localhost:3002')
  ws.onopen = () => {
    console.log('WebSocket连接成功')
  }
  ws.onerror = (event) => {
    console.error('WebSocket连接错误:', event)
  }
  ws.onmessage = (event) => {
    console.log("接收到消息:",event.data)
    const text = extractTranscript(event.data)
    // The server falls back to an empty string when an event carries no
    // result; skip those so they do not wipe the text already on screen.
    if (text) transcript.value = text
  }
})

onBeforeUnmount(() => {
  // Release the microphone if the component goes away mid-recording.
  stopRecording()
  if (ws) ws.close()
})

/**
 * Starts capturing the microphone and streams the audio to the server.
 *
 * Audio is recorded as WebM/Opus (not WAV) and emitted in 500 ms chunks;
 * each non-empty chunk is sent as a binary frame over the shared WebSocket
 * when it is open. No explicit start/end control message is sent.
 *
 * If the microphone cannot be acquired or the recorder cannot be created,
 * the error is logged, any acquired tracks are stopped and `isRecording`
 * is reset so the button does not stay stuck in the "recording" state.
 *
 * @returns {Promise<void>}
 */
const startRecording = async () => {
  if (isRecording.value) return
  isRecording.value = true
  transcript.value = ''

  let stream = null
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true })

    // stopRecording() may have run while the permission prompt was open;
    // in that case give the microphone back instead of recording silently.
    if (!isRecording.value) {
      stream.getTracks().forEach((track) => track.stop())
      return
    }

    const recorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus' })

    recorder.ondataavailable = async (e) => {
      if (e.data.size === 0 || !ws || ws.readyState !== WebSocket.OPEN) return
      try {
        ws.send(await e.data.arrayBuffer())
      } catch (err) {
        console.error('发送音频数据失败:', err)
      }
    }
    recorder.onstop = () => {
      console.log("停止录音")
    }

    mediaRecorder = recorder
    recorder.start(500) // emit a chunk every 500 ms
  } catch (err) {
    console.error('无法开始录音:', err)
    if (stream) stream.getTracks().forEach((track) => track.stop())
    mediaRecorder = null
    isRecording.value = false
  }
}

/**
 * Ends the current recording, if any.
 *
 * Clears the `isRecording` flag, stops the active MediaRecorder and then
 * stops every track of its stream. Does nothing when not recording.
 */
const stopRecording = () => {
  const wasRecording = isRecording.value
  if (wasRecording) {
    isRecording.value = false
    const recorder = mediaRecorder
    if (recorder) {
      recorder.stop()
      for (const track of recorder.stream.getTracks()) {
        track.stop()
      }
    }
  }
}
</script>

<template>
  <div style="padding: 40px; max-width: 600px; margin: auto;">
    <!-- Single toggle button: stops the recording when one is active, otherwise starts one. -->
    <el-button type="primary" @click="isRecording ? stopRecording() : startRecording()">
      {{ isRecording ? '停止录音' : '开始录音' }}
    </el-button>
    <div style="margin-top: 30px;">
      <el-card>
        <div>识别文本:</div>
        <!-- Shows the current value of `transcript`, which the WebSocket message handler updates. -->
        <div style="min-height: 40px; color: #333;width: 600px;">{{ transcript }}</div>
      </el-card>
    </div>
  </div>
</template>

<style scoped>
/*
 * NOTE(review): the template of this component declares no `logo` class,
 * so the rules below appear to be unused scaffold leftovers - confirm
 * before removing them.
 */
.logo {
  height: 6em;
  padding: 1.5em;
  will-change: filter;
  transition: filter 300ms;
}
/* Hover glow for `.logo` elements. */
.logo:hover {
  filter: drop-shadow(0 0 2em #646cffaa);
}
/* Same glow in a different colour for elements that also carry `.vue`. */
.logo.vue:hover {
  filter: drop-shadow(0 0 2em #42b883aa);
}
</style>

后端:

Node.js 代码:
const express = require('express');
const http = require('http');
const WebSocket = require('ws');
const Nls = require('alibabacloud-nls');
const { PassThrough } = require('stream');
const ffmpegPath = require('@ffmpeg-installer/ffmpeg').path;
const ffmpeg = require('fluent-ffmpeg');

// HTTP server that also hosts the WebSocket endpoint used by the browser.
const app = express();
const server = http.createServer(app);
const wss = new WebSocket.Server({ server });

// Aliyun NLS credentials. Read from the environment so that secrets do not
// have to be committed to source; both default to an empty string, which is
// the value this file shipped with.
const ALI_APP_KEY = process.env.ALI_APP_KEY ?? '';
const ALI_TOKEN = process.env.ALI_TOKEN ?? '';
if (!ALI_APP_KEY || !ALI_TOKEN) {
  console.warn('未配置 ALI_APP_KEY / ALI_TOKEN,语音识别将无法启动');
}

// Point fluent-ffmpeg at the binary shipped by @ffmpeg-installer/ffmpeg.
ffmpeg.setFfmpegPath(ffmpegPath);

/**
 * Tears down one transcription pipeline: ends the stream feeding ffmpeg,
 * ends ffmpeg's output stream and shuts the Aliyun transcriber down.
 *
 * Idempotent: a pipeline that is already stopped (or a missing pipeline)
 * is ignored, so it is safe to call from several event handlers.
 *
 * @param {{inputStream: object, ffmpegStream: object, transcriber: object,
 *   stopped: boolean}|null|undefined} pipeline - Pipeline owned by one
 *   WebSocket connection.
 */
const stopAliyunTranscription = (pipeline) => {
  if (!pipeline || pipeline.stopped) return;
  pipeline.stopped = true;
  if (pipeline.inputStream) pipeline.inputStream.end();
  if (pipeline.ffmpegStream) pipeline.ffmpegStream.end();
  if (pipeline.transcriber) pipeline.transcriber.shutdown();
};

/*
 * One pipeline per WebSocket connection:
 *   browser WebM/Opus chunks -> ffmpeg -> 16 kHz mono s16le PCM -> Aliyun NLS
 * Recognition results are sent back to the same socket as JSON
 * `{ type: 'changed' | 'completed', content }`.
 *
 * State is kept per connection so that concurrent clients neither share an
 * ffmpeg process nor receive each other's results.
 *
 * NOTE(review): a client that stops and restarts recording on the same
 * socket sends a second WebM header into the running ffmpeg process;
 * whether ffmpeg tolerates that is not established here - confirm, or add
 * explicit start/end control messages.
 */
wss.on('connection', (ws) => {
  console.log('新用户连接WebSocket成功')

  // Pipeline currently fed by this connection, or null when none is active.
  let pipeline = null;

  // Stops `target` and forgets it if it is still the active pipeline.
  const releasePipeline = (target) => {
    stopAliyunTranscription(target);
    if (pipeline === target) pipeline = null;
  };

  // Relays one recognition event to the browser while the socket is open.
  const forwardResult = (type, msg) => {
    const data = JSON.parse(msg)
    console.log(`${type}:`, data)
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({ type, content: data.payload?.result || '' }));
    }
  };

  // Builds the transcriber and the ffmpeg transcoder for this connection.
  // Everything is created synchronously so that audio chunks can be written
  // in arrival order, starting with the first one (which carries the WebM
  // header), while the transcriber is still connecting.
  const createPipeline = () => {
    // Created first: if the constructor throws there is nothing to clean up.
    const transcriber = new Nls.SpeechTranscription({
      url: 'wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1',
      appkey: ALI_APP_KEY,
      token: ALI_TOKEN
    });
    transcriber.on('started', (msg) => {
      console.log('开始识别:',msg)
    });
    transcriber.on('changed', (msg) => forwardResult('changed', msg));
    transcriber.on('completed', (msg) => forwardResult('completed', msg));
    transcriber.on('failed', (msg) => {
      console.log('识别失败:',msg)
    });
    transcriber.on('closed', () => {
      console.log('连接关闭')
    });

    const created = {
      inputStream: new PassThrough(),
      ffmpegStream: null,
      transcriber,
      stopped: false,
    };

    const onTranscodeError = (err) => {
      // Errors raised by our own teardown are expected; stay quiet.
      if (created.stopped) return;
      console.log('[ffmpeg转码失败] ' + err.message);
      releasePipeline(created);
    };

    // The builder calls already emit -acodec/-ac/-ar/-f, so the duplicate
    // raw output options of the same flags are not repeated here.
    created.ffmpegStream = ffmpeg()
      .input(created.inputStream)
      .inputFormat('webm')
      .inputOptions('-fflags +genpts')
      .audioCodec('pcm_s16le')
      .audioChannels(1)
      .audioFrequency(16000)
      .format('s16le')
      // Listen on the command itself as well as on the output stream below.
      .on('error', onTranscodeError)
      .pipe();
    created.ffmpegStream.on('error', onTranscodeError);
    created.ffmpegStream.on('end', () => releasePipeline(created));

    return created;
  };

  // Connects the transcriber, then starts draining ffmpeg's PCM output
  // into it. Until then the PCM stays buffered in the output stream.
  const startTranscriber = async (target) => {
    try {
      await target.transcriber.start(target.transcriber.defaultStartParams(), true, 6000);
    } catch (err) {
      // Torn down while connecting (socket closed / ffmpeg failed).
      if (target.stopped) return;
      console.log('[识别启动失败] ' + err);
      releasePipeline(target);
      // Close the socket so the client stops streaming audio that can no
      // longer be recognized, instead of respawning a pipeline per chunk.
      if (ws.readyState === WebSocket.OPEN) ws.close(1011, 'transcription start failed');
      return;
    }

    if (target.stopped) {
      // Stopped while connecting: make sure the late session is closed too.
      target.transcriber.shutdown();
      return;
    }
    target.ffmpegStream.on('data', (pcmChunk) => {
      if (!target.stopped) target.transcriber.sendAudio(pcmChunk);
    });
  };

  ws.on('message', (data) => {
    // Ignore frames that arrive after this side started closing.
    if (ws.readyState !== WebSocket.OPEN) return;

    if (!pipeline) {
      try {
        pipeline = createPipeline();
      } catch (err) {
        console.log('[识别启动失败] ' + err);
        ws.close(1011, 'transcription start failed');
        return;
      }
      // startTranscriber handles its own errors, so nothing is left floating.
      void startTranscriber(pipeline);
    }
    pipeline.inputStream.write(Buffer.from(data));
  });

  ws.on('error', (err) => {
    // Without a listener an 'error' event would be thrown as uncaught.
    console.log('WebSocket错误:', err);
  });

  ws.on('close', () => {
    releasePipeline(pipeline);
  });
});
// Start accepting HTTP and WebSocket connections on port 3002 and log the
// endpoint once the server is listening.
const announceListening = () => {
  console.log('WebSocket server running on ws://localhost:3002');
};
server.listen(3002, announceListening);
相关推荐
小a彤4 分钟前
elec-ops-inspection:电力巡检缺陷检测,NPU推理速度提升3倍
人工智能·cann
ZhengEnCi36 分钟前
09aaa-LayerNorm是什么?
人工智能
这是谁的博客?39 分钟前
AI Agent 安全架构设计:漏洞分析与防护策略深度解析
人工智能·安全·网络安全·ai·agent·安全架构·架构设计
人月神话-Lee44 分钟前
【图像处理】Sobel 边缘检测——让机器“看见“轮廓
图像处理·人工智能·计算机视觉·ios·ai编程·swift
冬奇Lab1 小时前
Agent系列(四):工具调用深度解析——Agent 的手和眼
人工智能·llm
Black蜡笔小新1 小时前
自动化AI算法训练服务器DLTM助力医学影像分析进入AI智能分析新时代
人工智能·算法·自动化
冬奇Lab2 小时前
一天一个开源项目(第111篇):Understand Anything - 把代码库变成可探索知识图谱的 AI 引擎
人工智能·开源·llm
猿饵块2 小时前
git--github
人工智能
黎阳之光2 小时前
黎阳之光:以视频孪生重构智慧防火,打造“天空地人智”一体化森林防火新范式
大数据·运维·人工智能·物联网·安全
why技术2 小时前
AI Coding开始进入第四个时代,我还没上车呢!
前端·人工智能·后端