阿里云实时语音识别

前端:

前端代码(Vue 3 单文件组件,`<script setup>`):
<script setup>
import { ref, onMounted, onBeforeUnmount } from 'vue'

const isRecording = ref(false)
const transcript = ref('')
let mediaRecorder = null
let ws = null

// Once the component is mounted, open the WebSocket to the transcription
// backend and wire up its open / error / message handlers.
onMounted(() => {
  ws = new WebSocket('ws://localhost:3002')
  ws.onopen = () => {
    console.log('WebSocket连接成功')
  }
  ws.onerror = (event) => {
    console.error('WebSocket连接错误:', event)
  }
  ws.onmessage = (event) => {
    console.log("接收到消息:",event.data)

    // The backend sends JSON envelopes shaped like { type, content }.
    // Show only the recognized text, not the raw JSON string.
    let message = null
    try {
      message = JSON.parse(event.data)
    } catch (err) {
      message = null
    }

    if (message === null || typeof message !== 'object') {
      // Not a JSON envelope: fall back to displaying the payload unchanged.
      transcript.value = String(event.data)
      return
    }

    // The backend substitutes '' when an event carries no result; an empty
    // content must not wipe the text that is already on screen.
    if (typeof message.content === 'string' && message.content !== '') {
      transcript.value = message.content
    }
  }
})

// Release everything the component holds before it is destroyed: stop an
// in-progress recording, free the microphone tracks, and close the socket.
onBeforeUnmount(() => {
  isRecording.value = false
  if (mediaRecorder) {
    // Only stop a recorder that is still running.
    if (mediaRecorder.state !== 'inactive') {
      mediaRecorder.stop()
    }
    // Stopping the tracks is what actually releases the microphone.
    mediaRecorder.stream.getTracks().forEach(track => track.stop())
  }
  if (ws) ws.close()
})

/**
 * Starts capturing microphone audio and streams it to the backend.
 *
 * Audio is recorded as WebM/Opus and emitted in 500 ms chunks; each non-empty
 * chunk is sent over the WebSocket as a binary frame while the socket is open.
 * If microphone access or recorder creation fails, the recording flag is reset
 * so the UI does not stay stuck in the "recording" state.
 *
 * @returns {Promise<void>}
 */
const startRecording = async () => {
  if (isRecording.value) return
  isRecording.value = true
  transcript.value = ''

  let stream = null
  try {
    // Ask for the microphone (may reject if the user denies permission).
    stream = await navigator.mediaDevices.getUserMedia({ audio: true })

    // Recording was cancelled while the permission prompt was pending:
    // release the microphone again and do not start.
    if (!isRecording.value) {
      stream.getTracks().forEach(track => track.stop())
      return
    }

    // Capture as WebM/Opus.
    mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus' })
  } catch (err) {
    console.error('启动录音失败:', err)
    if (stream) {
      stream.getTracks().forEach(track => track.stop())
    }
    isRecording.value = false
    return
  }

  mediaRecorder.ondataavailable = (e) => {
    // Send the Blob directly: the socket state is checked at the moment of
    // sending and frames are queued in the order the chunks were produced.
    if (e.data.size > 0 && ws && ws.readyState === WebSocket.OPEN) {
      ws.send(e.data)
    }
  }
  mediaRecorder.onstop = () => {
    console.log("停止录音")
  }
  mediaRecorder.start(500) // emit a chunk every 500 ms
}

/**
 * Stops the current recording and releases the microphone.
 *
 * Does nothing when no recording is in progress. The recording flag is cleared
 * first so the button label flips even if no recorder has been created yet.
 */
const stopRecording = () => {
  if (!isRecording.value) return
  isRecording.value = false
  if (!mediaRecorder) return

  // MediaRecorder.stop() is documented to raise InvalidStateError on a
  // recorder that is already inactive, so only stop one that is running.
  if (mediaRecorder.state !== 'inactive') {
    mediaRecorder.stop()
  }
  // Stopping the tracks is what actually releases the microphone.
  mediaRecorder.stream.getTracks().forEach(track => track.stop())
}
</script>

<template>
  <div style="padding: 40px; max-width: 600px; margin: auto;">
    <!-- Single toggle button: stops the recording when one is in progress, otherwise starts one. -->
    <!-- NOTE(review): el-button / el-card are not imported in this file; presumably registered globally (e.g. Element Plus) - confirm. -->
    <el-button type="primary" @click="isRecording ? stopRecording() : startRecording()">
      {{ isRecording ? '停止录音' : '开始录音' }}
    </el-button>
    <div style="margin-top: 30px;">
      <el-card>
        <div>识别文本:</div>
        <!-- Shows the current value of `transcript`, which the WebSocket message handler updates. -->
        <div style="min-height: 40px; color: #333;width: 600px;">{{ transcript }}</div>
      </el-card>
    </div>
  </div>
</template>

<style scoped>
/*
 * NOTE(review): no element in this component's template carries the `logo`
 * class, so these scoped rules match nothing in this file. They look like
 * project-scaffold leftovers - confirm before removing.
 */

/* Base logo box; `will-change` and `transition` prepare the hover filter below. */
.logo {
  height: 6em;
  padding: 1.5em;
  will-change: filter;
  transition: filter 300ms;
}
/* Glow applied on hover. */
.logo:hover {
  filter: drop-shadow(0 0 2em #646cffaa);
}
/* Alternative glow colour for elements that also carry the `vue` class. */
.logo.vue:hover {
  filter: drop-shadow(0 0 2em #42b883aa);
}
</style>

后端:

后端代码(Node.js):
const express = require('express');
const http = require('http');
const WebSocket = require('ws');
const Nls = require('alibabacloud-nls');
const { PassThrough } = require('stream');
const ffmpegPath = require('@ffmpeg-installer/ffmpeg').path;
const ffmpeg = require('fluent-ffmpeg');

// HTTP server that hosts the WebSocket endpoint. No HTTP routes are registered
// on the Express app in this file.
const app = express();
const server = http.createServer(app);
const wss = new WebSocket.Server({ server });

// Alibaba Cloud NLS credentials. They are read from the environment so real
// secrets do not have to be written into the source file; when a variable is
// unset or empty the value falls back to '' exactly as before.
const ALI_APP_KEY = process.env.ALI_APP_KEY || ''; // a valid app key is required
const ALI_TOKEN = process.env.ALI_TOKEN || ''; // a valid access token is required
if (!ALI_APP_KEY || !ALI_TOKEN) {
  console.warn('未配置 ALI_APP_KEY / ALI_TOKEN,语音识别将无法启动');
}

// Point fluent-ffmpeg at the ffmpeg binary shipped by @ffmpeg-installer/ffmpeg.
ffmpeg.setFfmpegPath(ffmpegPath);

/**
 * Tears down one connection's transcription pipeline: ends the WebM input
 * stream, ends the PCM output stream and shuts the recognizer down.
 *
 * Safe to call repeatedly; a session that is already torn down is a no-op.
 *
 * @param {{inputStream: ?object, ffmpegStream: ?object, transcriber: ?object}} session
 *   Per-connection pipeline state created in the connection handler.
 */
const stopAliyunTranscription = (session) => {
  if (!session) return;

  // Detach everything first: ending the streams triggers the 'end' / 'error'
  // handlers, which call back into this function and must find nothing left.
  const { inputStream, ffmpegStream, transcriber } = session;
  session.inputStream = null;
  session.ffmpegStream = null;
  session.transcriber = null;

  if (inputStream) inputStream.end();
  if (ffmpegStream) ffmpegStream.end();
  if (transcriber) transcriber.shutdown();
};

// For every browser connection: transcode the incoming WebM/Opus chunks to
// 16 kHz mono 16-bit PCM with ffmpeg, feed the PCM to Alibaba Cloud real-time
// transcription, and push recognition results back as { type, content } JSON.
wss.on('connection', (ws) => {
  console.log('新用户连接WebSocket成功');

  // Pipeline state belongs to this connection only, so concurrent clients
  // cannot overwrite or tear down each other's ffmpeg process / recognizer.
  const session = { inputStream: null, ffmpegStream: null, transcriber: null };

  // Relays one recognition event to the browser, if the socket is still open.
  const forwardResult = (type, msg) => {
    const data = JSON.parse(msg);
    console.log(`${type}:`, data);
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({ type, content: data.payload?.result || '' }));
    }
  };

  // Builds the ffmpeg + recognizer pipeline and starts recognition.
  // `firstChunk` is the audio chunk that triggered the pipeline creation.
  const startPipeline = async (firstChunk) => {
    const inputStream = new PassThrough();

    // fluent-ffmpeg reports failures through the command's 'error' event. An
    // emitter without an 'error' listener throws, which would take the whole
    // server down, so the listener is attached to the command itself.
    const command = ffmpeg()
      .input(inputStream)
      .inputFormat('webm')
      .inputOptions('-fflags +genpts')
      .audioCodec('pcm_s16le')
      .audioChannels(1)
      .audioFrequency(16000)
      .format('s16le')
      .outputOptions('-f s16le')
      .outputOptions('-acodec pcm_s16le')
      .outputOptions('-ar 16000')
      .outputOptions('-ac 1')
      .on('error', (err) => {
        console.log('ffmpeg转码失败:', err.message);
        // Ignore errors from a pipeline that has already been replaced.
        if (session.inputStream === inputStream) stopAliyunTranscription(session);
      });
    const ffmpegStream = command.pipe();

    // Create the recognizer with the configured app key and token.
    const transcriber = new Nls.SpeechTranscription({
      url: 'wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1',
      appkey: ALI_APP_KEY,
      token: ALI_TOKEN
    });

    // Publish the pipeline before the first await so that chunks arriving
    // while the recognizer is still connecting are appended to this stream
    // instead of starting a second pipeline.
    session.inputStream = inputStream;
    session.ffmpegStream = ffmpegStream;
    session.transcriber = transcriber;
    const isCurrent = () => session.transcriber === transcriber;

    // Write the triggering chunk immediately. It must precede every later
    // chunk; writing it only after the recognizer has started would let
    // chunks received in the meantime overtake it.
    inputStream.write(firstChunk);

    transcriber.on('started', (msg) => {
      console.log('开始识别:', msg);
    });
    transcriber.on('changed', (msg) => forwardResult('changed', msg));
    transcriber.on('completed', (msg) => forwardResult('completed', msg));
    transcriber.on('failed', (msg) => {
      console.log('识别失败:', msg);
    });
    transcriber.on('closed', () => {
      console.log('连接关闭');
    });

    ffmpegStream.on('error', (err) => {
      console.log('ffmpeg输出流错误:', err.message);
      if (isCurrent()) stopAliyunTranscription(session);
    });
    ffmpegStream.on('end', () => {
      if (isCurrent()) stopAliyunTranscription(session);
    });

    // Start recognition.
    try {
      await transcriber.start(transcriber.defaultStartParams(), true, 6000);
    } catch (err) {
      console.log('[识别启动失败] ' + err);
      // Release the half-built pipeline instead of leaving ffmpeg running
      // with nobody consuming its output.
      if (isCurrent()) stopAliyunTranscription(session);
      return;
    }

    // The connection may have closed while the recognizer was starting.
    if (!isCurrent()) return;

    // Only now start draining PCM. Until a 'data' listener is attached the
    // output stream stays paused and buffers what ffmpeg has produced so far.
    ffmpegStream.on('data', (pcmChunk) => {
      if (isCurrent()) transcriber.sendAudio(pcmChunk);
    });
  };

  ws.on('message', async (data) => {
    const chunk = Buffer.from(data);
    if (session.inputStream) {
      session.inputStream.write(chunk);
      return;
    }
    try {
      await startPipeline(chunk);
    } catch (err) {
      console.log('[识别管线异常] ' + err);
      stopAliyunTranscription(session);
    }
  });

  ws.on('close', () => {
    stopAliyunTranscription(session);
  });

  // Without a listener a socket-level error would be an unhandled 'error'
  // event; teardown itself happens in the 'close' handler.
  ws.on('error', (err) => {
    console.log('WebSocket错误:', err.message);
  });
});
// Start accepting connections on port 3002 and log the endpoint once the
// server is listening.
const announceListening = function () {
  console.log('WebSocket server running on ws://localhost:3002');
};
server.listen(3002, announceListening);
相关推荐
小和尚同志10 分钟前
Cline | Cline + Grok3 免费 AI 编程新体验
人工智能·aigc
我就是全世界22 分钟前
TensorRT-LLM:大模型推理加速的核心技术与实践优势
人工智能·机器学习·性能优化·大模型·tensorrt-llm
.30-06Springfield25 分钟前
决策树(Decision tree)算法详解(ID3、C4.5、CART)
人工智能·python·算法·决策树·机器学习
我不是哆啦A梦26 分钟前
破解风电运维“百模大战”困局,机械版ChatGPT诞生?
运维·人工智能·python·算法·chatgpt
galaxylove38 分钟前
Gartner发布塑造安全运营未来的关键 AI 自动化趋势
人工智能·安全·自动化
强哥之神2 小时前
英伟达发布 Llama Nemotron Nano 4B:专为边缘 AI 和科研任务优化的高效开源推理模型
人工智能·深度学习·语言模型·架构·llm·transformer·边缘计算
Green1Leaves2 小时前
pytorch学习-9.多分类问题
人工智能·pytorch·学习
kyle~2 小时前
计算机视觉---RealSense深度相机技术
人工智能·数码相机·计算机视觉·机器人·嵌入式·ros·传感器
碣石潇湘无限路3 小时前
【AI篇】当Transformer模型开始学习《孙子兵法》
人工智能·学习
看到我,请让我去学习3 小时前
OpenCV开发-初始概念
人工智能·opencv·计算机视觉