Frontend (Vue 3 single-file component, JavaScript):
<template>
<!-- One-shot speech recognition demo UI:
     record toggle button, transcript panel, connection status, and an
     <audio> element for local playback of the last recording. -->
<div class="app">
<h3>阿里云语音识别应用(一句话语音识别)</h3>
<div class="control-panel">
<!-- Button label and color flip while recording (see .recording style) -->
<button @click="toggleRecording" :class="{ recording: isRecording }">
{{ isRecording ? '停止录音' : '开始录音' }}
</button>
</div>
<div class="result-panel">
<h4>识别结果:</h4>
<!-- Final transcript from the backend's 'completed' message -->
<p class="result-text">{{ recognitionResult }}</p>
</div>
<div class="status-panel">
<p>WebSocket状态: {{ websocketStatus }}</p>
<p v-if="usedMimeType">使用的音频格式: {{ usedMimeType }}</p>
</div>
<!-- Plays back the just-captured recording (see createAndPlayAudio) -->
<audio ref="audioRef" controls></audio>
</div>
</template>
<script setup>
import { ref, onMounted, onUnmounted } from 'vue'
// --- Reactive state ---
const isRecording = ref(false) // true while MediaRecorder is capturing
const recognitionResult = ref('') // final transcript shown in the UI
const websocketStatus = ref('未连接') // human-readable connection status
const websocket = ref(null) // WebSocket to the Node backend
const mediaRecorder = ref(null) // active MediaRecorder instance
const audioChunks = ref([]) // encoded chunks from ondataavailable
const audioBlob = ref(null) // merged recording, assembled on stop
const usedMimeType = ref('') // MIME type actually chosen for recording
const audioRef = ref(null) // <audio> element used for local playback
// Backend WebSocket address — must match the server's listen port
const WS_SERVER_URL = 'ws://localhost:8282'
// Candidate MediaRecorder MIME types, tried in priority order
const SUPPORTED_MIME_TYPES = ['audio/ogg; codecs=opus', 'audio/webm; codecs=opus', 'audio/wav', 'audio/mpeg']
// Open the WebSocket connection to the recognition backend and wire up
// its lifecycle handlers. Safe to call repeatedly: an already-open or
// still-connecting socket is reused instead of being replaced
// (startRecording may call this while a connection is pending).
const connectWebSocket = () => {
  const state = websocket.value?.readyState
  if (state === WebSocket.OPEN || state === WebSocket.CONNECTING) {
    return
  }
  try {
    websocket.value = new WebSocket(WS_SERVER_URL)
    websocket.value.onopen = () => {
      websocketStatus.value = '已连接'
      console.log('WebSocket连接已建立')
    }
    websocket.value.onmessage = (event) => {
      // The backend wraps Aliyun NLS events as { type, data } where
      // `data` is itself a JSON string (double-encoded).
      try {
        const msg = JSON.parse(event.data)
        console.log('收到消息:', msg)
        if (msg.type === 'changed') {
          // Intermediate hypothesis — logged only; UI shows the final result.
          const msgData = JSON.parse(msg.data)
          const result = msgData?.payload?.result || ''
          console.log('识别信息:', result)
        }
        if (msg.type === 'completed') {
          const msgData = JSON.parse(msg.data)
          const result = msgData?.payload?.result || ''
          console.log('识别结果:', result)
          recognitionResult.value = result
        }
      } catch (error) {
        console.error('结果解析错误:', error)
      }
    }
    websocket.value.onclose = () => {
      websocketStatus.value = '已断开'
      console.log('WebSocket连接已关闭')
      // Losing the backend makes further recording pointless; stop it.
      stopRecording()
    }
    websocket.value.onerror = (error) => {
      websocketStatus.value = '连接错误'
      console.error('WebSocket错误:', error)
    }
  } catch (error) {
    websocketStatus.value = '连接失败'
    console.error('WebSocket连接失败:', error)
  }
}
// Flip between recording and idle when the user clicks the button.
const toggleRecording = () => {
  if (isRecording.value) {
    stopRecording()
    return
  }
  startRecording()
}
// Start capturing microphone audio with MediaRecorder, auto-detecting a
// supported container/codec. Re-establishes the WebSocket first if needed.
// NOTE(review): connectWebSocket() completes asynchronously — if the socket
// was closed, a very short recording may stop before the connection opens,
// in which case sendAudioToBackend() silently skips the upload.
const startRecording = async () => {
  if (websocket.value?.readyState !== WebSocket.OPEN) {
    connectWebSocket()
  }
  try {
    // Ask for microphone permission / an audio capture stream.
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
    // Pick the first MIME type this browser's MediaRecorder supports.
    const mimeType = detectSupportedMimeType()
    if (!mimeType) {
      throw new Error('浏览器不支持任何MediaRecorder音频格式')
    }
    usedMimeType.value = mimeType
    console.log(`使用支持的MIME类型: ${mimeType}`)
    mediaRecorder.value = new MediaRecorder(stream, { mimeType })
    audioChunks.value = []
    // Collect encoded audio chunks as they become available.
    mediaRecorder.value.ondataavailable = (event) => {
      if (event.data.size > 0) {
        audioChunks.value.push(event.data)
      }
    }
    // When recording stops, assemble the blob, upload it, and play it back.
    mediaRecorder.value.onstop = () => {
      audioBlob.value = new Blob(audioChunks.value, { type: mimeType })
      sendAudioToBackend()
      createAndPlayAudio()
    }
    mediaRecorder.value.start()
    isRecording.value = true
  } catch (error) {
    console.error('录音失败:', error)
    alert(`录音失败: ${error.message}`)
  }
}
// Return the first entry of SUPPORTED_MIME_TYPES that this browser's
// MediaRecorder can encode, or null when none is supported.
const detectSupportedMimeType = () =>
  SUPPORTED_MIME_TYPES.find((mimeType) => MediaRecorder.isTypeSupported(mimeType)) ?? null
// Stop an in-progress recording and release the microphone.
// Guards against the InvalidStateError that MediaRecorder.stop() throws
// when the recorder is already inactive (e.g. called twice, or from the
// WebSocket onclose handler after recording already ended).
const stopRecording = () => {
  if (!mediaRecorder.value) {
    return
  }
  if (mediaRecorder.value.state !== 'inactive') {
    mediaRecorder.value.stop()
  }
  // Release the microphone immediately so the browser's indicator goes off.
  mediaRecorder.value.stream?.getTracks().forEach((track) => track.stop())
  isRecording.value = false
}
// Upload the recorded audio blob to the backend as a single binary
// WebSocket frame. No-op when the socket is not open or nothing was
// recorded. The arrayBuffer() promise now has a rejection handler
// (the original left it floating).
const sendAudioToBackend = () => {
  if (websocket.value?.readyState !== WebSocket.OPEN || !audioBlob.value) {
    return
  }
  audioBlob.value
    .arrayBuffer()
    .then((buffer) => {
      console.log('Sending audio buffer to server', buffer)
      websocket.value.send(buffer)
    })
    .catch((error) => {
      console.error('读取音频数据失败:', error)
    })
}
// Play back the captured recording in the <audio> element.
// Fixes: the blob is labeled with the MIME type actually used for
// recording instead of a hard-coded 'audio/wav' (the chunks are
// Opus/WebM/etc., and a wrong type can break playback), and the object
// URL is revoked after playback so repeated recordings don't leak memory.
function createAndPlayAudio() {
  // Merge the encoded chunks into one blob with the real container type.
  const type = usedMimeType.value || 'audio/wav'
  const currentAudioBlob = new Blob(audioChunks.value, { type })
  const audioUrl = URL.createObjectURL(currentAudioBlob)
  if (!audioRef.value) {
    // No element to play through; release the URL right away.
    URL.revokeObjectURL(audioUrl)
    return
  }
  audioRef.value.src = audioUrl
  audioRef.value.load()
  audioRef.value.onended = () => {
    console.log('播放结束')
    URL.revokeObjectURL(audioUrl)
  }
  audioRef.value.play().catch((error) => {
    console.error('播放音频错误:', error)
  })
}
// Connect to the recognition backend as soon as the component mounts.
onMounted(() => {
connectWebSocket()
})
// Tear down on unmount: close the socket, stop any active recording,
// and release the microphone. Guards the stop() call because
// MediaRecorder.stop() throws InvalidStateError when already inactive.
onUnmounted(() => {
  websocket.value?.close()
  if (mediaRecorder.value && mediaRecorder.value.state !== 'inactive') {
    mediaRecorder.value.stop()
  }
  // Release capture tracks so the mic indicator turns off (the original
  // leaked them when unmounting while not recording).
  mediaRecorder.value?.stream?.getTracks().forEach((track) => track.stop())
})
</script>
<style scoped>
/* Centered single-column layout for the demo page */
.app {
max-width: 600px;
margin: 0 auto;
padding: 20px;
font-family: Arial, sans-serif;
}
.control-panel {
margin-bottom: 20px;
}
/* Record button: green when idle (see .recording for the active state) */
button {
padding: 10px 20px;
font-size: 16px;
background-color: #4caf50;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
transition: background-color 0.3s;
}
/* Applied while isRecording is true — turns the button red */
button.recording {
background-color: #f44336;
}
button:hover {
opacity: 0.9;
}
/* Shared card styling for the transcript and status panels */
.result-panel,
.status-panel {
background-color: #f5f5f5;
padding: 15px;
border-radius: 4px;
margin-bottom: 15px;
}
/* pre-wrap preserves line breaks in the recognized text */
.result-text {
white-space: pre-wrap;
min-height: 80px;
border: 1px solid #ddd;
padding: 10px;
border-radius: 4px;
}
</style>
Backend (Node.js WebSocket server, JavaScript):
/**
* 一句话语音识别,
* 采用阿里云alibabacloud-nls的SpeechRecognition
* 官方文档:https://help.aliyun.com/zh/isi/developer-reference/sdk-for-node-js-2?spm=a2c4g.11186623.help-menu-30413.d_3_2_0_7.435b3409Xlz6FL&scm=20140722.H_410563._.OR_help-T_cn~zh-V_1
*/
const WebSocket = require('ws')
const fs = require('fs')
const path = require('path')
const wav = require('wav')
const { promisify } = require('util')
const Nls = require('alibabacloud-nls')
const ffmpeg = require('fluent-ffmpeg')
const ffmpegInstaller = require('@ffmpeg-installer/ffmpeg')
// Aliyun NLS connection settings (one-sentence recognition gateway).
const ALIYUN_CONFIG = {
url: 'wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1',
appkey: '', // Replace with your Appkey
token: '' // Replace with your Token (NOTE(review): NLS tokens are short-lived — confirm how this is refreshed)
}
// Point fluent-ffmpeg at the statically-bundled FFmpeg binary so no
// system-wide FFmpeg install is required.
ffmpeg.setFfmpegPath(ffmpegInstaller.path)
// WebSocket server the Vue frontend connects to (must match WS_SERVER_URL).
const wss = new WebSocket.Server({ port: 8282 })
console.log('WebSocket服务器运行在端口 8282')
// Per-connection handler: receives a complete recording from the browser
// as one binary frame, converts it to 16 kHz mono PCM via ffmpeg, streams
// the PCM to Aliyun one-sentence recognition, and relays recognition
// events back over the same socket.
wss.on('connection', (ws) => {
  console.log('新客户端连接')
  // Active Aliyun recognizer for this connection (at most one at a time).
  let aliyunTranscriber = null
  // Binary frames received from the client, merged before conversion.
  const audioChunks = []
  // NOTE(review): set around each conversion but never consulted —
  // overlapping uploads on one connection are not actually serialized.
  let isProcessing = false

  ws.on('message', (message) => {
    // The frontend sends the whole recording as a single binary frame;
    // ignore any non-binary (text) messages.
    if (Buffer.isBuffer(message)) {
      processAudioChunks(ws, message)
    }
  })

  ws.on('close', () => {
    console.log('客户端断开连接')
    stopAliyunTranscription()
  })

  ws.on('error', (error) => {
    console.error('WebSocket错误:', error)
    stopAliyunTranscription()
  })

  // Merge buffered frames, convert them to PCM, then run recognition.
  async function processAudioChunks(ws, data) {
    isProcessing = true
    audioChunks.push(Buffer.from(data))
    console.log('接收到音频数据块,准备转换为PCM')
    // Unique temp name so concurrent connections don't overwrite each
    // other's file (the original used a shared 'temp.audio').
    const originalFilePath = path.join(
      __dirname,
      `temp-${process.pid}-${Date.now()}-${Math.random().toString(16).slice(2)}.audio`
    )
    try {
      const mergedAudio = Buffer.concat(audioChunks)
      audioChunks.length = 0
      await promisify(fs.writeFile)(originalFilePath, mergedAudio)
      // Convert directly to raw PCM in the format Aliyun expects.
      const pcmData = await convertToPcm(originalFilePath)
      await startAliyunTranscription(ws, pcmData)
    } catch (error) {
      console.error('处理音频时出错:', error)
      ws.send(JSON.stringify({ type: 'error', data: '处理音频时出错' }))
    } finally {
      // Always remove the temp file, even when conversion or recognition
      // failed (the original leaked it on the error path).
      try {
        fs.unlinkSync(originalFilePath)
      } catch (e) {
        // Best effort — the file may never have been written.
      }
      isProcessing = false
    }
  }

  // Stream a PCM buffer into Aliyun SpeechRecognition, forwarding
  // 'changed' / 'completed' / failure events to the browser client.
  async function startAliyunTranscription(ws, pcmBuffer) {
    try {
      // Discard any recognizer left over from a previous upload.
      stopAliyunTranscription()
      aliyunTranscriber = new Nls.SpeechRecognition({
        url: ALIYUN_CONFIG.url,
        appkey: ALIYUN_CONFIG.appkey,
        token: ALIYUN_CONFIG.token
      })
      aliyunTranscriber.on('started', (msg) => {
        console.log('阿里云识别开始:', msg)
      })
      aliyunTranscriber.on('changed', (msg) => {
        console.log('阿里云中间识别结果:', msg)
        ws.send(JSON.stringify({ type: 'changed', data: msg }))
      })
      aliyunTranscriber.on('completed', (msg) => {
        console.log('阿里云识别完成:', msg)
        ws.send(JSON.stringify({ type: 'completed', data: msg }))
      })
      aliyunTranscriber.on('failed', (msg) => {
        console.error('识别失败:', msg)
        ws.send(JSON.stringify({ type: 'error', data: '阿里云识别失败' }))
      })
      aliyunTranscriber.on('closed', () => {
        console.log('阿里云连接关闭')
      })
      await aliyunTranscriber.start(aliyunTranscriber.defaultStartParams(), true, 6000)
      // Feed the PCM in 1 KB chunks, pacing sends to approximate a live
      // audio stream (the gateway expects roughly real-time delivery).
      const sleep = (ms) => new Promise((r) => setTimeout(r, ms))
      for (let i = 0; i < pcmBuffer.length; i += 1024) {
        const chunk = pcmBuffer.slice(i, i + 1024)
        if (!aliyunTranscriber.sendAudio(chunk)) {
          // BUG FIX: the original called the undefined `reject(...)` here,
          // raising a ReferenceError instead of reporting the real failure.
          throw new Error('send audio failed')
        }
        await sleep(20)
      }
      await aliyunTranscriber.close()
    } catch (error) {
      console.error('启动阿里云识别时出错:', error)
      ws.send(JSON.stringify({ type: 'error', data: '启动阿里云识别时出错' }))
      stopAliyunTranscription()
    }
  }

  // Shut down and discard the current recognizer, if any.
  function stopAliyunTranscription() {
    if (aliyunTranscriber) {
      try {
        aliyunTranscriber.shutdown()
        aliyunTranscriber = null
        console.log('阿里云识别已停止')
      } catch (error) {
        console.error('停止阿里云识别时出错:', error)
      }
    }
  }
})
// Convert an arbitrary audio file to raw 16 kHz, 16-bit little-endian,
// mono PCM (the format expected by Aliyun one-sentence recognition) and
// resolve with the PCM bytes as a Buffer. Rejects on conversion failure.
async function convertToPcm(inputFilePath) {
  return new Promise((resolve, reject) => {
    // Unique temp name so concurrent connections don't clobber each
    // other's output (the original always wrote to a shared 'temp.pcm').
    const pcmFilePath = path.join(
      __dirname,
      `temp-${process.pid}-${Date.now()}-${Math.random().toString(16).slice(2)}.pcm`
    )
    ffmpeg(inputFilePath)
      .audioFrequency(16000) // 16 kHz sample rate
      .audioChannels(1) // downmix to mono — the original omitted this, so stereo input produced interleaved 2-channel PCM
      .audioCodec('pcm_s16le') // 16-bit little-endian samples
      .format('s16le') // raw PCM, no container
      .on('end', () => {
        // Guard the file I/O so a read/unlink failure rejects instead of
        // throwing out of the event handler.
        try {
          const pcmData = fs.readFileSync(pcmFilePath)
          fs.unlinkSync(pcmFilePath)
          resolve(pcmData)
        } catch (error) {
          reject(error)
        }
      })
      .on('error', (error) => {
        reject(error)
      })
      .save(pcmFilePath)
  })
}