
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Runtime.Remoting.Contexts;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Numerics;
using System.Reflection;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using MathNet.Numerics;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using NAudio.Wave;
using Newtonsoft.Json;
using WebSocketSharp;
using WebSocketSharp.Server;
namespace CyberWin_TradeTest_Sensvoice2026.CyberWin.VoiceServer.sensevoice
{
// internal class asr_sensevoice_server
#region 数据模型
/// <summary>
/// JSON payload returned to clients for a recognition request.
/// It is serialized with the keys "text" and "error"; both values start out as empty strings.
/// </summary>
public class RecognitionResponse_01
{
    /// <summary>Recognized text (JSON key "text").</summary>
    [JsonProperty(PropertyName = "text")]
    public string Text { get; set; } = "";

    /// <summary>Error description (JSON key "error").</summary>
    [JsonProperty(PropertyName = "error")]
    public string Error { get; set; } = "";
}
/// <summary>
/// End-of-stream signal exchanged as a JSON text message (JSON key "eof").
/// </summary>
public class EofSignal
{
    /// <summary>
    /// Creates a signal whose <see cref="Eof"/> flag is already set to 1.
    /// </summary>
    public EofSignal()
    {
        Eof = 1;
    }

    /// <summary>
    /// End-of-stream flag. It starts at 1, so an instance whose "eof" value is
    /// never assigned still reports 1.
    /// </summary>
    [JsonProperty(PropertyName = "eof")]
    public int Eof { get; set; }
}
/// <summary>
/// Per-client streaming state: buffered samples plus bookkeeping for the last processing pass.
/// </summary>
public class ClientCache
{
    /// <summary>
    /// Creates a cache with an empty sample buffer, the current local time as the
    /// last processing time, and the first-chunk flag set.
    /// </summary>
    public ClientCache()
    {
        AudioBuffer = Array.Empty<float>();
        LastProcessTime = DateTime.Now;
        IsFirst = true;
    }

    /// <summary>Audio samples received from the client that are still buffered.</summary>
    public float[] AudioBuffer { get; set; }

    /// <summary>Local time at which this client's buffer was last processed.</summary>
    public DateTime LastProcessTime { get; set; }

    /// <summary>First-chunk flag; initially true.</summary>
    public bool IsFirst { get; set; }
}
#endregion
#region ONNX模型封装
/// <summary>
/// Wraps two ONNX Runtime inference sessions (a SenseVoice speech model and a VAD model)
/// together with the 16-bit PCM helpers used to prepare audio for them.
/// Written for .NET Framework 4.7.
/// </summary>
public class SenseVoiceOnnxModel_01 : IDisposable
{
    // Configuration constants.

    /// <summary>Sample rate, in Hz, that audio is converted to before inference.</summary>
    public const int SampleRate = 16000;

    /// <summary>Chunk length in samples; one second at <see cref="SampleRate"/>.</summary>
    public const int MinAudioLength = 16000;

    /// <summary>RMS level a buffer must exceed to be treated as non-silent.</summary>
    public const float EnergyThreshold = 0.01f;

    /// <summary>Buffer size constant; not referenced by the code in this class.</summary>
    public const int BufferSize = 8192;

    // Built once instead of on every FormatText call.
    private static readonly System.Text.RegularExpressions.Regex _specialTokenRegex =
        new System.Text.RegularExpressions.Regex(@"<\|.*?\|>", System.Text.RegularExpressions.RegexOptions.Compiled);

    private static readonly System.Text.RegularExpressions.Regex _whitespaceRegex =
        new System.Text.RegularExpressions.Regex(@"\s+", System.Text.RegularExpressions.RegexOptions.Compiled);

    // ONNX Runtime sessions.
    private readonly InferenceSession _voiceSession;
    private readonly InferenceSession _vadSession;

    // Serializes calls into the speech session (warm-up and Recognize).
    private readonly object _lockObj = new object();

    /// <summary>
    /// Loads both models and runs one warm-up inference on the speech model.
    /// </summary>
    /// <param name="voiceModelPath">Path of the SenseVoice ONNX model file.</param>
    /// <param name="vadModelPath">Path of the VAD ONNX model file.</param>
    /// <param name="useGpu">
    /// Currently ignored: the CPU execution provider is always used.
    /// </param>
    /// <exception cref="FileNotFoundException">Either model file does not exist.</exception>
    public SenseVoiceOnnxModel_01(string voiceModelPath, string vadModelPath, bool useGpu = false)
    {
        if (!File.Exists(voiceModelPath))
            throw new FileNotFoundException("SenseVoice模型文件不存在", voiceModelPath);
        if (!File.Exists(vadModelPath))
            throw new FileNotFoundException("VAD模型文件不存在", vadModelPath);

        InferenceSession voiceSession = null;
        InferenceSession vadSession = null;
        try
        {
            // SessionOptions owns a native handle; it is only needed while the
            // sessions are being created, so release it right afterwards.
            using (var sessionOptions = new SessionOptions())
            {
                sessionOptions.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;

                // Simplified setup: always run on the CPU, no CUDA detection.
                sessionOptions.AppendExecutionProvider_CPU();
                Console.WriteLine("使用CPU运行");

                Console.WriteLine("正在加载SenseVoice模型...");
                voiceSession = new InferenceSession(voiceModelPath, sessionOptions);
                Console.WriteLine("正在加载VAD模型...");
                vadSession = new InferenceSession(vadModelPath, sessionOptions);
            }

            _voiceSession = voiceSession;
            _vadSession = vadSession;

            // Warm up so the first real request does not pay the first-run cost.
            PreloadModel();
            Console.WriteLine("模型加载完成!");
        }
        catch
        {
            // The caller never receives the instance when construction fails, so
            // Dispose() would never run; free whatever was created and rethrow.
            vadSession?.Dispose();
            voiceSession?.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Runs one inference over a second of zeros on the speech model and discards the result.
    /// </summary>
    private void PreloadModel()
    {
        // NOTE(review): only "input" is fed here, while Recognize also feeds
        // "is_final" — confirm both feeds match the model's declared inputs.
        var dummyInput = new DenseTensor<float>(new[] { 1, MinAudioLength });
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("input", dummyInput)
        };
        lock (_lockObj)
        {
            using (var results = _voiceSession.Run(inputs))
            {
                // Intentionally empty: the run itself is the warm-up.
            }
        }
    }

    /// <summary>
    /// Checks whether a buffer is long and loud enough to be worth processing.
    /// </summary>
    /// <param name="audioData">Audio samples.</param>
    /// <returns>
    /// False when <paramref name="audioData"/> is null or shorter than half of
    /// <see cref="MinAudioLength"/>; otherwise whether its RMS level exceeds
    /// <see cref="EnergyThreshold"/>.
    /// </returns>
    public bool IsAudioValid(float[] audioData)
    {
        if (audioData == null || audioData.Length < MinAudioLength / 2)
            return false;

        // Root-mean-square level of the buffer.
        float energy = (float)Math.Sqrt(audioData.Average(x => x * x));
        return energy > EnergyThreshold;
    }

    /// <summary>
    /// Converts raw bytes, read as 16-bit PCM samples, to floats and resamples them
    /// to <see cref="SampleRate"/> when the source rate differs.
    /// </summary>
    /// <param name="audioBytes">Raw audio bytes; a trailing odd byte is ignored.</param>
    /// <param name="sourceSampleRate">Sample rate of <paramref name="audioBytes"/>, in Hz.</param>
    /// <returns>Float samples; an empty array for null or empty input.</returns>
    public float[] PreprocessAudio(byte[] audioBytes, int sourceSampleRate = SampleRate)
    {
        if (audioBytes == null || audioBytes.Length == 0)
            return Array.Empty<float>();

        // 1. 16-bit PCM -> float.
        float[] floatAudio = new float[audioBytes.Length / 2];
        for (int i = 0; i < floatAudio.Length; i++)
        {
            short sample = BitConverter.ToInt16(audioBytes, i * 2);
            floatAudio[i] = sample / 32767.0f;
        }

        // 2. Resample to the target rate when needed.
        if (sourceSampleRate != SampleRate)
        {
            floatAudio = ResampleAudio(floatAudio, sourceSampleRate, SampleRate);
        }
        return floatAudio;
    }

    /// <summary>
    /// Resamples mono audio by round-tripping through 16-bit PCM and NAudio's
    /// <c>WaveFormatConversionStream</c>.
    /// </summary>
    /// <param name="audioData">Audio samples.</param>
    /// <param name="srcRate">Source sample rate, in Hz.</param>
    /// <param name="dstRate">Target sample rate, in Hz.</param>
    /// <returns>
    /// The resampled audio, or <paramref name="audioData"/> unchanged when the rates are
    /// equal, the input is empty, or the conversion fails (the failure is logged).
    /// </returns>
    private float[] ResampleAudio(float[] audioData, int srcRate, int dstRate)
    {
        if (srcRate == dstRate || audioData.Length == 0)
            return audioData;
        try
        {
            // Float -> 16-bit PCM. Clamp first: casting an out-of-range (or NaN)
            // float to short would otherwise wrap around and produce loud clicks.
            short[] pcmSamples = new short[audioData.Length];
            for (int i = 0; i < audioData.Length; i++)
            {
                float scaled = audioData[i] * 32767;
                if (float.IsNaN(scaled))
                    scaled = 0f;
                else if (scaled > short.MaxValue)
                    scaled = short.MaxValue;
                else if (scaled < short.MinValue)
                    scaled = short.MinValue;
                pcmSamples[i] = (short)scaled;
            }
            byte[] pcmBytes = new byte[pcmSamples.Length * 2];
            Buffer.BlockCopy(pcmSamples, 0, pcmBytes, 0, pcmBytes.Length);

            // Resample with NAudio.
            using (var msIn = new MemoryStream(pcmBytes))
            using (var rawReader = new RawSourceWaveStream(msIn, new WaveFormat(srcRate, 16, 1)))
            using (var resampler = new WaveFormatConversionStream(new WaveFormat(dstRate, 16, 1), rawReader))
            using (var msOut = new MemoryStream())
            {
                byte[] buffer = new byte[4096];
                int bytesRead;
                while ((bytesRead = resampler.Read(buffer, 0, buffer.Length)) > 0)
                {
                    msOut.Write(buffer, 0, bytesRead);
                }

                // 16-bit PCM -> float.
                byte[] resampledBytes = msOut.ToArray();
                float[] resampledFloat = new float[resampledBytes.Length / 2];
                for (int i = 0; i < resampledFloat.Length; i++)
                {
                    short sample = BitConverter.ToInt16(resampledBytes, i * 2);
                    resampledFloat[i] = sample / 32767.0f;
                }
                return resampledFloat;
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine($"重采样失败:{ex.Message}");
            return audioData;
        }
    }

    /// <summary>
    /// Runs the speech model on a buffer and returns the cleaned-up text.
    /// Calls are serialized through an internal lock.
    /// </summary>
    /// <param name="audioData">Preprocessed audio samples.</param>
    /// <param name="isFinal">Whether this is the last segment; fed to the model as "is_final".</param>
    /// <returns>
    /// The recognized text, or an empty string when the audio fails
    /// <see cref="IsAudioValid"/> or inference throws (the error is logged).
    /// </returns>
    public string Recognize(float[] audioData, bool isFinal = false)
    {
        if (!IsAudioValid(audioData))
            return string.Empty;
        lock (_lockObj)
        {
            try
            {
                // Input tensor of shape [1, length].
                var inputTensor = new DenseTensor<float>(audioData, new[] { 1, audioData.Length });
                var inputs = new List<NamedOnnxValue>
                {
                    NamedOnnxValue.CreateFromTensor("input", inputTensor),
                    NamedOnnxValue.CreateFromTensor("is_final", new DenseTensor<bool>(new[] { isFinal }, new[] { 1 }))
                };

                using (var results = _voiceSession.Run(inputs))
                {
                    // The first output is read as a string tensor; its first element is the text.
                    var outputTensor = results.First().AsTensor<string>();
                    string text = outputTensor.FirstOrDefault() ?? string.Empty;
                    return FormatText(text);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine($"推理错误:{ex.Message}");
                return string.Empty;
            }
        }
    }

    /// <summary>
    /// Runs the VAD model on a buffer.
    /// </summary>
    /// <param name="audioData">Audio samples.</param>
    /// <returns>
    /// False when the audio fails <see cref="IsAudioValid"/>; otherwise whether the mean of
    /// the model's first output exceeds 0.5. Returns true when inference throws, so a VAD
    /// failure never suppresses audio.
    /// </returns>
    public bool DetectVoiceActivity(float[] audioData)
    {
        if (!IsAudioValid(audioData))
            return false;
        try
        {
            var inputTensor = new DenseTensor<float>(audioData, new[] { 1, audioData.Length });
            var inputs = new List<NamedOnnxValue>
            {
                NamedOnnxValue.CreateFromTensor("input", inputTensor)
            };
            using (var results = _vadSession.Run(inputs))
            {
                var output = results.First().AsTensor<float>();
                return output.Average() > 0.5f;
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine($"VAD检测错误:{ex.Message}");
            return true; // On error, assume speech is present.
        }
    }

    /// <summary>
    /// Removes <c>&lt;|...|&gt;</c> markers, collapses whitespace runs to a single space and trims.
    /// </summary>
    /// <param name="text">Raw model output.</param>
    /// <returns>The cleaned text; an empty string for null or empty input.</returns>
    private string FormatText(string text)
    {
        if (string.IsNullOrEmpty(text))
            return string.Empty;

        text = _specialTokenRegex.Replace(text, "");
        text = _whitespaceRegex.Replace(text, " ").Trim();
        return text;
    }

    /// <summary>
    /// Disposes both inference sessions.
    /// </summary>
    public void Dispose()
    {
        _voiceSession?.Dispose();
        _vadSession?.Dispose();
    }
}
#endregion
#region WebSocket服务(适配.NET Framework 4.7)
/// <summary>
/// WebSocket behavior for streaming recognition. Binary messages carry audio that is
/// buffered per client and recognized in one-second chunks; a JSON text message with
/// "eof" set to 1 flushes the remaining audio and closes the connection.
/// </summary>
public class StreamingRecognitionService : WebSocketBehavior
{
    // Upper bound on buffered samples per client (30 seconds). Without a bound, a
    // client that streams silence, or streams faster than chunks are consumed,
    // grows the buffer indefinitely.
    private const int MaxBufferedSamples = SenseVoiceOnnxModel_01.SampleRate * 30;

    private static readonly ConcurrentDictionary<string, ClientCache> _clientCache = new ConcurrentDictionary<string, ClientCache>();
    private static SenseVoiceOnnxModel_01 _model;
    private string _clientId;

    /// <summary>
    /// Sets the model instance shared by all sessions.
    /// </summary>
    /// <param name="model">Model instance.</param>
    public static void SetModel(SenseVoiceOnnxModel_01 model)
    {
        _model = model;
    }

    /// <summary>
    /// Registers an empty cache for the new session.
    /// </summary>
    protected override void OnOpen()
    {
        _clientId = ID;
        _clientCache.TryAdd(_clientId, new ClientCache());
        Console.WriteLine($"客户端连接:{_clientId}");
    }

    /// <summary>
    /// Dispatches a message: binary data is treated as audio, text as a control message.
    /// Any failure is logged and reported to the client as an error response.
    /// </summary>
    protected override void OnMessage(MessageEventArgs e)
    {
        try
        {
            if (_model == null)
            {
                SendError("模型未初始化");
                return;
            }

            ClientCache clientCache = _clientCache.GetOrAdd(_clientId, id => new ClientCache());

            if (e.IsBinary)
            {
                // Binary frames carry audio.
                ProcessAudioData(e.RawData, clientCache);
            }
            else if (e.IsText)
            {
                // Text frames carry the EOF signal.
                ProcessTextMessage(e.Data, clientCache);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine($"消息处理错误:{ex.Message}");
            SendError(ex.Message);
        }
    }

    /// <summary>
    /// Appends incoming audio to the client's buffer and, when at least one second of
    /// non-silent audio is buffered and more than 0.5 s has passed since the last pass,
    /// recognizes the first second and advances the buffer by half a second.
    /// </summary>
    /// <param name="audioBytes">Audio bytes from the client.</param>
    /// <param name="clientCache">The client's cache.</param>
    private void ProcessAudioData(byte[] audioBytes, ClientCache clientCache)
    {
        if (audioBytes == null || audioBytes.Length == 0)
            return;

        float[] audioData = _model.PreprocessAudio(audioBytes);

        // Append the new samples to what is already buffered.
        float[] previous = clientCache.AudioBuffer ?? Array.Empty<float>();
        float[] merged = new float[previous.Length + audioData.Length];
        Array.Copy(previous, 0, merged, 0, previous.Length);
        Array.Copy(audioData, 0, merged, previous.Length, audioData.Length);

        // Keep only the newest samples once the bound is exceeded.
        if (merged.Length > MaxBufferedSamples)
        {
            float[] newest = new float[MaxBufferedSamples];
            Array.Copy(merged, merged.Length - MaxBufferedSamples, newest, 0, MaxBufferedSamples);
            merged = newest;
        }
        clientCache.AudioBuffer = merged;

        DateTime now = DateTime.Now;
        bool ready = merged.Length >= SenseVoiceOnnxModel_01.MinAudioLength &&
                     _model.IsAudioValid(merged) &&
                     (now - clientCache.LastProcessTime).TotalSeconds > 0.5;
        if (!ready)
            return;

        // Take one second for recognition but advance by only half a second, so
        // consecutive chunks overlap.
        int chunkLength = SenseVoiceOnnxModel_01.MinAudioLength;
        int hop = chunkLength / 2;
        float[] chunk = new float[chunkLength];
        Array.Copy(merged, 0, chunk, 0, chunkLength);
        float[] remaining = new float[merged.Length - hop];
        Array.Copy(merged, hop, remaining, 0, remaining.Length);
        clientCache.AudioBuffer = remaining;
        clientCache.LastProcessTime = now;

        string text = _model.Recognize(chunk, false);
        if (!string.IsNullOrEmpty(text))
        {
            Send(JsonConvert.SerializeObject(new RecognitionResponse_01 { Text = text }));
        }
    }

    /// <summary>
    /// Handles a text message. Only a JSON object that explicitly carries "eof": 1 is
    /// acted on; text that is not valid JSON for the signal is ignored.
    /// </summary>
    /// <param name="text">Text content of the message.</param>
    /// <param name="clientCache">The client's cache.</param>
    private void ProcessTextMessage(string text, ClientCache clientCache)
    {
        if (string.IsNullOrEmpty(text))
            return;

        // EofSignal.Eof defaults to 1, so deserializing "{}" (or any object without
        // an "eof" key) would look like an EOF request. Start from 0 and let the
        // JSON overwrite it, so only an explicit "eof": 1 counts.
        EofSignal signal = new EofSignal { Eof = 0 };
        try
        {
            JsonConvert.PopulateObject(text, signal);
        }
        catch (JsonException)
        {
            // Not an EOF signal; ignore it.
            return;
        }

        if (signal.Eof != 1)
            return;

        // Recognize whatever is left in the buffer.
        if (clientCache.AudioBuffer.Length > SenseVoiceOnnxModel_01.MinAudioLength / 2 &&
            _model.IsAudioValid(clientCache.AudioBuffer))
        {
            string finalText = _model.Recognize(clientCache.AudioBuffer, true);
            Send(JsonConvert.SerializeObject(new RecognitionResponse_01 { Text = finalText }));
        }

        // Tell the client recognition is finished, then close the connection.
        Send(JsonConvert.SerializeObject(new RecognitionResponse_01 { Text = "[识别完成]" }));
        Context.WebSocket.Close();
    }

    /// <summary>
    /// Drops the session's cache when the connection closes.
    /// </summary>
    protected override void OnClose(CloseEventArgs e)
    {
        RemoveClientCache();
        Console.WriteLine($"客户端断开:{_clientId} - {e.Reason}");
    }

    /// <summary>
    /// Logs the error and drops the session's cache.
    /// </summary>
    protected override void OnError(WebSocketSharp.ErrorEventArgs e)
    {
        Console.WriteLine($"WebSocket错误:{e.Message}");
        RemoveClientCache();
    }

    /// <summary>
    /// Removes this session's cache entry, if the session ever registered one.
    /// </summary>
    private void RemoveClientCache()
    {
        // _clientId is only assigned in OnOpen; a null dictionary key would throw.
        if (_clientId == null)
            return;
        ClientCache cache;
        _clientCache.TryRemove(_clientId, out cache);
    }

    /// <summary>
    /// Sends an error response to the client. A failed send is logged, not thrown,
    /// because this is called from the exception handler in <see cref="OnMessage"/>.
    /// </summary>
    /// <param name="error">Error message.</param>
    private void SendError(string error)
    {
        try
        {
            Send(JsonConvert.SerializeObject(new RecognitionResponse_01 { Error = error }));
        }
        catch (Exception ex)
        {
            Console.WriteLine($"发送错误信息失败:{ex.Message}");
        }
    }
}
#endregion
#region HTTP服务(适配.NET Framework 4.7)
/// <summary>
/// HTTP service that accepts a WAV file in the body of a POST request and answers with
/// the recognition result as JSON. OPTIONS requests are answered with CORS headers.
/// </summary>
public class HttpRecognitionServer
{
    private readonly HttpListener _listener;
    private readonly SenseVoiceOnnxModel_01 _model;
    private readonly int _port;

    // Written by Start/Stop and read by the accept loop on another thread.
    private volatile bool _isRunning;

    /// <summary>
    /// Creates the server and registers the prefix <c>http://*:{port}/</c>; it does not start listening.
    /// </summary>
    /// <param name="port">TCP port to listen on.</param>
    /// <param name="model">Model instance used for recognition.</param>
    public HttpRecognitionServer(int port, SenseVoiceOnnxModel_01 model)
    {
        _port = port;
        _model = model;
        _listener = new HttpListener();
        _listener.Prefixes.Add($"http://*:{port}/");
        _isRunning = false;
    }

    /// <summary>
    /// Starts listening and launches the accept loop on a long-running task.
    /// Does nothing when the server is already running.
    /// </summary>
    public void Start()
    {
        if (_isRunning)
            return;
        _listener.Start();
        _isRunning = true;
        Console.WriteLine($"HTTP文件识别服务已启动:http://0.0.0.0:{_port}");

        // Blocking accept loop on its own thread; each request is handed to the thread pool.
        Task.Factory.StartNew(() =>
        {
            while (_isRunning && _listener.IsListening)
            {
                try
                {
                    HttpListenerContext context = _listener.GetContext();
                    ThreadPool.QueueUserWorkItem(ProcessRequest, context);
                }
                catch (HttpListenerException ex)
                {
                    if (ex.ErrorCode != 995) // 995 is raised when the listener is shut down.
                        Console.WriteLine($"HTTP监听错误:{ex.Message}");
                    break;
                }
                catch (ObjectDisposedException)
                {
                    // The listener was closed by Stop(); leave quietly.
                    break;
                }
                catch (Exception ex)
                {
                    Console.WriteLine($"HTTP请求处理错误:{ex.Message}");
                }
            }
        }, TaskCreationOptions.LongRunning);
    }

    /// <summary>
    /// Stops and closes the listener. Safe to call more than once.
    /// </summary>
    public void Stop()
    {
        _isRunning = false;
        try
        {
            _listener.Stop();
            _listener.Close();
        }
        catch (ObjectDisposedException)
        {
            // Already closed by an earlier Stop(); nothing left to do.
        }
    }

    /// <summary>
    /// Thread-pool entry point for one request. No exception is allowed to escape,
    /// because an unhandled exception on a thread-pool thread terminates the process.
    /// </summary>
    /// <param name="state">The <see cref="HttpListenerContext"/> of the request.</param>
    private void ProcessRequest(object state)
    {
        HttpListenerContext context = state as HttpListenerContext;
        if (context == null)
            return;
        HttpListenerResponse response = context.Response;
        try
        {
            HandleRequest(context, response);
        }
        catch (Exception ex)
        {
            TryWriteError(response, ex.Message);
        }
        finally
        {
            CloseQuietly(response);
        }
    }

    /// <summary>
    /// Handles OPTIONS (CORS preflight), rejects other non-POST methods with 405, and
    /// recognizes the WAV body of a POST.
    /// </summary>
    private void HandleRequest(HttpListenerContext context, HttpListenerResponse response)
    {
        // CORS preflight.
        if (context.Request.HttpMethod == "OPTIONS")
        {
            response.Headers.Add("Access-Control-Allow-Origin", "*");
            response.Headers.Add("Access-Control-Allow-Methods", "POST, OPTIONS");
            response.Headers.Add("Access-Control-Allow-Headers", "Content-Type");
            response.StatusCode = 200;
            return;
        }

        // Only POST is supported.
        if (context.Request.HttpMethod != "POST")
        {
            response.StatusCode = 405;
            WriteResponse(response, new RecognitionResponse_01 { Error = "仅支持POST请求" });
            return;
        }

        // Read the whole body. A single Stream.Read call may return fewer bytes than
        // requested, and ContentLength64 is -1 for chunked uploads, so copy until
        // the end of the stream instead of sizing a buffer from the header.
        // NOTE(review): the body size is not limited here.
        byte[] requestData;
        using (Stream body = context.Request.InputStream)
        using (var bodyCopy = new MemoryStream())
        {
            body.CopyTo(bodyCopy);
            requestData = bodyCopy.ToArray();
        }

        float[] audioData;
        try
        {
            audioData = DecodeWav(requestData);
        }
        catch (Exception ex)
        {
            WriteResponse(response, new RecognitionResponse_01 { Error = $"音频解析失败:{ex.Message}" });
            return;
        }

        string text = _model.Recognize(audioData, true);
        WriteResponse(response, new RecognitionResponse_01 { Text = text });
    }

    /// <summary>
    /// Decodes a WAV file held in memory into the float samples the model expects.
    /// </summary>
    /// <param name="wavFile">Complete WAV file contents.</param>
    /// <returns>Preprocessed mono samples.</returns>
    /// <exception cref="NotSupportedException">The file is not 16 bits per sample.</exception>
    private float[] DecodeWav(byte[] wavFile)
    {
        using (var ms = new MemoryStream(wavFile))
        using (var waveReader = new WaveFileReader(ms))
        {
            WaveFormat format = waveReader.WaveFormat;

            // PreprocessAudio reads the data as 16-bit samples; any other sample
            // width would be decoded into noise, so reject it explicitly.
            if (format.BitsPerSample != 16)
                throw new NotSupportedException("仅支持16位PCM的WAV文件");

            byte[] waveBytes = ReadAllBytes(waveReader);

            // PreprocessAudio also assumes a single channel; fold interleaved
            // channels down to mono first.
            if (format.Channels > 1)
                waveBytes = DownmixToMono(waveBytes, format.Channels);

            return _model.PreprocessAudio(waveBytes, format.SampleRate);
        }
    }

    /// <summary>
    /// Averages the channels of interleaved 16-bit samples into one channel.
    /// </summary>
    /// <param name="interleaved">Interleaved 16-bit samples; a trailing partial frame is dropped.</param>
    /// <param name="channels">Number of interleaved channels (greater than 1).</param>
    /// <returns>Mono 16-bit samples in the same byte order as the input.</returns>
    private static byte[] DownmixToMono(byte[] interleaved, int channels)
    {
        int frameBytes = channels * 2;
        int frameCount = interleaved.Length / frameBytes;
        byte[] mono = new byte[frameCount * 2];
        for (int frame = 0; frame < frameCount; frame++)
        {
            int frameStart = frame * frameBytes;
            int sum = 0;
            for (int channel = 0; channel < channels; channel++)
            {
                sum += BitConverter.ToInt16(interleaved, frameStart + channel * 2);
            }
            short average = (short)(sum / channels);
            BitConverter.GetBytes(average).CopyTo(mono, frame * 2);
        }
        return mono;
    }

    /// <summary>
    /// Reads the remaining audio data of a WAV reader into a byte array.
    /// </summary>
    private byte[] ReadAllBytes(WaveFileReader reader)
    {
        using (var ms = new MemoryStream())
        {
            byte[] buffer = new byte[4096];
            int bytesRead;
            while ((bytesRead = reader.Read(buffer, 0, buffer.Length)) > 0)
            {
                ms.Write(buffer, 0, bytesRead);
            }
            return ms.ToArray();
        }
    }

    /// <summary>
    /// Serializes <paramref name="data"/> as UTF-8 JSON and writes it as the response
    /// body with a permissive CORS header. The status code is left as the caller set it.
    /// </summary>
    private void WriteResponse(HttpListenerResponse response, RecognitionResponse_01 data)
    {
        response.ContentType = "application/json";
        response.Headers.Add("Access-Control-Allow-Origin", "*");
        string json = JsonConvert.SerializeObject(data);
        byte[] buffer = Encoding.UTF8.GetBytes(json);
        response.ContentLength64 = buffer.Length;
        response.OutputStream.Write(buffer, 0, buffer.Length);
        response.OutputStream.Flush();
    }

    /// <summary>
    /// Best-effort error response: writing can itself fail (for example when the client
    /// has disconnected or a response was already partly sent), so failures are logged.
    /// </summary>
    private void TryWriteError(HttpListenerResponse response, string message)
    {
        try
        {
            WriteResponse(response, new RecognitionResponse_01 { Error = message });
        }
        catch (Exception ex)
        {
            Console.WriteLine($"HTTP响应写入失败:{ex.Message}");
        }
    }

    /// <summary>
    /// Closes the response; if closing fails, aborts the connection instead of throwing.
    /// </summary>
    private static void CloseQuietly(HttpListenerResponse response)
    {
        try
        {
            response.Close();
        }
        catch (Exception ex)
        {
            Console.WriteLine($"HTTP响应关闭失败:{ex.Message}");
            try
            {
                response.Abort();
            }
            catch (Exception)
            {
                // Nothing more can be done for this connection.
            }
        }
    }
}
#endregion
}
东方仙盟:拥抱知识开源,共筑数字新生态
在全球化与数字化浪潮中,东方仙盟始终秉持开放协作、知识共享的理念,积极拥抱开源技术与开放标准。我们相信,唯有打破技术壁垒、汇聚全球智慧,才能真正推动行业的可持续发展。
开源赋能中小商户:通过将前端异常检测、跨系统数据互联等核心能力开源化,东方仙盟为全球中小商户提供了低成本、高可靠的技术解决方案,让更多商家能够平等享受数字转型的红利。
共建行业标准:我们积极参与国际技术社区,与全球开发者、合作伙伴共同制定开放协议与技术规范,推动跨境零售、文旅、餐饮等多业态的系统互联互通,构建更加公平、高效的数字生态。
知识普惠,共促发展:通过开源社区、技术文档与培训体系,东方仙盟致力于将前沿技术转化为可落地的行业实践,赋能全球合作伙伴,共同培育创新人才,推动数字经济的普惠式增长。
阿雪技术观
在科技发展浪潮中,我们不妨积极投身技术共享。不满足于做受益者,更要主动担当贡献者。无论是分享代码、撰写技术博客,还是参与开源项目维护改进,每一个微小举动都可能蕴含推动技术进步的巨大能量。东方仙盟是汇聚力量的天地,我们携手在此探索硅基生命,为科技进步添砖加瓦。
Hey folks, in this wild tech-driven world, why not dive headfirst into the whole tech-sharing scene? Don't just be the one reaping all the benefits; step up and be a contributor too. Whether you're tossing out your code snippets, hammering out some tech blogs, or getting your hands dirty with maintaining and sprucing up open-source projects, every little thing you do might just end up being a massive force that pushes tech forward. And guess what? The Eastern Fairy Alliance is this awesome place where we all come together. We're gonna team up and explore the whole silicon-based life thing, and in the process, we'll be fueling the growth of technology.