使用模型 sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
效果图

具体代码
using uMicrophoneWebGL;
using UnityEngine;
/// <summary>
/// Sample driver: forwards audio samples delivered by the <see cref="MicrophoneWebGL"/>
/// component on the same GameObject to a <see cref="KeywordSpotting"/> instance and
/// polls it at a fixed interval for detected keywords, logging each hit.
/// </summary>
[RequireComponent(typeof(MicrophoneWebGL))]
public class KeywordSpottingSample : MonoBehaviour
{
    MicrophoneWebGL microphone;

    /// <summary>Keyword spotter that receives the microphone data; assign in the Inspector.</summary>
    public KeywordSpotting keywordSpotting;

    // Seconds accumulated since the last recognition poll.
    float timer = 0f;

    // Minimum number of seconds between two recognition polls.
    float interval = 0.2f;

    /// <summary>
    /// Initializes the keyword spotter and subscribes to the microphone data event.
    /// </summary>
    void Start()
    {
        // Without a spotter there is nothing to initialize or feed; report it clearly
        // instead of failing with a NullReferenceException.
        if (keywordSpotting == null)
        {
            Debug.LogError("KeywordSpottingSample: keywordSpotting is not assigned.");
            return;
        }

        keywordSpotting.Init();
        microphone = GetComponent<MicrophoneWebGL>();
        microphone.dataEvent.AddListener(OnAudioData);
    }

    /// <summary>
    /// Unsubscribes from the microphone data event so the event no longer holds a
    /// reference to this component after it has been destroyed.
    /// </summary>
    void OnDestroy()
    {
        if (microphone != null)
        {
            microphone.dataEvent.RemoveListener(OnAudioData);
        }
    }

    /// <summary>
    /// Callback for the microphone data event; hands the samples to the keyword spotter.
    /// </summary>
    /// <param name="data">Audio samples delivered by the microphone component.</param>
    public void OnAudioData(float[] data)
    {
        // Same readiness condition as Update: only feed a spotter that finished Init.
        if (keywordSpotting != null && keywordSpotting.initDone)
        {
            keywordSpotting.AcceptData(data);
        }
    }

    /// <summary>
    /// Polls the keyword spotter once every <c>interval</c> seconds and logs any
    /// non-empty keyword it returns.
    /// </summary>
    private void Update()
    {
        if (keywordSpotting == null || !keywordSpotting.initDone)
        {
            return;
        }

        timer += Time.deltaTime;
        if (timer < interval)
        {
            return;
        }

        timer = 0f;

        string keyword = keywordSpotting.Recognize();
        if (!string.IsNullOrEmpty(keyword))
        {
            Debug.Log("keyword:" + keyword);
        }
    }
}
using System.IO;
using SherpaOnnx;
using UnityEngine;
/// <summary>
/// Keyword spotting: wraps a sherpa-onnx <see cref="KeywordSpotter"/> and a single
/// <see cref="OnlineStream"/>. Call <see cref="Init"/> once, push audio with
/// <see cref="AcceptData"/>, and poll <see cref="Recognize"/> for detected keywords.
/// </summary>
public class KeywordSpotting : MonoBehaviour
{
    KeywordSpotter keywordSpotter;
    string pathRoot;
    string modelPath;
    OnlineStream onlineStream;

    // Sample rate used both for the feature config and for every AcceptWaveform call,
    // so the two can never disagree.
    int sampleRate = 16000;

    /// <summary>True once <see cref="Init"/> has created the spotter and its stream.</summary>
    public bool initDone = false;

    /// <summary>
    /// Builds the spotter configuration from the model folder under
    /// <c>Util.GetPath() + "/models"</c>, then creates the spotter and its stream.
    /// Calling it again releases the previously created spotter and stream first.
    /// </summary>
    public void Init()
    {
        // Drop anything left over from an earlier Init so it is not leaked.
        ReleaseNativeResources();

        pathRoot = Util.GetPath() + "/models";
        // The model folder below must be copied into the models directory.
        modelPath = pathRoot + "/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01";

        KeywordSpotterConfig config = new KeywordSpotterConfig();
        config.FeatConfig.SampleRate = sampleRate;
        config.FeatConfig.FeatureDim = 80;
        config.ModelConfig.Transducer.Encoder = Path.Combine(modelPath, "encoder-epoch-12-avg-2-chunk-16-left-64.onnx");
        config.ModelConfig.Transducer.Decoder = Path.Combine(modelPath, "decoder-epoch-12-avg-2-chunk-16-left-64.onnx");
        config.ModelConfig.Transducer.Joiner = Path.Combine(modelPath, "joiner-epoch-12-avg-2-chunk-16-left-64.onnx");
        config.ModelConfig.Tokens = Path.Combine(modelPath, "tokens.txt");
        config.ModelConfig.Provider = "cpu";
        config.ModelConfig.NumThreads = 1;
        config.ModelConfig.Debug = 0;
        config.KeywordsFile = Path.Combine(modelPath, "keywords.txt");

        keywordSpotter = new KeywordSpotter(config);
        onlineStream = keywordSpotter.CreateStream();
        initDone = true;
    }

    /// <summary>
    /// Feeds audio samples into the stream. Does nothing when <see cref="Init"/> has not
    /// completed or when <paramref name="data"/> is null or empty.
    /// </summary>
    /// <param name="data">Audio samples; passed to the stream together with the
    /// configured sample rate (NOTE(review): the caller is assumed to deliver audio at
    /// that rate — confirm against the microphone settings).</param>
    public void AcceptData(float[] data)
    {
        if (!initDone || data == null || data.Length == 0)
        {
            return;
        }

        onlineStream.AcceptWaveform(sampleRate, data);
    }

    /// <summary>
    /// Decodes the stream for as long as the spotter reports it ready and returns the
    /// first keyword found, resetting the stream right after the detection.
    /// </summary>
    /// <returns>The detected keyword, or <see cref="string.Empty"/> when nothing was
    /// detected or <see cref="Init"/> has not completed.</returns>
    public string Recognize()
    {
        if (!initDone)
        {
            return string.Empty;
        }

        while (keywordSpotter.IsReady(onlineStream))
        {
            keywordSpotter.Decode(onlineStream);
            KeywordResult result = keywordSpotter.GetResult(onlineStream);
            if (string.IsNullOrEmpty(result.Keyword))
            {
                continue;
            }

            Debug.Log("关键字: " + result.Keyword);
            // Remember to call Reset() right after detecting a keyword
            keywordSpotter.Reset(onlineStream);
            return result.Keyword;
        }

        return string.Empty;
    }

    /// <summary>
    /// Releases the spotter and its stream when the component is destroyed.
    /// </summary>
    void OnDestroy()
    {
        ReleaseNativeResources();
    }

    // Disposes the stream and then the spotter that created it, and marks the component
    // as uninitialized so AcceptData/Recognize become no-ops.
    void ReleaseNativeResources()
    {
        initDone = false;

        if (onlineStream != null)
        {
            onlineStream.Dispose();
            onlineStream = null;
        }

        if (keywordSpotter != null)
        {
            keywordSpotter.Dispose();
            keywordSpotter = null;
        }
    }
}