C#使用whisper.net实现语音识别(语音转文本)

目录

介绍

效果

输出信息

项目

代码

下载


介绍

GitHub 地址:https://github.com/sandrohanea/whisper.net

Whisper.net. Speech to text made simple using Whisper Models

模型下载地址:https://huggingface.co/sandrohanea/whisper.net/tree/main/classic

效果

输出信息

whisper_init_from_file_no_state: loading model from 'ggml-small.bin'

whisper_model_load: loading model

whisper_model_load: n_vocab = 51865

whisper_model_load: n_audio_ctx = 1500

whisper_model_load: n_audio_state = 768

whisper_model_load: n_audio_head = 12

whisper_model_load: n_audio_layer = 12

whisper_model_load: n_text_ctx = 448

whisper_model_load: n_text_state = 768

whisper_model_load: n_text_head = 12

whisper_model_load: n_text_layer = 12

whisper_model_load: n_mels = 80

whisper_model_load: ftype = 1

whisper_model_load: qntvr = 0

whisper_model_load: type = 3

whisper_model_load: mem required = 743.00 MB (+ 16.00 MB per decoder)

whisper_model_load: adding 1608 extra tokens

whisper_model_load: model ctx = 464.68 MB

whisper_model_load: model size = 464.44 MB

whisper_init_state: kv self size = 15.75 MB

whisper_init_state: kv cross size = 52.73 MB

00:00:00->00:00:20: 皇鶴楼,崔昊,西人已成皇鶴去,此地空于皇鶴楼,皇鶴一去不复返,白云千载空悠悠。

00:00:20->00:00:39: 青川莉莉汉阳树,方草七七英五周,日暮相关何处事,燕泊江上世人愁。

项目

代码

using System;

using System.Collections.Generic;

using System.ComponentModel;

using System.Data;

using System.Drawing;

using System.IO;

using System.Linq;

using System.Text;

using System.Threading.Tasks;

using System.Windows.Forms;

using Whisper.net;

using static System.Net.Mime.MediaTypeNames;

namespace C_使用whisper.net实现语音转文本
{
    /// <summary>
    /// Demo window that transcribes a WAV file to text with Whisper.net and
    /// shows each recognized segment in <c>txtResult</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // Filter string for the file picker: only *.wav files are offered.
        private readonly string fileFilter = "*.wav|*.wav";

        // Path of the WAV file that the next recognition run will read.
        private string wavFileName = "";

        // Created once in Form1_Load and reused for every recognition run.
        // NOTE(review): Whisper.net documents both types as disposable; they are
        // not released anywhere in this file — confirm whether the designer's
        // Dispose(bool) should do so.
        private WhisperFactory whisperFactory;
        private WhisperProcessor processor;

        /// <summary>
        /// Runs recognition on <see cref="wavFileName"/> and writes every segment
        /// to the console and to <c>txtResult</c>. The button is disabled while
        /// the run is in progress; any failure is reported in a message box.
        /// </summary>
        private async void button2_Click(object sender, EventArgs e)
        {
            if (string.IsNullOrEmpty(wavFileName))
            {
                return;
            }

            // The processor is missing when loading the model failed in Form1_Load.
            if (processor == null)
            {
                return;
            }

            try
            {
                button2.Enabled = false;

                // Start from an empty box so a repeated run does not append to
                // the previous transcript.
                txtResult.Text = "";

                using var fileStream = File.OpenRead(wavFileName);

                await foreach (var result in processor.ProcessAsync(fileStream))
                {
                    // Build the line once and reuse it for both outputs.
                    string line = $"{result.Start}->{result.End}: {result.Text}\r\n";
                    Console.WriteLine(line);
                    txtResult.Text += line;
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
            finally
            {
                button2.Enabled = true;
            }
        }

        /// <summary>
        /// Selects the bundled sample file and loads the "ggml-small.bin" model.
        /// If the model cannot be loaded, the error is shown and recognition is
        /// disabled instead of leaving a null processor behind.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            wavFileName = "085黄鹤楼.wav";
            txtFileName.Text = wavFileName;

            try
            {
                whisperFactory = WhisperFactory.FromPath("ggml-small.bin");

                // Language is fixed to "zh"; the original author left "auto" as
                // the alternative value.
                processor = whisperFactory.CreateBuilder()
                    .WithLanguage("zh")
                    .Build();
            }
            catch (Exception ex)
            {
                // Release a factory that was created before the builder failed.
                whisperFactory?.Dispose();
                whisperFactory = null;
                processor = null;

                button2.Enabled = false;
                MessageBox.Show(ex.Message);
            }
        }

        /// <summary>
        /// Lets the user pick another WAV file; on confirmation the previous
        /// transcript is cleared and the new path is shown in <c>txtFileName</c>.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // The dialog is disposable, so scope it to this handler.
            using var ofd = new OpenFileDialog();
            ofd.Filter = fileFilter;

            if (ofd.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            txtResult.Text = "";

            wavFileName = ofd.FileName;
            txtFileName.Text = wavFileName;
        }
    }
}

复制代码
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Whisper.net;
using static System.Net.Mime.MediaTypeNames;

namespace C_使用whisper.net实现语音转文本
{
    /// <summary>
    /// Demo window that transcribes a WAV file to text with Whisper.net and
    /// shows each recognized segment in <c>txtResult</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // Filter string for the file picker: only *.wav files are offered.
        private readonly string fileFilter = "*.wav|*.wav";

        // Path of the WAV file that the next recognition run will read.
        private string wavFileName = "";

        // Created once in Form1_Load and reused for every recognition run.
        // NOTE(review): Whisper.net documents both types as disposable; they are
        // not released anywhere in this file — confirm whether the designer's
        // Dispose(bool) should do so.
        private WhisperFactory whisperFactory;
        private WhisperProcessor processor;

        /// <summary>
        /// Runs recognition on <see cref="wavFileName"/> and writes every segment
        /// to the console and to <c>txtResult</c>. The button is disabled while
        /// the run is in progress; any failure is reported in a message box.
        /// </summary>
        private async void button2_Click(object sender, EventArgs e)
        {
            if (string.IsNullOrEmpty(wavFileName))
            {
                return;
            }

            // The processor is missing when loading the model failed in Form1_Load.
            if (processor == null)
            {
                return;
            }

            try
            {
                button2.Enabled = false;

                // Start from an empty box so a repeated run does not append to
                // the previous transcript.
                txtResult.Text = "";

                using var fileStream = File.OpenRead(wavFileName);

                await foreach (var result in processor.ProcessAsync(fileStream))
                {
                    // Build the line once and reuse it for both outputs.
                    string line = $"{result.Start}->{result.End}: {result.Text}\r\n";
                    Console.WriteLine(line);
                    txtResult.Text += line;
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
            finally
            {
                button2.Enabled = true;
            }
        }

        /// <summary>
        /// Selects the bundled sample file and loads the "ggml-small.bin" model.
        /// If the model cannot be loaded, the error is shown and recognition is
        /// disabled instead of leaving a null processor behind.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            wavFileName = "085黄鹤楼.wav";
            txtFileName.Text = wavFileName;

            try
            {
                whisperFactory = WhisperFactory.FromPath("ggml-small.bin");

                // Language is fixed to "zh"; the original author left "auto" as
                // the alternative value.
                processor = whisperFactory.CreateBuilder()
                    .WithLanguage("zh")
                    .Build();
            }
            catch (Exception ex)
            {
                // Release a factory that was created before the builder failed.
                whisperFactory?.Dispose();
                whisperFactory = null;
                processor = null;

                button2.Enabled = false;
                MessageBox.Show(ex.Message);
            }
        }

        /// <summary>
        /// Lets the user pick another WAV file; on confirmation the previous
        /// transcript is cleared and the new path is shown in <c>txtFileName</c>.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // The dialog is disposable, so scope it to this handler.
            using var ofd = new OpenFileDialog();
            ofd.Filter = fileFilter;

            if (ofd.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            txtResult.Text = "";

            wavFileName = ofd.FileName;
            txtFileName.Text = wavFileName;
        }
    }
}

下载

源码下载

相关推荐
Blossom.1183 小时前
使用Python和Scikit-Learn实现机器学习模型调优
开发语言·人工智能·python·深度学习·目标检测·机器学习·scikit-learn
scdifsn4 小时前
动手学深度学习12.7. 参数服务器-笔记&练习(PyTorch)
pytorch·笔记·深度学习·分布式计算·数据并行·参数服务器
军训猫猫头4 小时前
96.如何使用C#实现串口发送? C#例子
开发语言·c#
DFminer4 小时前
【LLM】fast-api 流式生成测试
人工智能·机器人
郄堃Deep Traffic4 小时前
机器学习+城市规划第十四期:利用半参数地理加权回归来实现区域带宽不同的规划任务
人工智能·机器学习·回归·城市规划
海盗儿5 小时前
Attention Is All You Need (Transformer) 以及Transformer pytorch实现
pytorch·深度学习·transformer
GIS小天5 小时前
AI+预测3D新模型百十个定位预测+胆码预测+去和尾2025年6月7日第101弹
人工智能·算法·机器学习·彩票
阿部多瑞 ABU5 小时前
主流大语言模型安全性测试(三):阿拉伯语越狱提示词下的表现与分析
人工智能·安全·ai·语言模型·安全性测试
cnbestec5 小时前
Xela矩阵三轴触觉传感器的工作原理解析与应用场景
人工智能·线性代数·触觉传感器
不爱写代码的玉子5 小时前
HALCON透视矩阵
人工智能·深度学习·线性代数·算法·计算机视觉·矩阵·c#