webassembly003 whisper.cpp的python绑定实现+Cython+Setuptools的GUI程序

CODE

  • python端的绑定和本文一样,还需要将cdef char* LANGUAGE = b'en'改为中文zh(也可以在函数中配置一个参数修改这个值)。

  • ps:本来想尝试cdef whisper_context* whisper_init_from_file_with_params_no_state(char*, whisper_full_params)然后进行调用,但是发现最新版的whisper.h没有这个API了,所以先不加了。

    import pyaudio
    import wave
    import struct
    import sys
    import numpy as np

    import pyqtgraph as pg
    from PyQt5 import QtWidgets
    from PyQt5.QtCore import Qt

    from whispercpp import Whisper

    Audio Format (check Audio MIDI Setup if on Mac)

    FORMAT = pyaudio.paInt16
    RATE = 16000
    CHANNELS = 2

    Set Plot Range [-RANGE,RANGE], default is nyquist/2

    RANGE = None
    if not RANGE:
    RANGE = RATE/2

    Set these parameters (How much data to plot per FFT)

    INPUT_BLOCK_TIME = 0.05
    INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)

    Which Channel? (L or R)

    LR = "l"

    class SpectrumAnalyzer():
    def init(self):
    self.pa = pyaudio.PyAudio()
    self.initMicrophone()
    self.initUI()

    复制代码
      def find_input_device(self):
      	device_index = None            
      	for i in range(self.pa.get_device_count()):     
      		devinfo = self.pa.get_device_info_by_index(i)
      		if devinfo["name"].lower() in ["mic","input"]:
      			device_index = i
      	return device_index
    
      def initMicrophone(self):
      	device_index = self.find_input_device()
    
      	self.stream = self.pa.open(	format = FORMAT,
      								channels = CHANNELS,
      								rate = RATE,
      								input = True,
      								input_device_index = device_index,
      								frames_per_buffer = INPUT_FRAMES_PER_BLOCK)
    
      def readData(self):
      	block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
      	count = len(block)/2
      	format = "%dh"%(count)
      	shorts = struct.unpack( format, block )
      	if CHANNELS == 1:
      		return np.array(shorts)
      	else:
      		l = shorts[::2]
      		r = shorts[1::2]
      		if LR == 'l':
      			return np.array(l)
      		else:
      			return np.array(r)
    
      def initUI(self):
      	self.app = QtWidgets.QApplication([]) # self.app = QtGui.QApplication([])
      	self.app.quitOnLastWindowClosed()
    
      	self.mainWindow = QtWidgets.QMainWindow()
      	self.mainWindow.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
      	self.mainWindow.setWindowTitle("Spectrum Analyzer")
      	self.mainWindow.setGeometry(100, 100, 300, 200)#self.mainWindow.resize(800,300)
      	self.centralWid = QtWidgets.QWidget()
      	self.mainWindow.setCentralWidget(self.centralWid)
      	self.lay = QtWidgets.QVBoxLayout()
      	self.centralWid.setLayout(self.lay)
    
      	# Add a button
      	self.button_start = QtWidgets.QPushButton("Start Record Audio")
      	self.button_start.clicked.connect(self.Button_Start)
      	self.lay.addWidget(self.button_start)
      	self.button_end = QtWidgets.QPushButton("whisper Init")
      	self.whisper = None
      	self.is_whisper_inited = False
      	self.button_end.clicked.connect(self.Button_Whisper)
      	self.lay.addWidget(self.button_end)
      	self.button = QtWidgets.QPushButton("TRANS AUDIO")
      	self.button.clicked.connect(self.Button_TransAudio)
      	self.lay.addWidget(self.button)
      	# Add a text label
      	self.label = QtWidgets.QLabel("Text will appear here:")
      	self.lay.addWidget(self.label)
          # Add a QLineEdit
      	self.text_field = QtWidgets.QLineEdit()
      	self.text_field.setFixedSize(280, 200)
      	self.lay.addWidget(self.text_field)
    
      	self.specWid = pg.PlotWidget(name="spectrum")
      	self.specItem = self.specWid.getPlotItem()
      	self.specItem.setMouseEnabled(y=False)
      	self.specItem.setYRange(0,1000)
      	self.specItem.setXRange(-RANGE,RANGE, padding=0)
    
      	self.specAxis = self.specItem.getAxis("bottom")
      	self.specAxis.setLabel("Frequency [Hz]")
      	self.lay.addWidget(self.specWid)
    
      	self.mainWindow.show()
      	self.app.aboutToQuit.connect(self.close)
    
      def onButtonClick(self):
      	self.label.setText("Whisper res is:")
      	self.text_field.setText("Hello")
    
      def Button_Whisper(self):
      	self.whisper = Whisper('large',model_path= "/home/pdd/myassets/ggml-medium.bin")
      	self.is_whisper_inited = True
      	self.text_field.setText("Whisper INITED")
    
      def Button_TransAudio(self):
      	result = self.whisper.transcribe("/home/pdd/le/pywhisper/output.wav") # result = w.transcribe("myfile.mp3")
      	print(123)
      	text = self.whisper.extract_text(result)
      	self.text_field.setText(str(text))
    
      def Button_Start(self):
      	self.label.setText("Whisper res is:")
      	self.text_field.setText("Start ---")
      	# 录制音频
      	frames = []
      	sample_rate = 16000
      	duration = 5
      	for i in range(0, int(sample_rate / 1024 * duration)):
      		data = self.stream.read(1024)
      		frames.append(data)
     
      	# 将录制的音频保存为wav文件
      	with wave.open("output.wav", 'wb') as wf:
      		wf.setnchannels(CHANNELS) # 2
      		wf.setsampwidth(self.pa.get_sample_size(FORMAT)) # 2
      		wf.setframerate(sample_rate)
      		wf.writeframes(b''.join(frames))
      	self.text_field.setText("保存为wav文件")
      	
    
      def close(self):
      	self.stream.close()
      	sys.exit()
    
      def get_spectrum(self, data):
      	T = 1.0/RATE
      	N = data.shape[0]
      	Pxx = (1./N)*np.fft.fft(data)
      	f = np.fft.fftfreq(N,T)
      	Pxx = np.fft.fftshift(Pxx)
      	f = np.fft.fftshift(f)
    
      	return f.tolist(), (np.absolute(Pxx)).tolist()
    
      def mainLoop(self):
      	while 1:
      		# Sometimes Input overflowed because of mouse events, ignore this
      		try:
      			data = self.readData()
      		except IOError:
      			continue
      		f, Pxx = self.get_spectrum(data)
      		self.specItem.plot(x=f,y=Pxx, clear=True)
      		QtWidgets.QApplication.processEvents()

    if name == 'main':
    sa = SpectrumAnalyzer()
    sa.mainLoop()

相关推荐
学测绘的小杨7 小时前
CompassFusion:一个从 GNSS 到 GNSS/INS 组合导航的独立工程包
python
zzzzzz31014 小时前
当产品经理说这个很简单:我用Python自动化处理奇葩需求的实战指南
python·pycharm·产品经理
雪隐14 小时前
个人电脑玩AI-06让5060 Ti给你打工——不光能画画,Qwen3-TTS还能学人说话,连我老板都信了!
人工智能·后端·python
兵慌码乱1 天前
面向桌面端的资产管理系统分层架构设计与核心模块实现
python·系统架构·sqlite·pyqt5·数据库设计·桌面应用开发·mvc架构
hboot1 天前
AI工程师第三课 - 机器学习基础
python·scikit-learn·kaggle
顾林海1 天前
Agent入门阶段-编程基础-Python:流程控制
python·agent·ai编程
呱呱复呱呱1 天前
Django CBV 源码解读:一个请求是怎么找到你的 get() 方法的
python·django
曲幽2 天前
刚部署的 LibreTranslate 频频翻车?我掏出了 20 年前的 StarDict 词典,用 FastAPI 搭了个本地词典翻译 API
python·fastapi·web·translate·goldendict·libretranslate·stardict·pystardict
荣码2 天前
用Streamlit给AI应用套个界面,10行代码出Web页面
java·python
兵慌码乱2 天前
基于Python+PyQt5+SQLite的药房管理系统实现:事务一致性与界面解耦全流程解析
python·sqlite·信号与槽·pyqt5·数据库设计·桌面应用开发·事务处理