webassembly003 whisper.cpp的python绑定实现+Cython+Setuptools的GUI程序

CODE

  • Python 端的绑定与本文一致,另外还需要将 `cdef char* LANGUAGE = b'en'` 改为中文的 `b'zh'`(也可以给函数增加一个参数来配置这个值)。

  • PS:本来想声明 `cdef whisper_context* whisper_init_from_file_with_params_no_state(char*, whisper_full_params)` 然后进行调用,但是发现最新版的 whisper.h 已经没有这个 API 了,所以暂时先不加。

    import pyaudio
    import wave
    import struct
    import sys
    import numpy as np

    import pyqtgraph as pg
    from PyQt5 import QtWidgets
    from PyQt5.QtCore import Qt

    from whispercpp import Whisper

    # Audio format (check "Audio MIDI Setup" if on Mac).
    FORMAT = pyaudio.paInt16
    RATE = 16000
    CHANNELS = 2

    # Plot range is [-RANGE, RANGE]; when left unset it defaults to the
    # Nyquist frequency (RATE / 2).
    RANGE = None
    if not RANGE:
        RANGE = RATE/2

    # How much audio (in seconds, then in frames) feeds each FFT / plot update.
    INPUT_BLOCK_TIME = 0.05
    INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)

    # Which channel of a multi-channel stream to analyse: "l" or "r".
    LR = "l"

    class SpectrumAnalyzer():
        """Small Qt window that plots a live microphone spectrum.

        The window also offers three buttons: record a short clip to a WAV
        file, load a whisper.cpp model through the ``Whisper`` binding, and
        transcribe a WAV file into the text field.

        Relies on the module-level settings FORMAT, RATE, CHANNELS, RANGE,
        INPUT_FRAMES_PER_BLOCK and LR.
        """

        # Arguments passed to ``Whisper(...)`` when the model is loaded.
        # NOTE(review): the model name says 'large' while the file name says
        # "medium" -- confirm which one the binding actually honours.
        WHISPER_MODEL = 'large'
        WHISPER_MODEL_PATH = "/home/pdd/myassets/ggml-medium.bin"

        # Where Button_Start writes the recording (relative to the cwd) and
        # which file Button_TransAudio transcribes.
        # NOTE(review): these only refer to the same file when the program is
        # started from /home/pdd/le/pywhisper -- confirm that is intended.
        RECORD_PATH = "output.wav"
        TRANSCRIBE_PATH = "/home/pdd/le/pywhisper/output.wav"

        # Length of one recording in seconds and frames fetched per read.
        RECORD_SECONDS = 5
        RECORD_CHUNK = 1024

        def __init__(self):
            """Create the PyAudio instance, open the input stream, build the UI."""
            self.pa = pyaudio.PyAudio()
            self.initMicrophone()
            self.initUI()

        def find_input_device(self):
            """Return the index of an input device whose name mentions a mic.

            A device qualifies when its lower-cased name contains "mic" or
            "input" and it does not report zero input channels. When several
            devices qualify the last one wins; ``None`` is returned when
            none does, which lets PyAudio fall back to its default device.
            """
            device_index = None
            for i in range(self.pa.get_device_count()):
                devinfo = self.pa.get_device_info_by_index(i)
                # Skip pure output devices such as "Microsoft Sound Mapper -
                # Output", which would otherwise match on "mic".
                if devinfo.get("maxInputChannels", 1) <= 0:
                    continue
                name = devinfo["name"].lower()
                if any(keyword in name for keyword in ("mic", "input")):
                    device_index = i
            return device_index

        def initMicrophone(self):
            """Open the input stream used for both plotting and recording."""
            device_index = self.find_input_device()

            self.stream = self.pa.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                input_device_index=device_index,
                frames_per_buffer=INPUT_FRAMES_PER_BLOCK,
            )

        def readData(self):
            """Read one block from the stream and return one channel's samples.

            Returns:
                A numpy array of the 16-bit samples of the selected channel
                (all samples when CHANNELS == 1).
            """
            block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
            # Two bytes per 16-bit sample.
            count = len(block) // 2
            shorts = struct.unpack("%dh" % count, block)
            if CHANNELS == 1:
                return np.array(shorts)
            # Samples are interleaved, so stride by the channel count
            # starting at the requested channel (left = 0, otherwise 1).
            offset = 0 if LR == 'l' else 1
            return np.array(shorts[offset::CHANNELS])

        def initUI(self):
            """Build the frameless always-on-top window and all its widgets."""
            self.app = QtWidgets.QApplication([])
            # The original called the getter quitOnLastWindowClosed(), which
            # has no effect; the setter states the intent explicitly.
            self.app.setQuitOnLastWindowClosed(True)

            self.mainWindow = QtWidgets.QMainWindow()
            self.mainWindow.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
            self.mainWindow.setWindowTitle("Spectrum Analyzer")
            self.mainWindow.setGeometry(100, 100, 300, 200)
            self.centralWid = QtWidgets.QWidget()
            self.mainWindow.setCentralWidget(self.centralWid)
            self.lay = QtWidgets.QVBoxLayout()
            self.centralWid.setLayout(self.lay)

            # Buttons: record audio, load the whisper model, transcribe.
            self.button_start = QtWidgets.QPushButton("Start Record Audio")
            self.button_start.clicked.connect(self.Button_Start)
            self.lay.addWidget(self.button_start)

            self.button_end = QtWidgets.QPushButton("whisper Init")
            self.whisper = None
            self.is_whisper_inited = False
            self.button_end.clicked.connect(self.Button_Whisper)
            self.lay.addWidget(self.button_end)

            self.button = QtWidgets.QPushButton("TRANS AUDIO")
            self.button.clicked.connect(self.Button_TransAudio)
            self.lay.addWidget(self.button)

            # Caption label above the result field.
            self.label = QtWidgets.QLabel("Text will appear here:")
            self.lay.addWidget(self.label)

            # Single-line field that shows status messages and results.
            self.text_field = QtWidgets.QLineEdit()
            self.text_field.setFixedSize(280, 200)
            self.lay.addWidget(self.text_field)

            # Spectrum plot: x is locked to [-RANGE, RANGE], y starts at 0..1000.
            self.specWid = pg.PlotWidget(name="spectrum")
            self.specItem = self.specWid.getPlotItem()
            self.specItem.setMouseEnabled(y=False)
            self.specItem.setYRange(0, 1000)
            self.specItem.setXRange(-RANGE, RANGE, padding=0)

            self.specAxis = self.specItem.getAxis("bottom")
            self.specAxis.setLabel("Frequency [Hz]")
            self.lay.addWidget(self.specWid)

            self.mainWindow.show()
            self.app.aboutToQuit.connect(self.close)

        def onButtonClick(self):
            """Demo handler: show a placeholder caption and text."""
            self.label.setText("Whisper res is:")
            self.text_field.setText("Hello")

        def Button_Whisper(self):
            """Load the whisper model and mark it as ready."""
            self.whisper = Whisper(self.WHISPER_MODEL, model_path=self.WHISPER_MODEL_PATH)
            self.is_whisper_inited = True
            self.text_field.setText("Whisper INITED")

        def Button_TransAudio(self):
            """Transcribe TRANSCRIBE_PATH and show the text in the field.

            Does nothing but report a message when the model has not been
            loaded yet, instead of failing on ``self.whisper`` being None.
            """
            if not self.is_whisper_inited:
                self.text_field.setText("Whisper is not initialized")
                return
            result = self.whisper.transcribe(self.TRANSCRIBE_PATH)
            text = self.whisper.extract_text(result)
            self.text_field.setText(str(text))

        def Button_Start(self):
            """Record RECORD_SECONDS of audio and save it to RECORD_PATH."""
            self.label.setText("Whisper res is:")
            self.text_field.setText("Start ---")
            # The loop below blocks the GUI, so paint the status text now.
            self.label.repaint()
            self.text_field.repaint()

            # Use the stream's real rate (RATE) for both the chunk count and
            # the WAV header so they cannot drift apart.
            n_chunks = int(RATE / self.RECORD_CHUNK * self.RECORD_SECONDS)
            # Tolerate input overflow so a hiccup does not abort the recording.
            frames = [
                self.stream.read(self.RECORD_CHUNK, exception_on_overflow=False)
                for _ in range(n_chunks)
            ]

            # Save the recorded audio as a WAV file.
            with wave.open(self.RECORD_PATH, 'wb') as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(self.pa.get_sample_size(FORMAT))
                wf.setframerate(RATE)
                wf.writeframes(b''.join(frames))
            self.text_field.setText("保存为wav文件")

        def close(self):
            """Release the audio resources and exit the process."""
            self.stream.stop_stream()
            self.stream.close()
            self.pa.terminate()
            sys.exit()

        def get_spectrum(self, data):
            """Return (frequencies, magnitudes) of the FFT of ``data``.

            Both lists are shifted so that frequency zero sits in the middle;
            magnitudes are the absolute FFT values scaled by 1/N.
            """
            T = 1.0/RATE
            N = data.shape[0]
            Pxx = np.fft.fftshift((1./N)*np.fft.fft(data))
            f = np.fft.fftshift(np.fft.fftfreq(N, T))

            return f.tolist(), (np.absolute(Pxx)).tolist()

        def mainLoop(self):
            """Read, transform and plot audio blocks until the process exits."""
            while True:
                # Sometimes input overflows because of mouse events; skip
                # that block and try again.
                try:
                    data = self.readData()
                except IOError:
                    continue
                f, Pxx = self.get_spectrum(data)
                self.specItem.plot(x=f, y=Pxx, clear=True)
                QtWidgets.QApplication.processEvents()

    # Script entry point: build the analyzer and run its plotting loop.
    if __name__ == '__main__':
        sa = SpectrumAnalyzer()
        sa.mainLoop()

相关推荐
AI探索者13 小时前
LangGraph StateGraph 实战:状态机聊天机器人构建指南
python
AI探索者13 小时前
LangGraph 入门:构建带记忆功能的天气查询 Agent
python
FishCoderh15 小时前
Python自动化办公实战:批量重命名文件,告别手动操作
python
躺平大鹅15 小时前
Python函数入门详解(定义+调用+参数)
python
曲幽16 小时前
我用FastAPI接ollama大模型,差点被asyncio整崩溃(附对话窗口实战)
python·fastapi·web·async·httpx·asyncio·ollama
两万五千个小时20 小时前
落地实现 Anthropic Multi-Agent Research System
人工智能·python·架构
哈里谢顿1 天前
Python 高并发服务限流终极方案:从原理到生产落地(2026 实战指南)
python
用户8356290780512 天前
无需 Office:Python 批量转换 PPT 为图片
后端·python
markfeng82 天前
Python+Django+H5+MySQL项目搭建
python·django
GinoWi2 天前
Chapter 2 - Python中的变量和简单的数据类型
python