webassembly003 whisper.cpp的python绑定实现+Cython+Setuptools的GUI程序

CODE

  • python端的绑定和本文一样,还需要将cdef char* LANGUAGE = b'en'改为中文zh(也可以在函数中配置一个参数修改这个值)。

  • ps:本来想尝试cdef whisper_context* whisper_init_from_file_with_params_no_state(char*, whisper_full_params)然后进行调用,但是发现最新版的whisper.h没有这个API了,所以先不加了。

    import pyaudio
    import wave
    import struct
    import sys
    import numpy as np

    import pyqtgraph as pg
    from PyQt5 import QtWidgets
    from PyQt5.QtCore import Qt

    from whispercpp import Whisper

    Audio Format (check Audio MIDI Setup if on Mac)

    FORMAT = pyaudio.paInt16
    RATE = 16000
    CHANNELS = 2

    Set Plot Range [-RANGE,RANGE], default is nyquist/2

    RANGE = None
    if not RANGE:
    RANGE = RATE/2

    Set these parameters (How much data to plot per FFT)

    INPUT_BLOCK_TIME = 0.05
    INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)

    Which Channel? (L or R)

    LR = "l"

    class SpectrumAnalyzer():
    def init(self):
    self.pa = pyaudio.PyAudio()
    self.initMicrophone()
    self.initUI()

    复制代码
      def find_input_device(self):
      	device_index = None            
      	for i in range(self.pa.get_device_count()):     
      		devinfo = self.pa.get_device_info_by_index(i)
      		if devinfo["name"].lower() in ["mic","input"]:
      			device_index = i
      	return device_index
    
      def initMicrophone(self):
      	device_index = self.find_input_device()
    
      	self.stream = self.pa.open(	format = FORMAT,
      								channels = CHANNELS,
      								rate = RATE,
      								input = True,
      								input_device_index = device_index,
      								frames_per_buffer = INPUT_FRAMES_PER_BLOCK)
    
      def readData(self):
      	block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
      	count = len(block)/2
      	format = "%dh"%(count)
      	shorts = struct.unpack( format, block )
      	if CHANNELS == 1:
      		return np.array(shorts)
      	else:
      		l = shorts[::2]
      		r = shorts[1::2]
      		if LR == 'l':
      			return np.array(l)
      		else:
      			return np.array(r)
    
      def initUI(self):
      	self.app = QtWidgets.QApplication([]) # self.app = QtGui.QApplication([])
      	self.app.quitOnLastWindowClosed()
    
      	self.mainWindow = QtWidgets.QMainWindow()
      	self.mainWindow.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
      	self.mainWindow.setWindowTitle("Spectrum Analyzer")
      	self.mainWindow.setGeometry(100, 100, 300, 200)#self.mainWindow.resize(800,300)
      	self.centralWid = QtWidgets.QWidget()
      	self.mainWindow.setCentralWidget(self.centralWid)
      	self.lay = QtWidgets.QVBoxLayout()
      	self.centralWid.setLayout(self.lay)
    
      	# Add a button
      	self.button_start = QtWidgets.QPushButton("Start Record Audio")
      	self.button_start.clicked.connect(self.Button_Start)
      	self.lay.addWidget(self.button_start)
      	self.button_end = QtWidgets.QPushButton("whisper Init")
      	self.whisper = None
      	self.is_whisper_inited = False
      	self.button_end.clicked.connect(self.Button_Whisper)
      	self.lay.addWidget(self.button_end)
      	self.button = QtWidgets.QPushButton("TRANS AUDIO")
      	self.button.clicked.connect(self.Button_TransAudio)
      	self.lay.addWidget(self.button)
      	# Add a text label
      	self.label = QtWidgets.QLabel("Text will appear here:")
      	self.lay.addWidget(self.label)
          # Add a QLineEdit
      	self.text_field = QtWidgets.QLineEdit()
      	self.text_field.setFixedSize(280, 200)
      	self.lay.addWidget(self.text_field)
    
      	self.specWid = pg.PlotWidget(name="spectrum")
      	self.specItem = self.specWid.getPlotItem()
      	self.specItem.setMouseEnabled(y=False)
      	self.specItem.setYRange(0,1000)
      	self.specItem.setXRange(-RANGE,RANGE, padding=0)
    
      	self.specAxis = self.specItem.getAxis("bottom")
      	self.specAxis.setLabel("Frequency [Hz]")
      	self.lay.addWidget(self.specWid)
    
      	self.mainWindow.show()
      	self.app.aboutToQuit.connect(self.close)
    
      def onButtonClick(self):
      	self.label.setText("Whisper res is:")
      	self.text_field.setText("Hello")
    
      def Button_Whisper(self):
      	self.whisper = Whisper('large',model_path= "/home/pdd/myassets/ggml-medium.bin")
      	self.is_whisper_inited = True
      	self.text_field.setText("Whisper INITED")
    
      def Button_TransAudio(self):
      	result = self.whisper.transcribe("/home/pdd/le/pywhisper/output.wav") # result = w.transcribe("myfile.mp3")
      	print(123)
      	text = self.whisper.extract_text(result)
      	self.text_field.setText(str(text))
    
      def Button_Start(self):
      	self.label.setText("Whisper res is:")
      	self.text_field.setText("Start ---")
      	# 录制音频
      	frames = []
      	sample_rate = 16000
      	duration = 5
      	for i in range(0, int(sample_rate / 1024 * duration)):
      		data = self.stream.read(1024)
      		frames.append(data)
     
      	# 将录制的音频保存为wav文件
      	with wave.open("output.wav", 'wb') as wf:
      		wf.setnchannels(CHANNELS) # 2
      		wf.setsampwidth(self.pa.get_sample_size(FORMAT)) # 2
      		wf.setframerate(sample_rate)
      		wf.writeframes(b''.join(frames))
      	self.text_field.setText("保存为wav文件")
      	
    
      def close(self):
      	self.stream.close()
      	sys.exit()
    
      def get_spectrum(self, data):
      	T = 1.0/RATE
      	N = data.shape[0]
      	Pxx = (1./N)*np.fft.fft(data)
      	f = np.fft.fftfreq(N,T)
      	Pxx = np.fft.fftshift(Pxx)
      	f = np.fft.fftshift(f)
    
      	return f.tolist(), (np.absolute(Pxx)).tolist()
    
      def mainLoop(self):
      	while 1:
      		# Sometimes Input overflowed because of mouse events, ignore this
      		try:
      			data = self.readData()
      		except IOError:
      			continue
      		f, Pxx = self.get_spectrum(data)
      		self.specItem.plot(x=f,y=Pxx, clear=True)
      		QtWidgets.QApplication.processEvents()

    if name == 'main':
    sa = SpectrumAnalyzer()
    sa.mainLoop()

相关推荐
fish_xk1 小时前
c++中的引用和数组
开发语言·c++
酒尘&4 小时前
JS数组不止Array!索引集合类全面解析
开发语言·前端·javascript·学习·js
冬夜戏雪4 小时前
【java学习日记】【2025.12.7】【7/60】
java·开发语言·学习
xwill*4 小时前
分词器(Tokenizer)-sentencepiece(把训练语料中的字符自动组合成一个最优的子词(subword)集合。)
开发语言·pytorch·python
咖啡の猫5 小时前
Python列表的查询操作
开发语言·python
Chiandra_Leong5 小时前
Python-Pandas、Numpy
python·pandas
BoBoZz195 小时前
ParametricObjectsDemo多种参数曲面展示及面上部分点法线展示
python·vtk·图形渲染·图形处理
quikai19815 小时前
python练习第三组
开发语言·python
JIngJaneIL6 小时前
基于Java非遗传承文化管理系统(源码+数据库+文档)
java·开发语言·数据库·vue.js·spring boot
吃西瓜的年年6 小时前
1. 初识C语言
c语言·开发语言