webassembly003 whisper.cpp的python绑定实现+Cython+Setuptools的GUI程序

CODE

  • Python 端的绑定代码与本文相同,另外还需要将 `cdef char* LANGUAGE = b'en'` 改为中文 `b'zh'`(也可以给函数增加一个参数来配置这个值)。

  • PS:本来想尝试声明 `cdef whisper_context* whisper_init_from_file_with_params_no_state(char*, whisper_full_params)` 然后进行调用,但是发现最新版的 whisper.h 中已经没有这个 API 了,所以暂时先不加。

    import pyaudio
    import wave
    import struct
    import sys
    import numpy as np

    import pyqtgraph as pg
    from PyQt5 import QtWidgets
    from PyQt5.QtCore import Qt

    from whispercpp import Whisper

    # Audio Format (check Audio MIDI Setup if on Mac)
    FORMAT = pyaudio.paInt16
    RATE = 16000
    CHANNELS = 2

    # Set Plot Range [-RANGE, RANGE]; when left as None it defaults to
    # RATE/2, i.e. the Nyquist frequency of the sampled signal.
    RANGE = None
    if not RANGE:
        RANGE = RATE/2

    # Set these parameters (How much data to plot per FFT)
    INPUT_BLOCK_TIME = 0.05
    INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)

    # Which Channel? (L or R)
    LR = "l"

    class SpectrumAnalyzer():
        """PyQt5 window that plots a live microphone spectrum and drives Whisper.

        The window offers three buttons: record a short clip to a WAV file,
        create the ``Whisper`` object, and transcribe the recorded file.  The
        spectrum plot is refreshed by :meth:`mainLoop`, which pumps Qt events
        manually instead of running ``QApplication.exec_()``.

        The class relies on the module-level settings ``FORMAT``, ``RATE``,
        ``CHANNELS``, ``RANGE``, ``INPUT_FRAMES_PER_BLOCK`` and ``LR``.
        """

        # Model file handed to the Whisper binding.
        MODEL_PATH = "/home/pdd/myassets/ggml-medium.bin"
        # File written by Button_Start and read back by Button_TransAudio.
        # The original transcribed an absolute path while recording to a
        # relative one; both now refer to the same file.
        WAV_PATH = "output.wav"
        # Length of one recording and the number of frames read per call.
        RECORD_SECONDS = 5
        RECORD_CHUNK = 1024
        # Substrings (lower-case) that mark a device name as a microphone.
        DEVICE_KEYWORDS = ("mic", "input")

        def __init__(self):
            """Create the PyAudio handle, open the input stream, build the UI."""
            self.pa = pyaudio.PyAudio()
            self.initMicrophone()
            self.initUI()

        def find_input_device(self):
            """Return the index of the first device that looks like a microphone.

            A device qualifies when its lower-cased name contains one of
            ``DEVICE_KEYWORDS`` and it reports at least ``CHANNELS`` input
            channels.  Returns ``None`` when nothing qualifies, which lets
            PyAudio fall back to the default input device.
            """
            for index in range(self.pa.get_device_count()):
                devinfo = self.pa.get_device_info_by_index(index)
                # Skip devices that cannot deliver the requested channel count.
                if devinfo.get("maxInputChannels", 0) < CHANNELS:
                    continue
                name = devinfo["name"].lower()
                # Substring match: real device names are longer than the
                # bare keyword, so an exact comparison would never hit.
                if any(keyword in name for keyword in self.DEVICE_KEYWORDS):
                    return index
            return None

        def initMicrophone(self):
            """Open the PyAudio input stream used for plotting and recording."""
            device_index = self.find_input_device()

            self.stream = self.pa.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                input_device_index=device_index,
                frames_per_buffer=INPUT_FRAMES_PER_BLOCK,
            )

        def readData(self):
            """Read one block from the stream and return the selected channel.

            Returns:
                numpy.ndarray of the samples of channel ``LR`` (or of the only
                channel when ``CHANNELS`` is 1).

            Raises:
                IOError: propagated from ``stream.read``; ``mainLoop`` catches it.
            """
            block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
            return self._decode_block(block, CHANNELS, LR)

        @staticmethod
        def _decode_block(block, channels, side):
            """Unpack raw 16-bit samples and pick one channel.

            Args:
                block: bytes of native-order signed 16-bit samples, interleaved
                    by channel when ``channels`` > 1.
                channels: number of interleaved channels in ``block``.
                side: ``'l'`` selects the first channel, anything else the
                    second one.

            Returns:
                numpy.ndarray with the samples of the selected channel.
            """
            count = len(block) // 2  # two bytes per 16-bit sample
            shorts = struct.unpack("%dh" % count, block)
            if channels == 1:
                return np.array(shorts)
            offset = 0 if side == 'l' else 1
            return np.array(shorts[offset::channels])

        def initUI(self):
            """Build the main window: buttons, status label, text field, plot."""
            self.app = QtWidgets.QApplication([])
            # The original called the getter quitOnLastWindowClosed(), which
            # has no effect; the setter states the intent explicitly.
            self.app.setQuitOnLastWindowClosed(True)

            self.mainWindow = QtWidgets.QMainWindow()
            # Frameless window that stays above other windows.
            self.mainWindow.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
            self.mainWindow.setWindowTitle("Spectrum Analyzer")
            self.mainWindow.setGeometry(100, 100, 300, 200)
            self.centralWid = QtWidgets.QWidget()
            self.mainWindow.setCentralWidget(self.centralWid)
            self.lay = QtWidgets.QVBoxLayout()
            self.centralWid.setLayout(self.lay)

            # Buttons: record, initialise Whisper, transcribe.
            self.button_start = QtWidgets.QPushButton("Start Record Audio")
            self.button_start.clicked.connect(self.Button_Start)
            self.lay.addWidget(self.button_start)

            self.button_end = QtWidgets.QPushButton("whisper Init")
            self.whisper = None
            self.is_whisper_inited = False
            self.button_end.clicked.connect(self.Button_Whisper)
            self.lay.addWidget(self.button_end)

            self.button = QtWidgets.QPushButton("TRANS AUDIO")
            self.button.clicked.connect(self.Button_TransAudio)
            self.lay.addWidget(self.button)

            # Status label shown above the text field.
            self.label = QtWidgets.QLabel("Text will appear here:")
            self.lay.addWidget(self.label)

            # Text field that shows status messages and the transcription.
            self.text_field = QtWidgets.QLineEdit()
            self.text_field.setFixedSize(280, 200)
            self.lay.addWidget(self.text_field)

            # Spectrum plot: fixed Y range, X range spans [-RANGE, RANGE].
            self.specWid = pg.PlotWidget(name="spectrum")
            self.specItem = self.specWid.getPlotItem()
            self.specItem.setMouseEnabled(y=False)
            self.specItem.setYRange(0, 1000)
            self.specItem.setXRange(-RANGE, RANGE, padding=0)

            self.specAxis = self.specItem.getAxis("bottom")
            self.specAxis.setLabel("Frequency [Hz]")
            self.lay.addWidget(self.specWid)

            self.mainWindow.show()
            # Release the audio stream when the application is about to quit.
            self.app.aboutToQuit.connect(self.close)

        def onButtonClick(self):
            """Demo slot: write a fixed label and a fixed text into the UI."""
            self.label.setText("Whisper res is:")
            self.text_field.setText("Hello")

        def Button_Whisper(self):
            """Create the Whisper object once and report it in the text field."""
            if not self.is_whisper_inited:
                # NOTE(review): the model name is 'large' while MODEL_PATH
                # points at a "medium" file - confirm which one the binding uses.
                self.whisper = Whisper('large', model_path=self.MODEL_PATH)
                self.is_whisper_inited = True
            self.text_field.setText("Whisper INITED")

        def Button_TransAudio(self):
            """Transcribe ``WAV_PATH`` and show the extracted text."""
            # Guard against clicking "TRANS AUDIO" before "whisper Init",
            # which previously failed on ``None.transcribe``.
            if not self.is_whisper_inited or self.whisper is None:
                self.text_field.setText("Whisper is not initialized")
                return
            result = self.whisper.transcribe(self.WAV_PATH)
            text = self.whisper.extract_text(result)
            self.text_field.setText(str(text))

        def Button_Start(self):
            """Record ``RECORD_SECONDS`` of audio and save it to ``WAV_PATH``.

            The read loop runs inside the slot, so the window does not process
            events until the recording has finished.
            """
            self.label.setText("Whisper res is:")
            self.text_field.setText("Start ---")

            # Record audio from the already-open input stream.
            frames = []
            chunk_count = int(RATE / self.RECORD_CHUNK * self.RECORD_SECONDS)
            for _ in range(chunk_count):
                try:
                    frames.append(self.stream.read(self.RECORD_CHUNK))
                except IOError:
                    # Same policy as mainLoop: skip a chunk whose read failed.
                    continue

            # Save the recorded audio as a WAV file, using the stream's own
            # channel count, sample width and rate.
            with wave.open(self.WAV_PATH, 'wb') as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(self.pa.get_sample_size(FORMAT))
                wf.setframerate(RATE)
                wf.writeframes(b''.join(frames))
            self.text_field.setText("保存为wav文件")

        def close(self):
            """Stop and close the stream, terminate PyAudio, then exit."""
            try:
                self.stream.stop_stream()
                self.stream.close()
            finally:
                # Release PortAudio even if stopping the stream failed.
                self.pa.terminate()
            sys.exit()

        def get_spectrum(self, data, rate=None):
            """Return the centred FFT magnitude spectrum of ``data``.

            Args:
                data: 1-D numpy array of samples.
                rate: sampling rate in Hz; defaults to the module-level ``RATE``.

            Returns:
                Tuple ``(frequencies, magnitudes)`` of two lists, both shifted
                so that the zero-frequency bin sits in the middle.  Two empty
                lists are returned for empty input.
            """
            if rate is None:
                rate = RATE
            N = data.shape[0]
            if N == 0:
                # Avoid dividing by zero when there are no samples.
                return [], []
            T = 1.0 / rate
            Pxx = np.fft.fftshift((1. / N) * np.fft.fft(data))
            f = np.fft.fftshift(np.fft.fftfreq(N, T))

            return f.tolist(), (np.absolute(Pxx)).tolist()

        def mainLoop(self):
            """Run forever: read a block, redraw the spectrum, pump Qt events."""
            while True:
                # Sometimes Input overflowed because of mouse events, ignore this
                try:
                    data = self.readData()
                except IOError:
                    continue
                f, Pxx = self.get_spectrum(data)
                self.specItem.plot(x=f, y=Pxx, clear=True)
                QtWidgets.QApplication.processEvents()

    # Script entry point: build the analyzer window and run its loop.
    if __name__ == '__main__':
        sa = SpectrumAnalyzer()
        sa.mainLoop()

相关推荐
周亚鑫2 分钟前
vue3 js代码混淆
开发语言·javascript·ecmascript
Feibo20114 分钟前
管理agent
python
陳10308 分钟前
C++:vector(1)
开发语言·c++
棉晗榜9 分钟前
WPF将程序集里面嵌入的资源文件下载到本机磁盘中,将项目中的文件下载到桌面
开发语言·wpf
牛奔11 分钟前
Linux 的日志分析命令
linux·运维·服务器·python·excel
电化学仪器白超11 分钟前
20251209Ver8(精密电流源温漂特性测试报告)
python·单片机·嵌入式硬件·自动化
昵称已被吞噬~‘(*@﹏@*)’~13 分钟前
【强化学习】MacOS (M1芯片)上最新版本 MuJoCo 通用安装教程(最简洁),PS:不是 mujoco_py 的老版本
python·macos·机器学习·强化学习·mujoco
人道领域16 分钟前
【零基础学java】(Map集合)
java·开发语言
杀死那个蝈坦17 分钟前
JUC并发编程day1
java·开发语言
lly20240617 分钟前
SQLite Alter 命令详解
开发语言