webassembly003 whisper.cpp的python绑定实现+Cython+Setuptools的GUI程序

CODE

  • python端的绑定和本文一样,还需要将cdef char* LANGUAGE = b'en'改为中文zh(也可以在函数中配置一个参数修改这个值)。

  • ps:本来想尝试cdef whisper_context* whisper_init_from_file_with_params_no_state(char*, whisper_full_params)然后进行调用,但是发现最新版的whisper.h没有这个API了,所以先不加了。

    import pyaudio
    import wave
    import struct
    import sys
    import numpy as np

    import pyqtgraph as pg
    from PyQt5 import QtWidgets
    from PyQt5.QtCore import Qt

    from whispercpp import Whisper

    Audio Format (check Audio MIDI Setup if on Mac)

    FORMAT = pyaudio.paInt16
    RATE = 16000
    CHANNELS = 2

    Set Plot Range [-RANGE,RANGE], default is nyquist/2

    RANGE = None
    if not RANGE:
    RANGE = RATE/2

    Set these parameters (How much data to plot per FFT)

    INPUT_BLOCK_TIME = 0.05
    INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)

    Which Channel? (L or R)

    LR = "l"

    class SpectrumAnalyzer():
    def init(self):
    self.pa = pyaudio.PyAudio()
    self.initMicrophone()
    self.initUI()

    复制代码
      def find_input_device(self):
      	device_index = None            
      	for i in range(self.pa.get_device_count()):     
      		devinfo = self.pa.get_device_info_by_index(i)
      		if devinfo["name"].lower() in ["mic","input"]:
      			device_index = i
      	return device_index
    
      def initMicrophone(self):
      	device_index = self.find_input_device()
    
      	self.stream = self.pa.open(	format = FORMAT,
      								channels = CHANNELS,
      								rate = RATE,
      								input = True,
      								input_device_index = device_index,
      								frames_per_buffer = INPUT_FRAMES_PER_BLOCK)
    
      def readData(self):
      	block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
      	count = len(block)/2
      	format = "%dh"%(count)
      	shorts = struct.unpack( format, block )
      	if CHANNELS == 1:
      		return np.array(shorts)
      	else:
      		l = shorts[::2]
      		r = shorts[1::2]
      		if LR == 'l':
      			return np.array(l)
      		else:
      			return np.array(r)
    
      def initUI(self):
      	self.app = QtWidgets.QApplication([]) # self.app = QtGui.QApplication([])
      	self.app.quitOnLastWindowClosed()
    
      	self.mainWindow = QtWidgets.QMainWindow()
      	self.mainWindow.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
      	self.mainWindow.setWindowTitle("Spectrum Analyzer")
      	self.mainWindow.setGeometry(100, 100, 300, 200)#self.mainWindow.resize(800,300)
      	self.centralWid = QtWidgets.QWidget()
      	self.mainWindow.setCentralWidget(self.centralWid)
      	self.lay = QtWidgets.QVBoxLayout()
      	self.centralWid.setLayout(self.lay)
    
      	# Add a button
      	self.button_start = QtWidgets.QPushButton("Start Record Audio")
      	self.button_start.clicked.connect(self.Button_Start)
      	self.lay.addWidget(self.button_start)
      	self.button_end = QtWidgets.QPushButton("whisper Init")
      	self.whisper = None
      	self.is_whisper_inited = False
      	self.button_end.clicked.connect(self.Button_Whisper)
      	self.lay.addWidget(self.button_end)
      	self.button = QtWidgets.QPushButton("TRANS AUDIO")
      	self.button.clicked.connect(self.Button_TransAudio)
      	self.lay.addWidget(self.button)
      	# Add a text label
      	self.label = QtWidgets.QLabel("Text will appear here:")
      	self.lay.addWidget(self.label)
          # Add a QLineEdit
      	self.text_field = QtWidgets.QLineEdit()
      	self.text_field.setFixedSize(280, 200)
      	self.lay.addWidget(self.text_field)
    
      	self.specWid = pg.PlotWidget(name="spectrum")
      	self.specItem = self.specWid.getPlotItem()
      	self.specItem.setMouseEnabled(y=False)
      	self.specItem.setYRange(0,1000)
      	self.specItem.setXRange(-RANGE,RANGE, padding=0)
    
      	self.specAxis = self.specItem.getAxis("bottom")
      	self.specAxis.setLabel("Frequency [Hz]")
      	self.lay.addWidget(self.specWid)
    
      	self.mainWindow.show()
      	self.app.aboutToQuit.connect(self.close)
    
      def onButtonClick(self):
      	self.label.setText("Whisper res is:")
      	self.text_field.setText("Hello")
    
      def Button_Whisper(self):
      	self.whisper = Whisper('large',model_path= "/home/pdd/myassets/ggml-medium.bin")
      	self.is_whisper_inited = True
      	self.text_field.setText("Whisper INITED")
    
      def Button_TransAudio(self):
      	result = self.whisper.transcribe("/home/pdd/le/pywhisper/output.wav") # result = w.transcribe("myfile.mp3")
      	print(123)
      	text = self.whisper.extract_text(result)
      	self.text_field.setText(str(text))
    
      def Button_Start(self):
      	self.label.setText("Whisper res is:")
      	self.text_field.setText("Start ---")
      	# 录制音频
      	frames = []
      	sample_rate = 16000
      	duration = 5
      	for i in range(0, int(sample_rate / 1024 * duration)):
      		data = self.stream.read(1024)
      		frames.append(data)
     
      	# 将录制的音频保存为wav文件
      	with wave.open("output.wav", 'wb') as wf:
      		wf.setnchannels(CHANNELS) # 2
      		wf.setsampwidth(self.pa.get_sample_size(FORMAT)) # 2
      		wf.setframerate(sample_rate)
      		wf.writeframes(b''.join(frames))
      	self.text_field.setText("保存为wav文件")
      	
    
      def close(self):
      	self.stream.close()
      	sys.exit()
    
      def get_spectrum(self, data):
      	T = 1.0/RATE
      	N = data.shape[0]
      	Pxx = (1./N)*np.fft.fft(data)
      	f = np.fft.fftfreq(N,T)
      	Pxx = np.fft.fftshift(Pxx)
      	f = np.fft.fftshift(f)
    
      	return f.tolist(), (np.absolute(Pxx)).tolist()
    
      def mainLoop(self):
      	while 1:
      		# Sometimes Input overflowed because of mouse events, ignore this
      		try:
      			data = self.readData()
      		except IOError:
      			continue
      		f, Pxx = self.get_spectrum(data)
      		self.specItem.plot(x=f,y=Pxx, clear=True)
      		QtWidgets.QApplication.processEvents()

    if name == 'main':
    sa = SpectrumAnalyzer()
    sa.mainLoop()

相关推荐
AI算法沐枫6 小时前
机器学习到底是什么?
人工智能·python·深度学习·机器学习·数据挖掘·大模型·#ai
小技与小术6 小时前
玩转Flask
开发语言·python·flask
SilentSamsara6 小时前
Python 性能优化:tracemalloc、profiling 与 C 扩展加速
开发语言·python·青少年编程·性能优化
冰小忆6 小时前
大驼峰命名规范和小驼峰命名规范的区别是什么?
开发语言·python
高洁017 小时前
知识图谱:AI的超级大脑
人工智能·python·数据挖掘·知识图谱
知识分享小能手7 小时前
Flask入门学习教程,从入门到精通,Flask智能租房——前期准备 知识点详解(5)
python·学习·flask
Curvatureflight7 小时前
【架构实战】生产级大模型 API 接入指南:流式响应(Streaming)异常处理与监控闭环
python·架构
smj2302_796826528 小时前
解决leetcode第3943题递增后的数对数量
数据结构·python·算法·leetcode
এ慕ོ冬℘゜8 小时前
JS 前端基础面试题
开发语言·前端·javascript
浩少7028 小时前
【无标题】
java·开发语言