CODE

Python 端的绑定和本文一样，还需要将 `cdef char* LANGUAGE = b'en'` 中的 `b'en'` 改为中文对应的 `b'zh'`（也可以给函数增加一个参数来修改这个值）。

PS：本来想尝试声明 `cdef whisper_context* whisper_init_from_file_with_params_no_state(char*, whisper_full_params)` 然后进行调用，但是发现最新版的 whisper.h 中没有这个 API 了，所以先不加了。

import pyaudio
import wave
import struct
import sys
import numpy as np

import pyqtgraph as pg
from PyQt5 import QtWidgets
from PyQt5.QtCore import Qt

from whispercpp import Whisper

# Audio Format (check Audio MIDI Setup if on Mac)
FORMAT = pyaudio.paInt16
RATE = 16000
CHANNELS = 2

# Set Plot Range [-RANGE, RANGE]; defaults to RATE/2 (the Nyquist frequency)
RANGE = None
if not RANGE:
    RANGE = RATE/2

# Set these parameters (How much data to plot per FFT)
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)

# Which Channel? (L or R)
LR = "l"

class SpectrumAnalyzer():
    """PyQt5 window that plots a live microphone spectrum and drives Whisper.

    The window offers three buttons: record a short clip to a WAV file,
    load the Whisper model, and transcribe a WAV file into the text field.
    Below them a pyqtgraph plot shows the FFT magnitude of the most recent
    microphone block; it is refreshed continuously by ``mainLoop``.
    """

    # Substrings that mark an audio device name as a usable input.
    DEVICE_KEYWORDS = ("mic", "input")

    # NOTE(review): the model is requested as 'large' while the file name says
    # "medium" -- confirm which one the whispercpp binding actually loads.
    MODEL_NAME = 'large'
    MODEL_PATH = "/home/pdd/myassets/ggml-medium.bin"

    # NOTE(review): the clip is written relative to the working directory but
    # transcribed from an absolute path; they only coincide when the program
    # is started from that directory.
    RECORD_PATH = "output.wav"
    TRANSCRIBE_PATH = "/home/pdd/le/pywhisper/output.wav"

    RECORD_SECONDS = 5
    RECORD_CHUNK = 1024  # frames requested per stream.read() while recording

    def __init__(self):
        """Open the audio backend, the input stream and the window."""
        self.pa = pyaudio.PyAudio()
        self.initMicrophone()
        self.initUI()

    def find_input_device(self):
        """Return the index of an input-looking audio device, or ``None``.

        A device qualifies when its lower-cased name contains one of
        ``DEVICE_KEYWORDS``. When several devices qualify the last one wins.
        """
        device_index = None
        for i in range(self.pa.get_device_count()):
            name = self.pa.get_device_info_by_index(i)["name"].lower()
            # Substring match: real device names look like "Built-in Microphone",
            # never exactly "mic" or "input".
            if any(keyword in name for keyword in self.DEVICE_KEYWORDS):
                device_index = i
        return device_index

    def initMicrophone(self):
        """Open the capture stream on the detected input device."""
        device_index = self.find_input_device()

        self.stream = self.pa.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            input_device_index=device_index,
            frames_per_buffer=INPUT_FRAMES_PER_BLOCK,
        )

    def readData(self):
        """Read one block from the stream and return one channel as an array.

        The raw bytes are unpacked as native 16-bit integers. With two
        channels the samples are interleaved, so the channel chosen by ``LR``
        is picked out with a stride of two.
        """
        block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        count = len(block) // 2  # two bytes per 16-bit sample
        shorts = struct.unpack("%dh" % count, block)
        if CHANNELS == 1:
            return np.array(shorts)
        if LR == 'l':
            return np.array(shorts[::2])
        return np.array(shorts[1::2])

    def initUI(self):
        """Build the window: three buttons, a label, a text field and the plot."""
        self.app = QtWidgets.QApplication([])
        # The original called the getter quitOnLastWindowClosed() and threw the
        # result away; the setter is what makes the intent explicit.
        self.app.setQuitOnLastWindowClosed(True)

        self.mainWindow = QtWidgets.QMainWindow()
        self.mainWindow.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
        self.mainWindow.setWindowTitle("Spectrum Analyzer")
        self.mainWindow.setGeometry(100, 100, 300, 200)
        self.centralWid = QtWidgets.QWidget()
        self.mainWindow.setCentralWidget(self.centralWid)
        self.lay = QtWidgets.QVBoxLayout()
        self.centralWid.setLayout(self.lay)

        # Buttons
        self.button_start = QtWidgets.QPushButton("Start Record Audio")
        self.button_start.clicked.connect(self.Button_Start)
        self.lay.addWidget(self.button_start)
        self.button_end = QtWidgets.QPushButton("whisper Init")
        self.whisper = None
        self.is_whisper_inited = False
        self.button_end.clicked.connect(self.Button_Whisper)
        self.lay.addWidget(self.button_end)
        self.button = QtWidgets.QPushButton("TRANS AUDIO")
        self.button.clicked.connect(self.Button_TransAudio)
        self.lay.addWidget(self.button)

        # Text label
        self.label = QtWidgets.QLabel("Text will appear here:")
        self.lay.addWidget(self.label)

        # Text field that shows status messages and the transcription
        self.text_field = QtWidgets.QLineEdit()
        self.text_field.setFixedSize(280, 200)
        self.lay.addWidget(self.text_field)

        # Spectrum plot
        self.specWid = pg.PlotWidget(name="spectrum")
        self.specItem = self.specWid.getPlotItem()
        self.specItem.setMouseEnabled(y=False)
        self.specItem.setYRange(0, 1000)
        self.specItem.setXRange(-RANGE, RANGE, padding=0)

        self.specAxis = self.specItem.getAxis("bottom")
        self.specAxis.setLabel("Frequency [Hz]")
        self.lay.addWidget(self.specWid)

        self.mainWindow.show()
        self.app.aboutToQuit.connect(self.close)

    def onButtonClick(self):
        """Show a placeholder result (not connected to any button here)."""
        self.label.setText("Whisper res is:")
        self.text_field.setText("Hello")

    def Button_Whisper(self):
        """Load the Whisper model and report completion in the text field."""
        self.whisper = Whisper(self.MODEL_NAME, model_path=self.MODEL_PATH)
        self.is_whisper_inited = True
        self.text_field.setText("Whisper INITED")

    def Button_TransAudio(self):
        """Transcribe ``TRANSCRIBE_PATH`` and show the text.

        If the model has not been loaded yet, a hint is shown instead of
        failing on ``None``.
        """
        if self.whisper is None:
            self.text_field.setText("Whisper is not initialised")
            return
        result = self.whisper.transcribe(self.TRANSCRIBE_PATH)
        text = self.whisper.extract_text(result)
        self.text_field.setText(str(text))

    def Button_Start(self):
        """Record ``RECORD_SECONDS`` of audio and save it to ``RECORD_PATH``.

        The recording loop blocks the GUI thread for the whole duration.
        """
        self.label.setText("Whisper res is:")
        self.text_field.setText("Start ---")
        # Paint the status now; the blocking loop below would otherwise delay
        # it until recording has already finished.
        self.label.repaint()
        self.text_field.repaint()

        # Record audio
        frames = []
        chunk_count = int(RATE / self.RECORD_CHUNK * self.RECORD_SECONDS)
        for _ in range(chunk_count):
            try:
                frames.append(self.stream.read(self.RECORD_CHUNK))
            except IOError:
                # Same policy as mainLoop: an input overflow drops the chunk
                # instead of aborting the recording.
                continue

        # Save the recorded audio as a WAV file
        with wave.open(self.RECORD_PATH, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self.pa.get_sample_size(FORMAT))
            # Use the rate the stream was opened with so the header always
            # matches the captured data.
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))
        self.text_field.setText("保存为wav文件")

    def close(self):
        """Release the audio resources and exit the process."""
        self.stream.stop_stream()
        self.stream.close()
        self.pa.terminate()
        sys.exit()

    def get_spectrum(self, data):
        """Return ``(frequencies, magnitudes)`` of *data* as two lists.

        The FFT is scaled by ``1/N`` and both outputs are shifted so that
        frequencies run from negative to positive. Empty input yields two
        empty lists.
        """
        N = data.shape[0]
        if N == 0:
            return [], []
        T = 1.0/RATE
        Pxx = np.fft.fftshift((1./N)*np.fft.fft(data))
        f = np.fft.fftshift(np.fft.fftfreq(N, T))

        return f.tolist(), (np.absolute(Pxx)).tolist()

    def mainLoop(self):
        """Read, transform and plot blocks forever, pumping Qt events each pass."""
        while True:
            # Sometimes Input overflowed because of mouse events, ignore this
            try:
                data = self.readData()
            except IOError:
                continue
            f, Pxx = self.get_spectrum(data)
            self.specItem.plot(x=f, y=Pxx, clear=True)
            QtWidgets.QApplication.processEvents()

# Script entry point: build the analyzer window and run its plotting loop.
if __name__ == '__main__':
    sa = SpectrumAnalyzer()
    sa.mainLoop()

webassembly003 whisper.cpp的python绑定实现+Cython+Setuptools的GUI程序

CODE

Audio Format (check Audio MIDI Setup if on Mac)

Set Plot Range [-RANGE,RANGE], default is nyquist/2

Set these parameters (How much data to plot per FFT)

Which Channel? (L or R)