CODE

python 端的绑定和本文一样,还需要将 `cdef char* LANGUAGE = b'en'` 改为中文 `zh`(也可以在函数中配置一个参数来修改这个值)。

PS:本来想尝试
`cdef whisper_context* whisper_init_from_file_with_params_no_state(char*, whisper_full_params)`
然后进行调用,但是发现最新版的 whisper.h 没有这个 API 了,所以先不加了。

import pyaudio
import pyaudio
import wave
import struct
import sys

import numpy as np
import pyqtgraph as pg
from PyQt5 import QtWidgets
from PyQt5.QtCore import Qt

from whispercpp import Whisper

# Audio format (check Audio MIDI Setup if on Mac).
FORMAT = pyaudio.paInt16
RATE = 16000
CHANNELS = 2

# Plot range [-RANGE, RANGE]; default is nyquist/2.
RANGE = None
if not RANGE:
    RANGE = RATE / 2

# How much data to plot per FFT.
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE * INPUT_BLOCK_TIME)

# Which channel to display/analyze? ("l" or "r")
LR = "l"
class SpectrumAnalyzer():
    """Live microphone spectrum viewer (PyQt5 + pyqtgraph) with buttons to
    record a short WAV clip and transcribe it via whisper.cpp bindings."""

    def __init__(self):
        # Original source had `def init(self)` — missing the dunder
        # underscores, so setup never ran on construction. Fixed to __init__.
        self.pa = pyaudio.PyAudio()
        self.initMicrophone()
        self.initUI()

    def find_input_device(self):
        """Return the index of a capture device named exactly 'mic' or
        'input' (case-insensitive), or None to fall back to the default.

        NOTE(review): this is an equality test against the whole device
        name — names like 'Built-in Microphone' will NOT match; confirm
        whether a substring check (`"mic" in devinfo["name"].lower()`)
        was intended.
        """
        device_index = None
        for i in range(self.pa.get_device_count()):
            devinfo = self.pa.get_device_info_by_index(i)
            if devinfo["name"].lower() in ["mic", "input"]:
                device_index = i
        return device_index

    def initMicrophone(self):
        """Open the PyAudio input stream using the module-level settings."""
        device_index = self.find_input_device()
        self.stream = self.pa.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            input_device_index=device_index,
            frames_per_buffer=INPUT_FRAMES_PER_BLOCK)

    def readData(self):
        """Read one block from the stream and return a numpy array of
        int16 samples; for stereo input, return only the channel
        selected by the module-level LR setting."""
        block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        count = len(block) / 2  # 2 bytes per int16 sample
        format = "%dh" % (count)
        shorts = struct.unpack(format, block)
        if CHANNELS == 1:
            return np.array(shorts)
        else:
            # Interleaved stereo: even indices = left, odd = right.
            l = shorts[::2]
            r = shorts[1::2]
            if LR == 'l':
                return np.array(l)
            else:
                return np.array(r)

    def initUI(self):
        """Build the Qt window: control buttons, transcription text output,
        and the pyqtgraph spectrum plot."""
        self.app = QtWidgets.QApplication([])
        # self.app = QtGui.QApplication([])
        # NOTE(review): quitOnLastWindowClosed() is the getter; the setter
        # is setQuitOnLastWindowClosed(True). Left as in the original.
        self.app.quitOnLastWindowClosed()
        self.mainWindow = QtWidgets.QMainWindow()
        self.mainWindow.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
        self.mainWindow.setWindowTitle("Spectrum Analyzer")
        self.mainWindow.setGeometry(100, 100, 300, 200)
        # self.mainWindow.resize(800,300)
        self.centralWid = QtWidgets.QWidget()
        self.mainWindow.setCentralWidget(self.centralWid)
        self.lay = QtWidgets.QVBoxLayout()
        self.centralWid.setLayout(self.lay)
        # Button: record 5 s of microphone audio to output.wav.
        self.button_start = QtWidgets.QPushButton("Start Record Audio")
        self.button_start.clicked.connect(self.Button_Start)
        self.lay.addWidget(self.button_start)
        # Button: load the whisper model (lazy, blocking).
        self.button_end = QtWidgets.QPushButton("whisper Init")
        self.whisper = None
        self.is_whisper_inited = False
        self.button_end.clicked.connect(self.Button_Whisper)
        self.lay.addWidget(self.button_end)
        # Button: transcribe the recorded file.
        self.button = QtWidgets.QPushButton("TRANS AUDIO")
        self.button.clicked.connect(self.Button_TransAudio)
        self.lay.addWidget(self.button)
        # Text label for status.
        self.label = QtWidgets.QLabel("Text will appear here:")
        self.lay.addWidget(self.label)
        # Output field for transcription results.
        self.text_field = QtWidgets.QLineEdit()
        self.text_field.setFixedSize(280, 200)
        self.lay.addWidget(self.text_field)
        # Spectrum plot widget.
        self.specWid = pg.PlotWidget(name="spectrum")
        self.specItem = self.specWid.getPlotItem()
        self.specItem.setMouseEnabled(y=False)
        self.specItem.setYRange(0, 1000)
        self.specItem.setXRange(-RANGE, RANGE, padding=0)
        self.specAxis = self.specItem.getAxis("bottom")
        self.specAxis.setLabel("Frequency [Hz]")
        self.lay.addWidget(self.specWid)
        self.mainWindow.show()
        self.app.aboutToQuit.connect(self.close)

    def onButtonClick(self):
        self.label.setText("Whisper res is:")
        self.text_field.setText("Hello")

    def Button_Whisper(self):
        """Load the whisper model (blocking) and mark it ready.

        NOTE(review): model name 'large' vs a 'ggml-medium.bin' path —
        confirm which model is actually intended.
        """
        self.whisper = Whisper('large', model_path="/home/pdd/myassets/ggml-medium.bin")
        self.is_whisper_inited = True
        self.text_field.setText("Whisper INITED")

    def Button_TransAudio(self):
        """Transcribe the previously recorded WAV and show the text."""
        result = self.whisper.transcribe("/home/pdd/le/pywhisper/output.wav")
        # result = w.transcribe("myfile.mp3")
        print(123)
        text = self.whisper.extract_text(result)
        self.text_field.setText(str(text))

    def Button_Start(self):
        """Record ~5 seconds from the open stream and save it to
        output.wav in the working directory."""
        self.label.setText("Whisper res is:")
        self.text_field.setText("Start ---")
        # Record audio in 1024-frame chunks.
        frames = []
        sample_rate = 16000
        duration = 5
        for i in range(0, int(sample_rate / 1024 * duration)):
            data = self.stream.read(1024)
            frames.append(data)
        # Save the recorded audio as a WAV file.
        with wave.open("output.wav", 'wb') as wf:
            wf.setnchannels(CHANNELS)  # 2
            wf.setsampwidth(self.pa.get_sample_size(FORMAT))  # 2
            wf.setframerate(sample_rate)
            wf.writeframes(b''.join(frames))
        self.text_field.setText("保存为wav文件")

    def close(self):
        """Shut down the audio stream and exit the process."""
        self.stream.close()
        sys.exit()

    def get_spectrum(self, data):
        """Return (frequencies, magnitudes) of the FFT of `data`,
        fftshift-ed so 0 Hz is centred; both as Python lists."""
        T = 1.0 / RATE
        N = data.shape[0]
        Pxx = (1. / N) * np.fft.fft(data)
        f = np.fft.fftfreq(N, T)
        Pxx = np.fft.fftshift(Pxx)
        f = np.fft.fftshift(f)
        return f.tolist(), (np.absolute(Pxx)).tolist()

    def mainLoop(self):
        """Continuously read audio, plot its spectrum, and pump Qt events."""
        while 1:
            # Sometimes input overflowed because of mouse events; ignore this.
            try:
                data = self.readData()
            except IOError:
                continue
            f, Pxx = self.get_spectrum(data)
            self.specItem.plot(x=f, y=Pxx, clear=True)
            QtWidgets.QApplication.processEvents()
if __name__ == '__main__':
    # Original read `if name == 'main':`, which raises NameError at import
    # time; fixed to the standard Python entry-point guard.
    sa = SpectrumAnalyzer()
    sa.mainLoop()