faster_whisper语音识别

faster_whisper语音识别

检测可用设备:list_available_devices()函数

我这边usb摄像头带麦克风的,所以 DEV_index = 8

1 使用 pyaudio 打开音频设备

2 从音频设备读取数据,传递给 faster_whisper 识别

按键 r 录制 s 停止 q退出

test.py

python 复制代码
# from faster_whisper import WhisperModel

# model = WhisperModel("large-v3")

# audio_path= "mlk.flac"

# segments, info = model.transcribe(audio_path)

# for segment in segments:
#         print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))



from faster_whisper import WhisperModel
import numpy as np
import keyboard
import pynput
from pynput.keyboard import Controller, Listener,Key,KeyCode
import time
import pyaudio
import wave

def list_available_devices():
    print("Available input devices:")
    p = pyaudio.PyAudio()
    for i in range(p.get_device_count()):
        device_info = p.get_device_info_by_index(i)
        if device_info['maxInputChannels'] > 0:  # Check if it's an input device
            print(f"{i}: {device_info['name']}")
    p.terminate()


# List available devices
list_available_devices()


# Available input devices:
# 5: USB Audio: #1 (hw:2,1)
# 6: USB Audio: #2 (hw:2,2)
# 8: aoni webcam A20: USB Audio (hw:3,0)
# 9: pulse
# 10: default

# Replace with the device index you identified by run list_available_devices()
DEV_index = 8  # Replace with your actual device index


class VoiceRecorder:
    def __init__(self, channels=1, rate=16000, format=pyaudio.paInt16):
        self.p = pyaudio.PyAudio()
        self.model = WhisperModel("large-v3")
        self.CHANNELS = channels
        self.RATE = rate
        self.FORMAT = format

    def record(self, seconds=5):
        """
        记录指定秒数的音频。
        """
        CHUNK = 1024
        
        try:
            stream = self.p.open(format=self.FORMAT,
                                channels=self.CHANNELS,
                                rate=self.RATE,
                                input=True,
                                input_device_index=DEV_index,
                                frames_per_buffer=CHUNK)
            print("开始录音...")
            frames = []
            
            for i in range(0, int(self.RATE / CHUNK * seconds)):
                data = stream.read(CHUNK)
                frames.append(data)
                
            print("录音结束.")
        except Exception as e:
            print(f"录音时发生错误:{e}")
            return None
        finally:
            stream.stop_stream()
            stream.close()
        
        return b''.join(frames)

    def transcribe_audio(self, audio_data):
        """
        将音频数据转换为文本。
        """
        try:
            audio_np = np.frombuffer(audio_data, dtype=np.int16)
            if self.CHANNELS > 1:
                audio_np = audio_np.reshape((-1, self.CHANNELS)).mean(axis=1)
            audio_normalized = np.float32(audio_np) / 32768.0
            
            segments, _ = self.model.transcribe(audio_normalized, language='zh', beam_size=5)
            return [segment.text for segment in segments]
        except Exception as e:
            print(f"转录音频时发生错误:{e}")
            return None

    def close(self):
        """
        关闭PyAudio。
        """
        self.p.terminate()

def main():
    global recorder
    global listener
    recorder = VoiceRecorder()

    listener = Listener(
        on_press=on_press
    )
    listener.start()
    listener.join()


def on_press(key:KeyCode):
    print(type(key))
    if key.char == 'r':
        print("开始录音...")
        audio_data = recorder.record()
        if audio_data is not None:
            transcripts = recorder.transcribe_audio(audio_data)
            for text in transcripts:
                print(text)
            print("录音结束.")
    elif key.char == 's':
        print("停止录音.")
    elif key.char == 'q':
        print("退出程序.")
        listener.stop()
        recorder.close()

if __name__ == "__main__":
    main()
相关推荐
byte轻骑兵2 小时前
【HFP】规范精讲[23]: 蓝牙超宽频语音革命——LC3-SWB编码深度解析,重塑无线通话体验
人工智能·语音识别·蓝牙·hfp·通话
爱上珍珠的贝壳5 小时前
ESP32-S3-CAM:豆包语音识别文字后控制小车(五)——认识L298N驱动模块
人工智能·语音识别·智能硬件·esp32-s3·l298n·减速电机
特力康小冬6 小时前
从“看得见”到“喊得出”:智能驱赶防垂钓告警装置如何筑牢输电安全防线?
人工智能·语音识别
herinspace7 小时前
管家婆实用帖-如何使用ping命令检测网络环境
网络·数据库·人工智能·学习·excel·语音识别
齐齐大魔王17 小时前
智能语音技术(八)
人工智能·语音识别
许彰午17 小时前
零成本搭建RAG智能客服:Ollama + Milvus + DeepSeek全程实战
人工智能·语音识别·llama·milvus
mseaspring1 天前
离线语音识别 Vosk 入门指南:开源、轻量、告别网络依赖
人工智能·语音识别
wei_shuo1 天前
基于 Rokid 灵珠 AI 平台:OCR 工作流与学术智能体搭建实操指南
人工智能·语音识别·rokid
山河君1 天前
从后验到先验:语音信噪比估计与DD算法详解
算法·音视频·语音识别·信号处理
ghie90902 天前
隐马尔科夫模型(HMM)在语音识别领域的应用与代码实现
人工智能·语音识别