python + whisper 读取蓝牙耳机，转为文字

1. 起因，目的:

看到别人做了类似的效果，所以自己也想动手试试看。

2. 先看效果

3. 过程:

我用的是蓝牙耳机，EDIFIER W820NB

先在系统的声音设置里找到这副耳机，并把它设置为 Hands-Free 模式（只有这个模式下才能使用耳机的麦克风）。

代码 1 ，查找设备名称，看看哪个是能用的。

我的设备，能用的是 index=27

python 复制代码

import sounddevice as sd
import numpy as np
import wave
import re

def list_input_devices():
    """Print every audio device that can capture input and return them.

    Each device reported by ``sd.query_devices()`` with at least one input
    channel is printed (index, name, channel count, default sample rate) and
    collected. The device's position in the query result is stored on the
    device dict under the ``'index'`` key.

    Returns:
        A list of the device dicts that have input channels, in query order.
    """
    print("🎤 可用音频输入设备列表：")
    capture_devices = []
    for idx, info in enumerate(sd.query_devices()):
        # Skip output-only devices.
        if info['max_input_channels'] <= 0:
            continue
        info['index'] = idx
        print(f"Index {idx}: {info['name']} - {info['max_input_channels']} channels - {info['default_samplerate']} Hz")
        capture_devices.append(info)
    return capture_devices

def record_audio(device_info, seconds=10):
    """Record from one input device and save the capture as a WAV file.

    The recording is always mono, 16000 Hz, 16-bit. The output file is named
    after the device (with characters other than word characters, whitespace
    and ``-`` replaced by ``_``) plus the ``_output.wav`` suffix, and is
    written to the current working directory.

    Args:
        device_info: Device dict carrying at least ``'index'`` and ``'name'``.
        seconds: Recording length in seconds.

    Returns:
        None. Every failure is reported on stdout instead of being raised.
    """
    try:
        sample_rate = 16000  # fixed at 16000 Hz
        num_channels = 1  # always record mono
        dev_idx = device_info['index']

        print(f"\n🎛️ 使用设备: {device_info['name']}")
        print(f"➡️ 设备索引: {dev_idx}")
        print(f"➡️ 通道数: {num_channels}")
        print(f"➡️ 采样率: {sample_rate} Hz\n")

        # The same stream settings are validated first and then used to record.
        stream_settings = {
            'device': dev_idx,
            'channels': num_channels,
            'samplerate': sample_rate,
            'dtype': 'int16',
        }

        print("🔍 检查设备配置...")
        sd.check_input_settings(**stream_settings)
        print("✅ 配置有效")

        print("🎙️ 正在录音中...")
        frame_count = int(seconds * sample_rate)
        audio_data = sd.rec(frame_count, **stream_settings)
        sd.wait()

        # Build a file name from the device name.
        sanitized = re.sub(r'[^\w\s-]', '_', device_info['name'])
        sanitized = sanitized.replace('\r', '').replace('\n', '').strip()
        output_file = f"{sanitized}_output.wav"

        with wave.open(output_file, 'wb') as wav_out:
            wav_out.setnchannels(num_channels)
            wav_out.setsampwidth(2)  # 2 bytes per sample, matching dtype int16
            wav_out.setframerate(sample_rate)
            wav_out.writeframes(audio_data.tobytes())

        print(f"🎵 录音已保存为 {output_file}")

    except sd.PortAudioError as audio_err:
        print(f"❌ 音频设备错误：{audio_err}")
    except OSError as fs_err:
        print(f"❌ 文件系统错误：{fs_err}")
    except Exception as err:
        print(f"❌ 未知错误：{err}")

if __name__ == "__main__":
    # Script entry point: list the input devices, then make a test recording
    # on every device whose name identifies the EDIFIER W820NB headset in
    # Hands-Free mode.
    print("🔊 使用默认音频接口")
    input_devices = list_input_devices()
    if not input_devices:
        print("❌ 没有可用的音频输入设备。")
    else:
        headset_devices = [
            device
            for device in input_devices
            if 'EDIFIER W820NB' in device['name'] and 'Hands-Free' in device['name']
        ]
        # Previously the script ended silently when no device matched the
        # filter; report it so the user knows nothing was recorded.
        if not headset_devices:
            print("⚠️ 未找到 EDIFIER W820NB 的 Hands-Free 输入设备，请确认耳机已连接并切换到 Hands-Free 模式。")
        for device in headset_devices:
            print(f"正在测试耳机设备: {device['name']}")
            record_audio(device)

代码 2 , 使用 whisper 转为文字

效果很勉强，见文末总结。

python 复制代码

import sounddevice as sd
import numpy as np
import wave
import tempfile
import os
import whisper

# Load the Whisper model once, at module import time; transcribe_audio() reuses it.
model = whisper.load_model("medium")  # can be changed to "tiny", "base", "small", "large"

# Audio capture settings
CHANNELS = 1  # mono; Hands-Free mode usually supports only 1 channel
RATE = 16000  # 16000 Hz, suitable for Hands-Free mode
RECORD_SECONDS = 5  # length of each recording, in seconds
DEVICE_INDEX = 27  # device index already verified to work
# Raw string on purpose: the Windows device name contains backslashes, and in
# a normal literal "\b" becomes a backspace character while "\d" is an
# invalid escape sequence, which corrupted the name when it was printed.
DEVICE_NAME = r"耳机 (@System32\drivers\bthhfenum.sys,#2;%1 Hands-Free AG Audio%0;(EDIFIER W820NB 双金标版))"

def record_audio(seconds=RECORD_SECONDS):
    """Record one chunk of audio and save it to a temporary WAV file.

    Captures ``seconds`` seconds from input device ``DEVICE_INDEX`` as 16-bit
    samples, using the module-level ``RATE`` and ``CHANNELS`` settings.

    Args:
        seconds: Length of the recording in seconds.

    Returns:
        Path of the temporary ``.wav`` file, or ``None`` when recording or
        writing failed (the error is printed, not raised). The file is
        created with ``delete=False``, so the caller must remove it after use.
    """
    tmp_path = None
    try:
        print(f"🎧 正在录音 {seconds} 秒...")
        # Record with sounddevice.
        audio_data = sd.rec(
            int(seconds * RATE),
            samplerate=RATE,
            channels=CHANNELS,
            dtype='int16',
            device=DEVICE_INDEX
        )
        sd.wait()  # block until the recording has finished

        # Save the capture to a temporary audio file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
            tmp_path = tmpfile.name
            with wave.open(tmp_path, 'wb') as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(2)  # 16-bit audio
                wf.setframerate(RATE)
                wf.writeframes(audio_data.tobytes())
        return tmp_path

    except sd.PortAudioError as pae:
        print(f"❌ 音频设备错误：{pae}")
    except Exception as e:
        print(f"❌ 未知错误：{e}")

    # Failure path: the temp file was created with delete=False, so a file
    # left behind by a failed write would otherwise accumulate on disk, since
    # the caller only receives None and cannot clean it up.
    if tmp_path is not None:
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # best-effort cleanup; the original error was already reported
    return None

def transcribe_audio(audio_file):
    """Transcribe an audio file with the module-level model and print the text.

    Transcription is requested with ``language="zh"``. A transcription
    failure is printed rather than raised. The audio file is removed
    afterwards whether or not transcription succeeded.

    Args:
        audio_file: Path of the audio file to transcribe; deleted before
            this function returns.
    """
    try:
        print("🧠 正在识别...")
        transcription = model.transcribe(audio_file, language="zh")
        recognized_text = transcription['text'].strip()
        print("📝 识别结果:", recognized_text)
    except Exception as err:
        print(f"❌ 语音识别失败：{err}")
    finally:
        # Always remove the recording once it has been processed.
        file_still_present = os.path.exists(audio_file)
        if file_still_present:
            os.remove(audio_file)

if __name__ == "__main__":
    # Script entry point: alternate between recording a chunk and transcribing
    # it until the user interrupts with Ctrl+C.
    print(f"🔊 使用设备: {DEVICE_NAME} (索引: {DEVICE_INDEX})")
    print("🎙️ 开始实时听写，按 Ctrl+C 停止")

    try:
        while True:
            recorded_path = record_audio()
            if not recorded_path:
                # record_audio() already printed the cause; skip this round.
                print("⚠️ 录音失败，跳过识别")
            else:
                transcribe_audio(recorded_path)
            # Brief pause (100 ms) so recordings do not start back to back.
            sd.sleep(100)

    except KeyboardInterrupt:
        print("🛑 停止实时识别")
    except Exception as exc:
        print(f"❌ 程序错误：{exc}")

4. 结论 + todo

开始的时候，加载模型比较慢。
能实现近似实时的语音识别（每录 5 秒识别一次），但识别效果不佳。我猜测的原因是：
耳机质量太差，或者有些参数设置不够合理。

python + whisper 读取蓝牙耳机， 转为文字

1. 起因， 目的:

2. 先看效果

3. 过程:

代码 1 ，查找设备名称， 看看哪个是能用的。

代码 2 , 使用 whisper 转为文字

4. 结论 + todo

python + whisper 读取蓝牙耳机，转为文字

1. 起因，目的:

代码 1 ，查找设备名称，看看哪个是能用的。