python从麦克风获取音频并识别

麦克风获取

python 复制代码
# 打开麦克风流
stream = audio.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

设置参数

python 复制代码
# 录音参数
FORMAT = pyaudio.paInt16  # 16-bit resolution
CHANNELS = 1              # 单声道
RATE = 16000              # 采样率 16kHz
CHUNK = 1024              # 数据块大小
RECORD_SECONDS = 5        # 录制时长 (秒)
WAVE_OUTPUT_FILENAME = "output.wav"

读取数据块

python 复制代码
# 循环读取数据块
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)

阿里语音识别模型加载

python 复制代码
paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

model = AutoModel(model=paraformer_path, model_revision="v2.0.4",
                  vad_model=fsmn_path, vad_model_revision="v2.0.4",
                  punc_model=ct_punc_path, punc_model_revision="v2.0.4",
                  spk_model=cam_path, spk_model_revision="v2.0.2",
                  device="cpu"
                  )

阿里语音识别

python 复制代码
res = model.generate(input=WAVE_OUTPUT_FILENAME,
                     batch_size_s=16000,
                     hotword='魔搭')

整体代码

python 复制代码
import pyaudio
import wave
import threading
import keyboard
from funasr import AutoModel

# 录音参数
FORMAT = pyaudio.paInt16  # 16-bit resolution
CHANNELS = 1  # 单声道
RATE = 16000  # 采样率 16kHz
CHUNK = 1024  # 数据块大小
WAVE_OUTPUT_FILENAME = "./wav_data/output.wav"

# 初始化 PyAudio
audio = pyaudio.PyAudio()

frames = []
stream = None
recording = False

paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

model = AutoModel(model=paraformer_path, model_revision="v2.0.4",
                  vad_model=fsmn_path, vad_model_revision="v2.0.4",
                  punc_model=ct_punc_path, punc_model_revision="v2.0.4",
                  spk_model=cam_path, spk_model_revision="v2.0.2",
                  device="cpu"
                  )

print("加载模型完成!!!")


def start_recording():
    """
    开始录音
    """
    global stream, recording
    if not recording:
        print("开始录音...")
        recording = True
        stream = audio.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
        record_thread = threading.Thread(target=record_audio)
        record_thread.start()


def stop_recording():
    """
    停止录音并进行识别
    """
    global stream, recording
    if recording:
        print("录音结束.")
        recording = False
        stream.stop_stream()
        stream.close()
        save_audio()
        audio.terminate()
        print("开始识别!!!")
        res = model.generate(input=WAVE_OUTPUT_FILENAME,
                             batch_size_s=16000,
                             hotword='魔搭')
        print("识别结束!!!")
        print("识别结果:", res)


def record_audio():
    """
    录音功能实现
    """
    while recording:
        data = stream.read(CHUNK)
        frames.append(data)


def save_audio():
    """
    保存录音文件
    """
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(audio.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()


# 监听 Ctrl 键开始录音
keyboard.add_hotkey('ctrl', start_recording)

# 监听 Alt 键结束录音
keyboard.add_hotkey('alt', stop_recording)

print("按 Ctrl 开始录音,按 Alt 结束录音")
keyboard.wait()  # 保持程序运行
相关推荐
小江的记录本7 小时前
【JVM虚拟机】垃圾回收GC:四种引用类型:强引用、软引用、弱引用、虚引用(附《思维导图》+《面试高频考点清单》)
java·jvm·spring boot·后端·python·spring·面试
APIshop8 小时前
Python 获取 1688 商品采集 API 接口 | 工厂货源自动化对接商品信息 | 无需选品
运维·python·自动化
deepin_sir8 小时前
10 - 函数
开发语言·python
charlee449 小时前
《GIS基础原理与技术实践》配套案例(Python版)
python·conda·numpy·gis·环境配置
枫叶林FYL9 小时前
项目十:事件溯源仓储管理系统(WMS)仿真实现
开发语言·python
君为先-bey9 小时前
CogVideoX——Transformer从文本到视频的扩散模型
深度学习·音视频·transformer·扩散模型
Raink老师9 小时前
【AI面试临阵磨枪-77】音视频 + AI:实时字幕、翻译、降噪、虚拟人、多模态对话
人工智能·面试·音视频
FrameNotWork10 小时前
HarmonyOS 短视频滑动交互实现:打造流畅的上下切换体验
音视频·交互·harmonyos
渣渣xiong11 小时前
从零开始:前端转型AI agent直到就业第五十七天-第五十八天
前端·人工智能·python
小L~~~12 小时前
基于贪心策略的混合遗传算法求解01背包问题
python·算法