faster_whisper,视频转文字,并生成字幕文件

faster_whisper,视频转文字,并生成字幕文件(附带exe)

使用说明:

--model:选 tiny/base/small/medium/large(越大越准、越耗资源)。注意:下方脚本没有解析命令行参数,select_model() 固定返回 medium;如需更换模型,请修改该函数的返回值。

模型路径(medium):C:\Users\XXX\.cache\huggingface\hub\models--Systran--faster-whisper-medium\snapshots\08e178d48790749d25932bbc082711ddcfdfbc4f

  1. 直接运行run.exe
  2. 选择视频文件
  3. 等待结果,结果保存在视频所在文件夹,文件名与视频文件名相同,格式为txt和srt
  4. 使用PotPlayer播放视频,自动读取同名的字幕文件
  5. 暂停的时候,可以复制当前字幕内容到剪贴板
python 复制代码
# ============== Force CPU-only execution tuned for 16 cores ==============
import os

# These variables are written before the heavy imports further down,
# presumably so the numeric back-ends see them when they initialise.
os.environ.update(
    {
        # Thread-count settings, all fixed at 16.
        "OMP_NUM_THREADS": "16",
        "MKL_NUM_THREADS": "16",
        "NUMBA_NUM_THREADS": "16",
        "HF_HUB_DISABLE_SYMLINKS_WARNING": "1",
        # Disable the GPU completely.
        "CUDA_VISIBLE_DEVICES": "-1",
    }
)
# ==========================================================================

from faster_whisper import WhisperModel
import opencc
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import subprocess
import json
import datetime

# Convert a time offset in seconds to an SRT timestamp
def format_srt_time(sec):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond first and then split
    with integer arithmetic. Truncating the float remainder instead loses a
    millisecond on values such as 1.001, whose binary representation is
    slightly below the decimal one.

    Args:
        sec: Offset from the start of the media, in seconds (int or float).
            Negative values are clamped to zero.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    total_ms = max(0, int(round(sec * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

# ===================== Model selection dialog (drop-down menu) =====================
def select_model() -> str:
    """Return the name of the recognition model to use.

    The function currently always returns ``"medium"``. The Tk drop-down
    dialog that used to let the user pick a model is kept below as
    commented-out code and is never executed.
    """
    # win = tk.Tk()
    # win.title("选择模型")
    # win.geometry("300x150")
    # win.resizable(False, False)
    # win.attributes('-topmost', True)  # keep the window on top

    # # Model options (add or remove entries as needed)
    # model_options = ["small", "medium", "large"]

    # tk.Label(win, text="请选择识别模型:", font=("微软雅黑", 12)).pack(pady=20)
    # selected = tk.StringVar(value=model_options[0])
    # combo = ttk.Combobox(win, textvariable=selected, values=model_options, state="readonly", font=("微软雅黑", 11))
    # combo.pack(pady=5)

    # result = None
    # def confirm():
    #     nonlocal result
    #     result = selected.get()
    #     win.destroy()

    # tk.Button(win, text="确认", command=confirm, width=10, font=("微软雅黑", 10)).pack(pady=10)
    # win.mainloop()
    # return result
    return "medium"

# Choose the model; stop quietly when no model name comes back.
model_name = select_model()
if not model_name:
    # `exit()` is a helper that the `site` module adds for interactive use
    # and may be missing in a frozen executable; raising SystemExit is the
    # portable way to end the script with the same exit status.
    raise SystemExit

# ===================== Initialisation =====================
# A hidden Tk root is needed so the file dialog and the final message box
# can be shown without an empty main window.
root = tk.Tk()
root.withdraw()
# OpenCC converter built from the 't2s' configuration; it is applied to
# every recognised segment before the text is written out.
cc = opencc.OpenCC('t2s')

# Ask the user for the video file
video_path = filedialog.askopenfilename(
    title="选择视频文件",
    filetypes=[("视频格式", "*.mp4 *.mkv *.mov *.avi *.flv *.wmv"), ("所有文件", "*.*")]
)
if not video_path:
    # The dialog was cancelled. `exit()` comes from the `site` module and
    # may be missing in a frozen executable, so raise SystemExit instead.
    raise SystemExit

# Output files: same folder and base name as the video, .txt and .srt
video_dir = os.path.dirname(video_path)
video_name = os.path.splitext(os.path.basename(video_path))[0]
txt_file = os.path.join(video_dir, f"{video_name}.txt")
srt_file = os.path.join(video_dir, f"{video_name}.srt")

# Get the video duration
def get_video_duration(video):
    """Return the duration of *video* in seconds as reported by ``ffprobe``.

    ``ffprobe`` is asked for the container-level duration in JSON form and
    its standard output is parsed. The lookup is best-effort: if ``ffprobe``
    cannot be started, or its output lacks a usable duration, the
    placeholder value ``100`` is returned instead of raising.

    Args:
        video: Path of the media file to probe.

    Returns:
        The duration as a ``float``, or ``100`` when it cannot be determined.
    """
    try:
        res = subprocess.run(
            ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "json", video],
            stdout=subprocess.PIPE,
        )
        return float(json.loads(res.stdout)["format"]["duration"])
    # Only the expected failures are caught (ffprobe missing, invalid JSON,
    # missing keys, non-numeric duration, bad argument type) so that
    # KeyboardInterrupt and SystemExit still propagate.
    except (OSError, ValueError, KeyError, TypeError, subprocess.SubprocessError):
        return 100
total_time = get_video_duration(video_path)

# ===================== Load the model (CPU only, 16 threads) =====================
print(f"\n加载模型:{model_name}")
# Runtime options for the model, gathered in one place.
# NOTE(review): num_workers=16 alongside cpu_threads=16 may be more than a
# single transcribe() call can use; confirm against the faster-whisper docs.
_whisper_options = {
    "device": "cpu",
    "compute_type": "int8",
    "cpu_threads": 16,
    "num_workers": 16,
}
model = WhisperModel(model_size_or_path=model_name, **_whisper_options)

# ===================== Run recognition =====================
print(f"\n视频总时长:{total_time:.1f}秒,开始识别...\n")
segments, info = model.transcribe(video_path, language="zh", vad_filter=False)

# Write the SRT subtitles and the plain-text transcript in one pass
with open(txt_file, "w", encoding="utf-8") as f_txt, \
     open(srt_file, "w", encoding="utf-8") as f_srt:
    # SRT cue numbers start at 1.
    for idx, seg in enumerate(segments, start=1):
        # Strip surrounding whitespace, then run the text through OpenCC.
        text = cc.convert(seg.text.strip())
        start_str, end_str = (format_srt_time(t) for t in (seg.start, seg.end))

        # Echo each cue to the console as it is produced.
        print(f"[{start_str} → {end_str}] {text}")

        f_txt.write(f"{text}\n")
        # One SRT cue: index line, time range line, text, blank separator.
        f_srt.write(f"{idx}\n{start_str} --> {end_str}\n{text}\n\n")

# Completion notice
messagebox.showinfo("完成", f"模型:{model_name}\n识别成功!\n已生成:\n1. 纯文本.txt\n2. SRT字幕(PotPlayer直接用)")
相关推荐
做怪小疯子9 小时前
华为笔试0429
python·numpy
Warson_L9 小时前
Dictionary
python
寒山李白11 小时前
解决 python-docx 生成的 Word 文档打开时弹出“无法读取内容“警告
python·word·wps·文档·docx·qoder
2401_8323655212 小时前
JavaScript中rest参数(...args)取代arguments的优势
jvm·数据库·python
Sirius.z12 小时前
第J3周:DenseNet121算法详解
python
2301_7796224113 小时前
Go语言怎么用信号量控制并发_Go语言semaphore信号量教程【入门】
jvm·数据库·python
2301_7662834413 小时前
c++如何将控制台输出保存到文件_cout重定向到txt【详解】
jvm·数据库·python
小康小小涵14 小时前
基于ESP32S3实现无人机RID模块底层源码编译
linux·开发语言·python
lzjava202414 小时前
Python的函数
开发语言·python