MAC使用python下载字幕

安装软件

brew install tesseract

安装pytesseract和pillow

pip install pytesseract
pip install pillow

pip install SpeechRecognition

pip3 install soundfile

pip3 install torch 

pip3 install whisper

脚本参考:

python 复制代码
import os
import sys

import cv2
import pytesseract
import speech_recognition as sr
from moviepy.video.io.VideoFileClip import VideoFileClip
from pydub import AudioSegment


def extract_subtitles(video_path, output_dir):
    audio = AudioSegment.from_file(video_path, format='mp4')
    audio.export("audio.wav", format="wav")

    r = sr.Recognizer()
    audio_file = sr.AudioFile('audio.wav')
    with audio_file as source:
        audio = r.record(source)
    text = r.recognize_whisper(audio)

    print(text)


def extract_subtitles_v2(video_path, output_dir):
    r = sr.Recognizer()
    clip = VideoFileClip(video_path)
    clip.audio.write_audiofile('audio.wav')
    sound = AudioSegment.from_file('audio.wav', format='wav')

    def transcribe_audio(sound):
        transcript = ''
        with sr.AudioFile(sound) as source:
            audio_text = r.record(source)
            try:
                transcript = r.recognize_whisper(audio_text)
            except sr.UnknownValueError as e:
                print(e)
        return transcript

    transcription = transcribe_audio(sound)
    print(transcription)


def extract_subtitles_v1(video_path, output_dir):
    vidcap = cv2.VideoCapture(video_path)

    frames = []
    success, image = vidcap.read()

    count = 0
    success = True

    while success:
        frames.append(image)
        success, image = vidcap.read()
        count += 1

    # lang='chi_sim'
    for frame in frames:
        # text = pytesseract.image_to_string(frame, lang='eng')
        text = pytesseract.image_to_string(frame, lang='chi_sim')
        print(text)


def extract_subtitles_v3(video_path, output_dir):
    vidcap = cv2.VideoCapture(video_path)

    frames = []
    success, image = vidcap.read()

    count = 0
    success = True

    while success:
        success, image = vidcap.read()
        text = pytesseract.image_to_string(image, lang='chi_sim')
        print(text)
        count += 1


if __name__ == '__main__':
    current_dir = os.getcwd()
    print("current_dir:", current_dir)
    # current_dir + "/" + "png"
    # current_dir + "/" + "png"
    # args: ['merge.py', 'png', 'png']

    args = sys.argv
    print('args:', args)
    input_dir = args[1]
    output_dir = args[2]
    # extract_subtitles(input_dir, output_dir)
    # extract_subtitles_v1(input_dir, output_dir)
    extract_subtitles_v3(input_dir, output_dir)
相关推荐
Ciderw2 分钟前
Go中的三种锁
开发语言·c++·后端·golang·互斥锁·
查理零世4 分钟前
【算法】经典博弈论问题——巴什博弈 python
开发语言·python·算法
jk_10135 分钟前
MATLAB中insertAfter函数用法
开发语言·matlab
啥也学不会a43 分钟前
PLC通信
开发语言·网络·网络协议·c#
汤姆和佩琦43 分钟前
2025-1-21-sklearn学习(43) 使用 scikit-learn 介绍机器学习 楼上阑干横斗柄,寒露人远鸡相应。
人工智能·python·学习·机器学习·scikit-learn·sklearn
C++小厨神1 小时前
C#语言的学习路线
开发语言·后端·golang
HyperAI超神经1 小时前
【TVM教程】为 ARM CPU 自动调优卷积网络
arm开发·人工智能·python·深度学习·机器学习·tvm·编译器
心之语歌1 小时前
LiteFlow Spring boot使用方式
java·开发语言
缺的不是资料,是学习的心2 小时前
使用qwen作为基座训练分类大模型
python·机器学习·分类
人才程序员2 小时前
【C++拓展】vs2022使用SQlite3
c语言·开发语言·数据库·c++·qt·ui·sqlite