MAC使用python下载字幕

安装软件

brew install tesseract

安装pytesseract和pillow

复制代码
pip install pytesseract
pip install pillow

pip install SpeechRecognition

pip3 install soundfile

pip3 install torch 

pip3 install whisper

脚本参考:

python 复制代码
import os
import sys

import cv2
import pytesseract
import speech_recognition as sr
from moviepy.video.io.VideoFileClip import VideoFileClip
from pydub import AudioSegment


def extract_subtitles(video_path, output_dir):
    audio = AudioSegment.from_file(video_path, format='mp4')
    audio.export("audio.wav", format="wav")

    r = sr.Recognizer()
    audio_file = sr.AudioFile('audio.wav')
    with audio_file as source:
        audio = r.record(source)
    text = r.recognize_whisper(audio)

    print(text)


def extract_subtitles_v2(video_path, output_dir):
    r = sr.Recognizer()
    clip = VideoFileClip(video_path)
    clip.audio.write_audiofile('audio.wav')
    sound = AudioSegment.from_file('audio.wav', format='wav')

    def transcribe_audio(sound):
        transcript = ''
        with sr.AudioFile(sound) as source:
            audio_text = r.record(source)
            try:
                transcript = r.recognize_whisper(audio_text)
            except sr.UnknownValueError as e:
                print(e)
        return transcript

    transcription = transcribe_audio(sound)
    print(transcription)


def extract_subtitles_v1(video_path, output_dir):
    vidcap = cv2.VideoCapture(video_path)

    frames = []
    success, image = vidcap.read()

    count = 0
    success = True

    while success:
        frames.append(image)
        success, image = vidcap.read()
        count += 1

    # lang='chi_sim'
    for frame in frames:
        # text = pytesseract.image_to_string(frame, lang='eng')
        text = pytesseract.image_to_string(frame, lang='chi_sim')
        print(text)


def extract_subtitles_v3(video_path, output_dir):
    vidcap = cv2.VideoCapture(video_path)

    frames = []
    success, image = vidcap.read()

    count = 0
    success = True

    while success:
        success, image = vidcap.read()
        text = pytesseract.image_to_string(image, lang='chi_sim')
        print(text)
        count += 1


if __name__ == '__main__':
    current_dir = os.getcwd()
    print("current_dir:", current_dir)
    # current_dir + "/" + "png"
    # current_dir + "/" + "png"
    # args: ['merge.py', 'png', 'png']

    args = sys.argv
    print('args:', args)
    input_dir = args[1]
    output_dir = args[2]
    # extract_subtitles(input_dir, output_dir)
    # extract_subtitles_v1(input_dir, output_dir)
    extract_subtitles_v3(input_dir, output_dir)
相关推荐
IT·小灰灰4 分钟前
AI学会理解物理法则:OpenAI Sora 2如何重塑视频生成新范式
人工智能·python·深度学习·机器学习·数据挖掘·音视频
郑州光合科技余经理9 分钟前
技术视角:海外版一站式同城生活服务平台源码解析
java·开发语言·uni-app·php·排序算法·objective-c·生活
郑州光合科技余经理11 分钟前
海外版生活服务系统源码 | 外卖+跑腿一站式平台技术解析
java·开发语言·javascript·git·spring cloud·php·生活
小小Fred11 分钟前
Cortex-M3 LR寄存器的特殊值EXC_RETURN
java·开发语言·jvm
记忆偶然12 分钟前
语音转写技术在专业服务领域的应用实践
python
小小心愿家12 分钟前
线程——对于锁的进一步认识
java·开发语言
暗之星瞳15 分钟前
python爬虫学习——1
爬虫·python·学习
曹牧18 分钟前
Java: FATAL ERROR: processing of -javaagent failed
java·开发语言
无意feel18 分钟前
MacOS 安装neofetch cmatrix lolcat
macos·数字雨·彩虹特效
橘子海全栈攻城狮29 分钟前
【最新源码】基于springboot的会议室预订系统设计与实现 014
java·开发语言·前端·spring boot·后端·spring·自动化