ass字幕嵌入mp4带偏移

python 复制代码
# 格式转化文件,包含多种文件的互相转化,主要与视频相关
from pathlib import Path
import subprocess
import random
import os
import re



class Utils(object):
    @staticmethod
    def get_decimal_part(x: float) -> float:
        s = format(x, '.15f')  # 格式化为15位小数字符串
        if '.' in s:
            _, decimal_part = s.split('.')
            decimal_str = decimal_part.rstrip('0')  # 移除末尾多余的0
            if not decimal_str:  # 如果小数部分全为0
                return 0.0
            result = float("0." + decimal_str)
            return -result if x < 0 else result  # 处理负数
        return 0.0
    
    @staticmethod
    def file_path_processor(*file_paths: str) -> str | list[str]:
        if len(file_paths) == 1:
            return file_paths[0].replace("\\", "\\\\")
        return [file_path.replace("\\", "\\\\") for file_path in file_paths]


class TimeConverter(object):
    """Parse, shift and re-format subtitle timestamps (SRT and ASS styles)."""

    def time_analysis(self, time_str, offset: float = 0.0) -> tuple[int, int, int, int]:
        """Parse a timestamp and optionally shift it by ``offset`` seconds.

        Accepted shapes are ``H:MM:SS.cc`` / ``HH:MM:SS,mmm`` and similar:
        three 1-2 digit groups separated by any non-digit, then ``.`` or
        ``,`` and a 1-3 digit fraction of a second.

        Args:
            time_str: The timestamp text to parse.
            offset: Seconds to add; may be negative or fractional.

        Returns:
            ``(hours, minutes, seconds, milliseconds)``. With a zero offset
            the parsed fields are returned as-is. With a non-zero offset the
            result is normalised, and a shift that would land before zero is
            clamped to ``(0, 0, 0, 0)`` because subtitle timestamps cannot
            be negative.

        Raises:
            ValueError: If ``time_str`` does not match the expected shape.
        """
        pattern = r'^(\d{1,2})\D(\d{1,2})\D(\d{1,2})[.,](\d{1,3})$'
        match = re.match(pattern, time_str)
        if not match:
            raise ValueError(f"无法解析的时间格式: {time_str}")
        hours, minutes, seconds = (int(part) for part in match.groups()[:3])
        # The last group is a decimal fraction, so pad on the right:
        # "5" -> 500 ms, "05" -> 50 ms.
        milliseconds = int(match.group(4).ljust(3, "0"))
        if offset == 0:
            return hours, minutes, seconds, milliseconds

        # Work in total milliseconds so that carries/borrows are handled by
        # plain integer arithmetic, for positive and negative offsets alike.
        total_ms = ((hours * 60 + minutes) * 60 + seconds) * 1000 + milliseconds
        total_ms = max(total_ms + round(offset * 1000), 0)
        total_seconds, milliseconds = divmod(total_ms, 1000)
        total_minutes, seconds = divmod(total_seconds, 60)
        hours, minutes = divmod(total_minutes, 60)
        return hours, minutes, seconds, milliseconds

    def format_number(self, input_str: str, length: int) -> str:
        """Fit a digit string to exactly ``length`` characters.

        Shorter inputs are left-padded with zeros; longer inputs are
        truncated to their first ``length`` characters.

        Raises:
            ValueError: If ``input_str`` is not all digits or ``length`` is
                not a positive integer.
        """
        if not input_str.isdigit():
            raise ValueError("输入必须是数字字符串")
        if not isinstance(length, int) or length <= 0:
            raise ValueError("长度必须是正整数")
        if len(input_str) > length:
            return input_str[:length]  # truncate the excess
        return input_str.zfill(length)

    def format_copy(self, ref_time):
        """Build a formatter that mimics the layout of ``ref_time``.

        ``ref_time`` supplies the separators and the digit width of each
        field, e.g. ``"00:00:00,000"`` (SRT) or ``"0:00:00.00"`` (ASS).

        Returns:
            A function ``(hours, minutes, seconds, milliseconds) -> str``.
            Hours, minutes and seconds are zero-padded to at least the
            reference width and never truncated. The milliseconds are
            treated as a fraction of a second: rendered as three digits and
            then cut or right-padded to the reference width, so 50 ms
            becomes ``"05"`` in a two-digit (centisecond) layout.

        Raises:
            ValueError: If ``ref_time`` does not have four digit groups
                separated by single non-digit characters, or (from the
                returned formatter) if any component is negative.
        """
        match = re.match(r"^(\d+)(\D)(\d+)(\D)(\d+)(\D)(\d+)$", ref_time)
        if not match:
            raise ValueError(f"无法复制的时间格式: {ref_time}")
        h_str, sep1, m_str, sep2, s_str, sep3, frac_str = match.groups()
        h_len, m_len, s_len, frac_len = map(len, (h_str, m_str, s_str, frac_str))

        def time_formater(hours: int, minutes: int, seconds: int, milliseconds: int) -> str:
            if min(hours, minutes, seconds, milliseconds) < 0:
                raise ValueError("输入必须是数字字符串")
            # Render as a 3-digit fraction first, then adapt to the target
            # precision; padding on the left here would turn .05 into .50.
            fraction = f"{milliseconds:03d}".ljust(frac_len, "0")[:frac_len]
            return (
                f"{str(hours).zfill(h_len)}{sep1}"
                f"{str(minutes).zfill(m_len)}{sep2}"
                f"{str(seconds).zfill(s_len)}{sep3}"
                f"{fraction}"
            )

        return time_formater


class SubtitleConverter(object):
    """Convert and time-shift ASS / SRT subtitle files."""

    def ass_analyser(self, ass_file) -> list[dict]:
        """Parse the ``[Events]`` section of an ASS file into row dicts.

        The first non-blank line after ``[Events]`` is taken as the field
        list; every following non-blank line up to the next ``[...]``
        section header becomes one dict mapping field name to value. Lines
        are split on commas at most ``len(fields) - 1`` times so that
        commas inside the last field (the text) are preserved.

        Note that the line prefixes are kept as part of the first column:
        its key is e.g. ``"Format: Layer"`` and its values look like
        ``"Dialogue: 0"``.
        """
        ass_object = []
        fields = None
        in_events = False
        # utf-8-sig transparently drops a leading BOM if the file has one.
        with open(ass_file, "r", encoding="utf-8-sig") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    # Blank lines carry no event; turning them into rows
                    # would yield dicts without "Start"/"End"/"Text".
                    continue
                if line == "[Events]":
                    in_events = True
                    fields = None
                    continue
                if re.match(r"\[.*?\]$", line):
                    in_events = False
                    continue
                if not in_events:
                    continue
                if fields is None:
                    fields = [name.strip() for name in line.split(",")]
                    continue
                values = [v.strip() for v in line.split(",", maxsplit=len(fields) - 1)]
                ass_object.append(dict(zip(fields, values)))
        return ass_object

    def ass2srt(self, ass_file: str, srt_file: str, offset: float) -> None:
        """Write the events of ``ass_file`` to ``srt_file`` in SRT format.

        Only ``Dialogue`` events whose style name starts with ``标准`` are
        exported; every timestamp is shifted by ``offset`` seconds. Does
        nothing when ``ass_file`` does not end with ``.ass``.
        """
        if not ass_file.endswith(".ass"):
            return
        time_analyser = TimeConverter()
        formatter = time_analyser.format_copy("00:00:00,000")
        count = 1
        with open(srt_file, "w", encoding="utf-8") as srt_file_obj:
            for row in self.ass_analyser(ass_file):
                # The first column holds the line prefix; "Comment:" events
                # are not meant to be displayed.
                if not next(iter(row.values()), "").startswith("Dialogue:"):
                    continue
                if not {"Start", "End", "Text"} <= row.keys():
                    continue  # malformed / truncated event line
                if not re.match("标准", row.get("Style", "")):
                    continue
                start_time = formatter(*time_analyser.time_analysis(row["Start"], offset))
                end_time = formatter(*time_analyser.time_analysis(row["End"], offset))
                subtitle = row["Text"]
                srt_file_obj.write(
                    f"{count}\n{start_time} --> {end_time}\n{subtitle}\n\n"
                )
                count += 1

    def ass2ass_eng(self, ass_file: str, output_file: str, remove_style='标准-Chi') -> None:
        """Copy an ASS file, zeroing style outlines and dropping one style.

        In ``[V4+ Styles]`` the 17th comma-separated value of every
        ``Style:`` line (the outline width in the standard V4+ layout) is
        set to ``0``. In ``[Events]`` every ``Dialogue:`` line whose fourth
        field equals ``remove_style`` is removed. Everything else is copied
        through unchanged.

        This method parses the file on its own and does not use
        ``ass_analyser``. The original author's motivation: downloaded
        subtitles (e.g. from http://154.17.3.217:8888/sub/new) often mix
        Chinese and English lines, while one may want to keep only one
        language.
        """
        with open(ass_file, 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()
        output = []
        current_section = None
        for line in lines:
            line = line.rstrip('\r\n')
            stripped = line.strip()
            if stripped.startswith('[') and stripped.endswith(']'):
                current_section = stripped
                output.append(line)
                continue
            if current_section == '[V4+ Styles]' and line.startswith('Style:'):
                parts = line[len('Style:'):].split(',')
                if len(parts) >= 17:
                    parts[16] = '0'
                    line = 'Style:' + ','.join(parts)
            elif current_section == '[Events]' and line.startswith('Dialogue:'):
                fields = line[len('Dialogue:'):].strip().split(',', 9)
                if len(fields) >= 4 and fields[3].strip() == remove_style:
                    continue
            output.append(line)
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(output))

    def ass2ass_offset(self, ass_file1, ass_file2, offset: float) -> None:
        """Copy ``ass_file1`` to ``ass_file2`` with all events shifted.

        Every ``Dialogue:`` / ``Comment:`` line in the ``[Events]`` section
        gets its ``Start`` and ``End`` values shifted by ``offset`` seconds
        and re-rendered as ``H:MM:SS.cc``. The column positions are taken
        from the section's ``Format:`` line. All other lines, and all other
        fields of event lines, are copied through untouched.
        """
        time_analyser = TimeConverter()
        time_formatter = time_analyser.format_copy("0:00:00.00")

        def shift(stamp: str) -> str:
            return time_formatter(*time_analyser.time_analysis(stamp.strip(), offset))

        with open(ass_file1, "r", encoding="utf-8") as f:
            lines = f.readlines()

        output = []
        in_events = False
        start_idx = end_idx = None
        field_count = 0
        for line in lines:
            # A BOM is not whitespace, so strip it explicitly for matching;
            # the original line (BOM included) is what gets written back.
            stripped = line.strip().lstrip("\ufeff")
            if re.match(r"\[.*?\]$", stripped):
                in_events = stripped == "[Events]"
                start_idx = end_idx = None
            elif in_events and stripped.startswith("Format:"):
                names = [n.strip() for n in stripped[len("Format:"):].split(",")]
                if "Start" in names and "End" in names:
                    start_idx, end_idx = names.index("Start"), names.index("End")
                    field_count = len(names)
            elif (
                in_events
                and start_idx is not None
                and stripped.startswith(("Dialogue:", "Comment:"))
            ):
                head, _, payload = line.partition(":")
                # Limit the split so commas inside the text stay intact.
                values = payload.split(",", field_count - 1)
                if len(values) > max(start_idx, end_idx):
                    values[start_idx] = shift(values[start_idx])
                    values[end_idx] = shift(values[end_idx])
                    line = head + ":" + ",".join(values)
            output.append(line)

        with open(ass_file2, "w", encoding="utf-8") as f:
            f.writelines(output)

    def srt2srt_offset(self, srt_file1, srt_file2, offset: float) -> None:
        """Copy ``srt_file1`` to ``srt_file2`` with all cues shifted.

        Every ``start --> end`` line has both timestamps shifted by
        ``offset`` seconds and re-rendered as ``HH:MM:SS,mmm``; all other
        lines are copied unchanged.
        """
        time_analyser = TimeConverter()
        time_formatter = time_analyser.format_copy("00:00:00,000")
        with open(srt_file1, "r", encoding="utf-8") as f1, \
                open(srt_file2, "w", encoding="utf-8") as f2:
            for line in f1:
                res = re.match(r"^(.*?) --> (.*)$", line.strip())
                if res:
                    start_time, end_time = res.groups()
                    start_time = time_formatter(*time_analyser.time_analysis(start_time, offset))
                    end_time = time_formatter(*time_analyser.time_analysis(end_time, offset))
                    line = f"{start_time} --> {end_time}\n"
                f2.write(line)


class VedioConverter(object):
    """Thin wrappers around ffmpeg for remuxing and subtitle embedding.

    All methods invoke the executable named by the module-level
    ``ffmpeg_file_path`` and raise ``subprocess.CalledProcessError`` when
    ffmpeg exits with a non-zero status.
    """

    def mkv2mp4(self, mkv_file: str, mp4_file: str) -> None:
        """Remux ``mkv_file`` into ``mp4_file`` by stream copy (no re-encode)."""
        # Process the paths exactly once; applying the helper a second time
        # would double the backslashes again.
        mkv_file, mp4_file = Utils.file_path_processor(mkv_file, mp4_file)
        command = [
            ffmpeg_file_path,
            '-i', mkv_file,
            '-c:v', 'copy',
            '-c:a', 'copy',
            '-y',
            mp4_file
        ]
        subprocess.run(command, check=True)

    def ass_embed_mp4(self, mp4_file: str, ass_file: str, output_file: str, itsoffset: float = 0.0) -> None:
        """Burn ``ass_file`` into ``mp4_file`` as hard subtitles.

        The subtitles are first shifted by ``itsoffset`` seconds into a
        temporary ``.ass`` file in the current working directory, which is
        then rendered onto the video with ffmpeg's ``subtitles`` filter.
        The video is re-encoded with libx264 and the audio with AAC, so
        this is comparatively slow. The temporary file is removed
        afterwards, whether or not ffmpeg succeeded.
        """
        mp4_file, ass_file, output_file = Utils.file_path_processor(
            mp4_file, ass_file, output_file
        )
        converter = SubtitleConverter()
        # A bare file name in the working directory keeps the value passed
        # to the subtitles filter free of characters that need escaping.
        random_name = f"{random.randint(0, 1000000)}.ass"
        while os.path.exists(random_name):
            # Never overwrite (and later delete) an unrelated existing file.
            random_name = f"{random.randint(0, 1000000)}.ass"
        command = [
            ffmpeg_file_path,
            '-i', mp4_file,
            '-vf', f"subtitles={random_name}",
            '-c:v', 'libx264',
            '-profile:v', 'high',
            '-pix_fmt', 'yuv420p',
            '-preset', 'fast',
            '-b:a', '192k',
            '-c:a', 'aac',
            '-movflags', '+faststart',
            '-y',
            output_file
        ]
        try:
            converter.ass2ass_offset(ass_file, random_name, itsoffset)
            subprocess.run(command, check=True)
        finally:
            if os.path.exists(random_name):
                os.remove(random_name)

    def ass_embed_mkv(self, mkv_file: str, ass_file: str, output_file: str, itsoffset: float = 0.0) -> None:
        """Mux ``ass_file`` into ``mkv_file`` as a soft subtitle track.

        Existing subtitle streams of the input are dropped, all other
        streams are copied, and the new subtitle stream is tagged
        ``language=eng``. ``itsoffset`` (seconds) is handed to ffmpeg's
        ``-itsoffset`` option for the subtitle input.
        """
        mkv_file, ass_file, output_file = Utils.file_path_processor(
            mkv_file, ass_file, output_file
        )
        command = [
            ffmpeg_file_path,
            '-i', mkv_file,
            '-itsoffset', str(itsoffset),
            '-i', ass_file,
            '-map', '0',
            '-map', '-0:s',
            '-map', '1',
            '-c', 'copy',
            '-metadata:s:s:0', 'language=eng',
            '-y',
            output_file
        ]
        subprocess.run(command, check=True)



# Name or path of the ffmpeg executable used by VedioConverter.
ffmpeg_file_path = r"ffmpeg.exe"
vedio_converter = VedioConverter()
# Burn hard subtitles into an mp4 file.
vedio_converter.ass_embed_mp4(
    mp4_file=r"your_input_mp4_file.mp4",
    ass_file=r"your_ass_subtitle_file.ass",
    output_file=r"your_output_mp4_file.mp4",
    # Subtitle time offset in seconds, used to align subtitles with the audio.
    itsoffset=6.3,
)

为了方便字幕的视频嵌入,上述代码实现了一些较为重要的功能,部分如下:

ass文件转srt文件,见SubtitleConverter的ass2srt方法;

按指定的时间偏移量,由已有的srt文件或ass文件生成新的srt文件或ass文件,见SubtitleConverter的srt2srt_offset和ass2ass_offset方法;

ass字幕文件嵌入mp4视频的方法(可指定时间偏移),见VedioConverter的ass_embed_mp4方法,注意是硬字幕嵌入,相对耗时。

这些方法的实现都不是很难,不过并不能保证没有Bug;经过简单而基本的测试,暂时没有出现问题。

在使用之前,请确保找到你的ffmpeg路径。如果已经添加到环境变量,可以直接使用;否则请自行修改里面ffmpeg的路径为绝对路径。此处提供ffmpeg的下载路径:

FFmpeg 最新 Windows 64 位 GPL 版本下载

关于为什么mp4视频嵌入硬字幕,而mkv视频却是嵌入软字幕(在代码中),其实是有原因的:

mp4被更加广泛的兼容,几乎所有视频播放器都可以正确播放。但如果是嵌入软字幕,mp4视频的字幕则不一定能被播放器支持。硬字幕嵌入能保证字幕一定显示,更凸显mp4兼容的优势。缺陷就是硬字幕不能选择隐藏,同时需要重新编码,比较耗时;

mkv视频的支持性就明显要差不少,如手机上很多视频播放器就对mkv视频的支持不好,或视频变形,或音频丢失;但是软字幕的显示往往不是问题,这得益于mkv视频是字幕的天然容器,所以只要能找到合适的mkv视频播放器,几乎就能正常的显示软字幕。

相关推荐
橙子家5 小时前
浏览器缓存之【基础键值存储】:Local storage 和 Session storage
前端
程序员龙叔7 小时前
编写高质量 Skill 系列 -- 如何设计需求分析与用例生成的 SKILL
自动化测试·软件测试·python·软件测试工程师·接口测试·性能测试·skill·ai测试
星星在线7 小时前
MusicFree:一个「All in One」的个人音乐服务器,让听歌回归简单
前端·后端
IT_陈寒8 小时前
Redis的SETNX并发问题让我加了三天班
前端·人工智能·后端
demo007x8 小时前
Docling 文档转换以及技术架构分析
前端·后端·程序员
京东云开发者9 小时前
京东市民服务又“上新”!这次是黑龙江“龙易办”
前端
袋鱼不重10 小时前
我的神奇同事,AI 用多了居然写了个 Open In Codex
前端·后端·ai编程
用户83562907805110 小时前
使用 Python 操作 Word 内容控件
后端·python
通信小呆呆10 小时前
当算法有了“五感”:多模态数据融合如何向人体感官协同学习?
人工智能·学习·算法·机器学习·机器人
Fireworks10 小时前
深入vue3源码解读 -- 1、响应式的基础概念
前端