Python 实现视频随机抽帧

flyfish

自动扫描根目录下所有层级的子文件夹,批量处理其中全部 .mp4 文件:在每个视频的时长范围内随机选取时间点提取帧,并自动避开首尾 5 秒,以降低抽到黑屏/花屏的概率;图片文件名采用 UUID 的 32 个十六进制字符(uuid4().hex),保证唯一,所有图片统一存入同一个输出目录。

环境要求

FFmpeg 安装

需要事先安装 FFmpeg,并将 ffmpeg 与 ffprobe 所在目录加入系统 PATH(脚本直接以命令名调用这两个程序)。验证配置:打开命令提示符(CMD / PowerShell),分别执行以下两条命令,均能正常输出版本号即为配置成功:

```bash
ffmpeg -version
ffprobe -version
```

使用方法

  1. 将脚本代码保存为 extract_frames_ffmpeg.py

  2. 打开脚本,修改底部配置区的三个参数:

    ```python
    VIDEO_ROOT_FOLDER = r"D:\videos"           # 存放视频的根目录
    OUTPUT_FOLDER = r"D:\video_frames_output"  # 提取图片的保存目录
    FRAMES_PER_VIDEO = 6                       # 每个视频提取的帧数
    ```
  3. 打开命令行,进入脚本所在目录,执行运行命令:

    ```bash
    python extract_frames_ffmpeg.py
    ```
完整脚本代码(Python)如下:
import os
import random
import subprocess
import uuid

def get_video_duration(video_path):
    """Return the duration of a video in seconds, or None if it is unknown.

    Three strategies are tried in order, each by running ``ffprobe``:

    1. the container-level ``format=duration`` entry;
    2. the ``duration`` entry of the first video stream (``v:0``);
    3. ``nb_frames / r_frame_rate`` of the first video stream.

    Strategies 1 and 2 only accept a duration greater than one second;
    strategy 3 only accepts a positive frame rate and more than ten frames.

    Args:
        video_path: Path of the video file passed to ``ffprobe``.

    Returns:
        The duration in seconds as a float, or None when every strategy
        fails (including when ``ffprobe`` cannot be launched).
    """
    # Failures that only mean "this strategy did not work": ffprobe missing or
    # not executable (OSError), unparsable numbers (ValueError), a zero
    # frame-rate denominator (ZeroDivisionError) or a subprocess-level error.
    # Unlike a bare ``except:``, this lets KeyboardInterrupt / SystemExit
    # propagate, so Ctrl+C still stops a long batch run.
    probe_errors = (
        OSError,
        ValueError,
        ZeroDivisionError,
        subprocess.SubprocessError,
    )

    # 1. Try the container-level duration first.
    cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        video_path
    ]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, encoding="utf-8", errors="ignore"
        )
        duration_str = result.stdout.strip()
        if duration_str and duration_str != "N/A":
            duration = float(duration_str)
            if duration > 1:
                return duration
    except probe_errors:
        # Best effort: fall through to the stream-level probe.
        pass

    # 2. Otherwise query the first video stream; nokey=0 keeps the "key=value"
    #    form so the three requested fields can be told apart.
    cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=duration,r_frame_rate,nb_frames",
        "-of", "default=noprint_wrappers=1:nokey=0",
        video_path
    ]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, encoding="utf-8", errors="ignore"
        )
        info = {}
        for line in result.stdout.strip().splitlines():
            if "=" in line:
                k, v = line.split("=", 1)
                info[k.strip()] = v.strip()

        # Prefer the stream's own duration.
        if "duration" in info and info["duration"] != "N/A":
            duration = float(info["duration"])
            if duration > 1:
                return duration

        # 3. No usable duration: derive it from frame count / frame rate.
        if "nb_frames" in info and "r_frame_rate" in info:
            if info["nb_frames"] != "N/A" and "/" in info["r_frame_rate"]:
                num, den = info["r_frame_rate"].split("/", 1)
                fps = float(num) / float(den)
                total_frames = int(info["nb_frames"])
                if fps > 0 and total_frames > 10:
                    return total_frames / fps
    except probe_errors:
        # Best effort: report "unknown" below instead of crashing the batch.
        pass

    # Every strategy failed.
    return None

def _pick_time_points(duration, count, edge_margin):
    """Return ``count`` sorted random timestamps that stay inside the video."""
    margin = max(float(edge_margin), 0.0)
    if duration <= 2 * margin:
        # The video is too short to skip the full margin at both ends;
        # sample from its middle half so every timestamp is still valid.
        margin = duration / 4
    start = margin
    end = duration - margin
    return sorted(random.uniform(start, end) for _ in range(count))


def extract_random_frames_ffmpeg(root_dir, output_dir, frames_per_video=6,
                                 edge_margin=5.0):
    """Extract random frames from every ``.mp4`` file under ``root_dir``.

    The directory tree is walked recursively. For each file whose name ends
    with ``.mp4`` (case-insensitive), ``frames_per_video`` random timestamps
    are chosen and one JPEG per timestamp is written to ``output_dir`` using
    ``ffmpeg``. Output files are named with ``uuid.uuid4().hex`` so frames from
    different videos can share one directory. A summary is printed at the end.

    Timestamps avoid the first and last ``edge_margin`` seconds. When a video
    is not longer than twice that margin, the middle half of the video is
    sampled instead, so timestamps never fall beyond the end of the file.

    Args:
        root_dir: Root directory that is scanned recursively for videos.
        output_dir: Directory receiving all images; created if missing.
        frames_per_video: Number of timestamps sampled per video.
        edge_margin: Seconds to skip at both the start and the end of a video.

    Returns:
        None. Progress and the final summary are printed to stdout.
    """
    os.makedirs(output_dir, exist_ok=True)

    total_video = 0
    total_frame = 0
    failed_videos = []

    for dirpath, _, filenames in os.walk(root_dir):
        for filename in filenames:
            if not filename.lower().endswith(".mp4"):
                continue

            video_path = os.path.join(dirpath, filename)
            total_video += 1

            try:
                duration = get_video_duration(video_path)
                if duration is None or duration <= 1:
                    failed_videos.append(filename)
                    print(f"无法获取时长,跳过: {filename}")
                    continue

                # Random timestamps kept away from both ends of the video to
                # reduce the chance of black or corrupted frames.
                time_points = _pick_time_points(
                    duration, frames_per_video, edge_margin
                )

                success_count = 0
                for t in time_points:
                    unique_name = f"{uuid.uuid4().hex}.jpg"
                    save_path = os.path.join(output_dir, unique_name)

                    # "-ss" is placed before "-i" so it applies to the input;
                    # "-vframes 1" writes a single frame.
                    cmd = [
                        "ffmpeg", "-y",
                        "-ss", str(round(t, 3)),
                        "-i", video_path,
                        "-vframes", "1",
                        "-q:v", "2",
                        "-loglevel", "error",
                        save_path
                    ]

                    ret = subprocess.run(cmd, capture_output=True)
                    # A zero exit code alone is not trusted: the image must
                    # actually exist on disk to count as extracted.
                    if ret.returncode == 0 and os.path.exists(save_path):
                        success_count += 1
                        total_frame += 1

                print(f"{filename} 成功提取 {success_count}/{frames_per_video} 帧")

            except Exception as e:
                # Per-video boundary: one broken file must not abort the batch.
                failed_videos.append(filename)
                print(f"处理出错 {filename}: {str(e)}")

    print("\n🎉 全部处理完成!")
    print(f"共处理视频 {total_video} 个,成功提取图片 {total_frame} 张")
    if failed_videos:
        print(f"跳过异常视频 {len(failed_videos)} 个")
    print(f"图片保存目录: {os.path.abspath(output_dir)}")

# ====================== Configuration ======================
if __name__ == "__main__":
    # Root directory that is scanned for videos.
    VIDEO_ROOT_FOLDER = r"D:\videos"
    # Directory where the extracted images are saved.
    OUTPUT_FOLDER = r"D:\video_frames_output"
    # Number of frames to extract from each video.
    FRAMES_PER_VIDEO = 6
    # ===========================================================

    extract_random_frames_ffmpeg(
        root_dir=VIDEO_ROOT_FOLDER,
        output_dir=OUTPUT_FOLDER,
        frames_per_video=FRAMES_PER_VIDEO,
    )