VideoMAE复现之SSV2数据集解压(Linux)、webm转mp4格式

合并语句:

bash 复制代码
cat 20bn-something-something-v2-* > video.tar

解压语句:

bash 复制代码
tar -xvf video.tar

参考链接:VideoMAE模型复现(Linux)

解压后的数据集是webm格式,需要将其转换未240px的mp4格式,代码如下:

python 复制代码
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import os
import os.path as osp
import sys
from multiprocessing import Pool


def resize_videos(args, vid_item):
    """Generate resized video cache.

    Args:
        vid_item (list): Video item containing video full path,
            video relative path.

    Returns:
        bool: Whether generate video cache successfully.
    """

    # args = parse_args()
    args.remove_dup = False
    # args.to_mp4 = True
    # args.scale = 320
    full_path, vid_path = vid_item
    # Change the output video extension to .mp4 if '--to-mp4' flag is set
    if args.to_mp4:
        vid_path = vid_path.split('.')
        assert len(vid_path) == 2, \
            f"Video path '{vid_path}' contain more than one dot"
        vid_path = vid_path[0] + '.mp4'
    out_full_path = osp.join(args.out_dir, vid_path)
    dir_name = osp.dirname(vid_path)
    out_dir = osp.join(args.out_dir, dir_name)
    if not osp.exists(out_dir):
        os.makedirs(out_dir)
    result = os.popen(
        f'ffprobe -hide_banner -loglevel error -select_streams v:0 -show_entries stream=width,height -of csv=p=0 {full_path}'  # noqa:E501
    )

    # res = result.readline().rstrip().split(',')
    # w, h = [int(d) for d in res]
    w, h = [int(d) for d in result.readline().rstrip().split(',')]
    if w > h:
        cmd = (f'ffmpeg -hide_banner -loglevel error -i {full_path} '
               f'-vf {"mpdecimate," if args.remove_dup else ""}'
               f'scale=-2:{args.scale} '
               f'{"-vsync vfr" if args.remove_dup else ""} '
               f'-c:v libx264 {"-g 16" if args.dense else ""} '
               f'-an {out_full_path} -y')
    else:
        cmd = (f'ffmpeg -hide_banner -loglevel error -i {full_path} '
               f'-vf {"mpdecimate," if args.remove_dup else ""}'
               f'scale={args.scale}:-2 '
               f'{"-vsync vfr" if args.remove_dup else ""} '
               f'-c:v libx264 {"-g 16" if args.dense else ""} '
               f'-an {out_full_path} -y')
    os.popen(cmd)
    print(f'{vid_path} done')
    sys.stdout.flush()
    return True


def parse_args():
    parser = argparse.ArgumentParser(
        description='Generate the resized cache of original videos')
    parser.add_argument('src_dir', type=str, help='source video directory')
    parser.add_argument('out_dir', type=str, help='output video directory')
    parser.add_argument(
        '--dense',
        action='store_true',
        help='whether to generate a faster cache')
    parser.add_argument(
        '--level',
        type=int,
        choices=[1, 2],
        default=2,
        help='directory level of data')
    parser.add_argument(
        '--remove-dup',
        action='store_true',
        help='whether to remove duplicated frames')
    parser.add_argument(
        '--ext',
        type=str,
        default='mp4',
        choices=['avi', 'mp4', 'webm', 'mkv'],
        help='video file extensions')
    parser.add_argument(
        '--to_mp4',
        action='store_true',
        help='whether to output videos in mp4 format')
    parser.add_argument(
        '--scale',
        type=int,
        default=240,
        help='resize image short side length keeping ratio')
    parser.add_argument(
        '--num_worker', type=int, default=8, help='number of workers')
    args = parser.parse_args()

    return args


if __name__ == '__main__':

    args = parse_args()
    
    if not osp.isdir(args.out_dir):
        print(f'Creating folder: {args.out_dir}')
        os.makedirs(args.out_dir)

    print('Reading videos from folder: ', args.src_dir)
    print('Extension of videos: ', args.ext)
    fullpath_list = glob.glob(args.src_dir + '/*' * args.level + '.' +
                              args.ext)
    done_fullpath_list = glob.glob(args.out_dir + '/*' * args.level + args.ext)
    print('Total number of videos found: ', len(fullpath_list))
    print('Total number of videos transfer finished: ',
          len(done_fullpath_list))
    if args.level == 2:
        vid_list = list(
            map(
                lambda p: osp.join(
                    osp.basename(osp.dirname(p)), osp.basename(p)),
                fullpath_list))
    elif args.level == 1:
        vid_list = list(map(osp.basename, fullpath_list))

    for i in range(len(fullpath_list)):
        fullpath = fullpath_list[i]
        vid = vid_list[i]
        resize_videos(args, (fullpath, vid))

参数示例:

bash 复制代码
"args": [
	"../somethingV2/20bn-something-something-v2", 
	"../../dataset_image_code/xhl/VideoMAE-main/data/something-something/20bn-something-something-v2_240p", 
	"--level", "1", "--to_mp4", "--scale", "240", "--ext", "webm"
]
相关推荐
Yana.nice39 分钟前
openssl将证书从p7b转换为crt格式
java·linux
AI逐月44 分钟前
tmux 常用命令总结:从入门到稳定使用的一篇实战博客
linux·服务器·ssh·php
小白跃升坊1 小时前
基于1Panel的AI运维
linux·运维·人工智能·ai大模型·教学·ai agent
跃渊Yuey2 小时前
【Linux】线程同步与互斥
linux·笔记
舰长1152 小时前
linux 实现文件共享的实现方式比较
linux·服务器·网络
zmjjdank1ng2 小时前
Linux 输出重定向
linux·运维
路由侠内网穿透.2 小时前
本地部署智能家居集成解决方案 ESPHome 并实现外部访问( Linux 版本)
linux·运维·服务器·网络协议·智能家居
VekiSon2 小时前
Linux内核驱动——基础概念与开发环境搭建
linux·运维·服务器·c语言·arm开发
zl_dfq3 小时前
Linux 之 【进程信号】(signal、kill、raise、abort、alarm、Core Dump核心转储机制)
linux
Ankie Wan3 小时前
cgroup(Control Group)是 Linux 内核提供的一种机制,用来“控制、限制、隔离、统计”进程对系统资源的使用。
linux·容器·cgroup·lxc