血的教训:合并多个文件夹时,同名文件会互相覆盖。这里采用"文件内容的 MD5 哈希 + 修改时间戳"重命名每个文件,避免覆盖。
合并后就能安安心心排序了。
python
import os
import hashlib
import shutil
# Source folders whose videos are merged (add more folders as needed).
source_dirs = ["video/vox2/8-12/10s", "video/vox2/12-20/10s", "video/vox2/20-30/10s", "video/vox2/30-60/10s", "video/vox2/60-inf"]
# Destination folder that receives every renamed copy.
target_dir = "video/vox2/10s"

# Read size used while hashing, so a large video is never loaded whole.
_HASH_CHUNK_SIZE = 1024 * 1024


def _file_md5(path):
    """Return the hex MD5 digest of the file at *path*, read in chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (EOF).
        for chunk in iter(lambda: f.read(_HASH_CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _unique_destination(dst_dir, file_hash, modification_time, ext):
    """Return a not-yet-existing path ``<hash>_<mtime><ext>`` in *dst_dir*.

    When the name is already taken, the timestamp part is bumped by 0.001
    until the resulting path does not exist.
    """
    dst_file_path = os.path.join(dst_dir, f"{file_hash}_{modification_time}{ext}")
    while os.path.exists(dst_file_path):
        modification_time += 0.001  # tiny timestamp increase to get a new name
        dst_file_path = os.path.join(dst_dir, f"{file_hash}_{modification_time}{ext}")
    return dst_file_path


def merge_videos(src_dirs, dst_dir, extension='.mp4'):
    """Copy every matching file under *src_dirs* into the flat folder *dst_dir*.

    Each copy is renamed to ``<md5 of content>_<modification time><ext>`` so
    that files sharing a name in different source folders cannot overwrite
    each other.

    Args:
        src_dirs: Iterable of folder paths; each is walked recursively.
            Folders that do not exist are reported and skipped.
        dst_dir: Destination folder; created if it does not exist.
        extension: Lower-case filename suffix to select (matched
            case-insensitively against the filename).

    Returns:
        The number of files copied.
    """
    os.makedirs(dst_dir, exist_ok=True)
    copied = 0
    for source_dir in src_dirs:
        if not os.path.exists(source_dir):
            print(f"Warning: Directory does not exist: {source_dir}")
            continue
        print(f"Processing directory: {source_dir}")
        # os.walk() visits the folder and all of its sub-folders.
        for root, _dirs, files in os.walk(source_dir):
            for filename in files:
                src_file_path = os.path.join(root, filename)
                print(f"Checking file: {src_file_path}")
                # Only files with the requested extension are merged.
                if not filename.lower().endswith(extension):
                    print(f"Ignored file: {src_file_path}")
                    continue
                # Keep the extension exactly as written in the source name.
                ext = os.path.splitext(filename)[1]
                file_hash = _file_md5(src_file_path)
                modification_time = os.path.getmtime(src_file_path)
                dst_file_path = _unique_destination(dst_dir, file_hash, modification_time, ext)
                # copy2 also carries over file metadata such as the mtime.
                shutil.copy2(src_file_path, dst_file_path)
                print(f'Copied "{filename}" from "{root}" to "{dst_file_path}"')
                copied += 1
    return copied


# Runs unconditionally when the file is executed or imported, exactly as the
# original flat script did.
merge_videos(source_dirs, target_dir)
print("All videos have been merged successfully.")