GarmageSet下载和处理

复制代码

# 1. 获取 Access Token
# 访问: https://huggingface.co/settings/tokens
# 创建新 Token: 
#   - Name: garmagenet
#   - Role: Read (读取权限即可)
#   - 复制令牌（如: hf_xxxxxxxxxxxxxxxx）

# 2. 在终端登录
conda activate garmagenet
huggingface-cli login

# 粘贴 Token（输入时不会显示字符），按回车
# 看到 "Login successful" 即成功

python 复制代码

# /home/user/hk/garmagenet-impl-main/download_garmageset.py
from huggingface_hub import snapshot_download
import os

# Destination directory for the dataset snapshot; created up front if missing.
local_dir = "/home/user/hk/garmagenet-impl-main/GarmageSet"
os.makedirs(local_dir, exist_ok=True)

print(f"🔹 开始下载数据集到: {local_dir}")

# Fetch the whole dataset repository in one call.
download_options = {
    "repo_id": "Style3D/GarmageSet",
    "repo_type": "dataset",
    "local_dir": local_dir,
    "max_workers": 8,  # number of parallel download threads
}
snapshot_download(**download_options)

print("✅ 数据集下载完成！")

python 复制代码

# /home/user/hk/garmagenet-impl-main/extract_all.py
import os
import tarfile
import argparse
from pathlib import Path
from tqdm import tqdm

def extract_tar_gz(file_path, extract_to=None, remove_after=False):
    """Extract a single ``.tar.gz`` archive.

    Args:
        file_path: Path to the archive, as a ``str`` or ``pathlib.Path``.
        extract_to: Directory to extract into. Defaults to the directory
            that contains the archive. Created if it does not exist.
        remove_after: When true, delete the archive once it has been
            extracted successfully.

    Returns:
        ``True`` on success, ``False`` if extraction (or the optional
        deletion) failed. Errors are printed, never raised.
    """
    # Accept plain strings as well: ``.parent`` / ``.name`` below need a Path.
    file_path = Path(file_path)
    try:
        with tarfile.open(file_path, 'r:gz') as tar:
            # Default target: next to the archive itself.
            if extract_to is None:
                extract_to = file_path.parent

            os.makedirs(extract_to, exist_ok=True)

            # Archives come from a download, so prefer the "data" extraction
            # filter (rejects absolute paths, "..", and links that escape the
            # target) on interpreters that provide it.
            if hasattr(tarfile, "data_filter"):
                tar.extractall(path=extract_to, filter="data")
            else:
                tar.extractall(path=extract_to)

        # Delete only after the archive handle is closed; removing an open
        # file fails on some platforms.
        if remove_after:
            os.remove(file_path)
            print(f"🗑️  已删除: {file_path.name}")

        return True
    except tarfile.TarError as e:
        print(f"❌ TarError 解压失败 {file_path.name}: {e}")
        return False
    except Exception as e:
        # Deliberate best-effort: one bad archive must not abort a batch run.
        print(f"❌ 未知错误 解压失败 {file_path.name}: {e}")
        return False

def find_and_extract(root_dir, remove_after=False, dry_run=False):
    """Recursively find every ``.tar.gz`` under *root_dir* and extract it.

    Each archive is passed to ``extract_tar_gz`` without an explicit target,
    so it is unpacked into the directory that contains it.

    Args:
        root_dir: Directory to search (``str`` or ``pathlib.Path``).
        remove_after: Forwarded to ``extract_tar_gz``; delete each archive
            after it has been extracted.
        dry_run: When true, only list the archives that would be extracted.

    Returns:
        None. Progress and a final summary are printed. Dry-run entries and
        skipped archives are counted as successes in the summary.
    """
    root = Path(root_dir)
    tar_files = list(root.rglob("*.tar.gz"))

    if not tar_files:
        print(f"🔍 在 {root_dir} 中未找到 .tar.gz 文件")
        return

    print(f"📦 找到 {len(tar_files)} 个压缩包，开始处理...\n")

    success_count = 0
    fail_count = 0

    # tqdm renders the progress bar.
    for tar_path in tqdm(tar_files, desc="解压进度"):
        if dry_run:
            print(f"🔸 [DRY RUN] 将会解压: {tar_path}")
            success_count += 1
            continue

        # Skip archives that look already extracted. Path.stem only drops the
        # last suffix ("x.tar.gz" -> "x.tar"), so strip ".tar.gz" explicitly.
        # NOTE(review): this assumes each archive unpacks into a directory
        # named after the archive, next to it — confirm against the dataset.
        archive_base = tar_path.name[:-len(".tar.gz")]
        extract_dir = tar_path.parent / archive_base
        # is_dir() (not exists()) so a same-named regular file cannot make
        # iterdir() raise; an empty base name would point at the parent.
        if archive_base and extract_dir.is_dir() and any(extract_dir.iterdir()):
            print(f"⏭️  跳过（已存在）: {tar_path.name}")
            success_count += 1
            continue

        if extract_tar_gz(tar_path, remove_after=remove_after):
            success_count += 1
        else:
            fail_count += 1

    # Final summary.
    print(f"\n{'='*60}")
    print("✅ 解压完成！")
    print(f"   成功: {success_count} | 失败: {fail_count} | 总计: {len(tar_files)}")
    if fail_count > 0:
        print("⚠️  请检查上方错误信息")
    print(f"{'='*60}")

if __name__ == "__main__":
    # Command-line entry point: parse the options, echo them, run the batch.
    cli = argparse.ArgumentParser(description="批量解压 GarmageSet 中的 .tar.gz 文件")
    cli.add_argument(
        "--root",
        type=str,
        default="/home/user/hk/garmagenet-impl-main/GarmageSet",
        help="要搜索的根目录（默认: GarmageSet）",
    )
    # Both remaining options are plain on/off switches.
    switches = (
        ("--remove", "解压成功后删除原 .tar.gz 文件（节省空间）"),
        ("--dry-run", "仅列出将要解压的文件，不实际执行"),
    )
    for flag, flag_help in switches:
        cli.add_argument(flag, action="store_true", help=flag_help)

    opts = cli.parse_args()

    # Echo the effective settings before doing any work.
    yes_no = {True: '是', False: '否'}
    print(f"🔹 根目录: {opts.root}")
    print(f"🔹 删除原文件: {yes_no[opts.remove]}")
    print(f"🔹 模拟运行: {yes_no[opts.dry_run]}\n")

    find_and_extract(opts.root, remove_after=opts.remove, dry_run=opts.dry_run)

bash 复制代码

conda activate garmagenet
cd /home/user/hk/garmagenet-impl-main

# 🔍 第一步：干跑模式（预览将要解压的文件）
python extract_all.py --dry-run

# 🚀 第二步：实际解压（保留原压缩包）
python extract_all.py

# 🗑️  第三步：解压并删除原文件（节省磁盘空间）
python extract_all.py --remove

bash 复制代码

conda install -c nvidia cuda-nvcc=11.8 cuda-cudart-dev=11.8 -y
export TORCH_CUDA_FLAGS="-allow-unsupported-compiler"
export CUDA_NVCC_FLAGS="-allow-unsupported-compiler"
rm -rf ~/.cache/torch_extensions/nvdiffrast_plugin
export CUDA_HOME=/usr/local/cuda
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
export PATH=/usr/local/cuda/bin:$PATH
rm -rf ~/.cache/torch_extensions/
python data_process/process_garmage.py     -i GarmageSet/raw     -o GarmageSet/garmages_processed     --num_views 8