# Multi-GPU scheduling
# python multi_swap_10s_v2.py
import os
import subprocess
import hashlib
from multiprocessing import Process, Manager
from queue import Empty  # raised when an empty queue is polled

from tqdm import tqdm

NUM_GPUS = 2  # number of GPUs; one worker process is spawned per GPU
def worker(gpu_id, task_queue):
    # Make only this worker's GPU visible to it and to every subprocess
    # it launches (child processes inherit the environment).
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    while True:
        try:
            cmd = task_queue.get_nowait()
        except Empty:
            # No more tasks for this GPU; exit the worker.
            break
        tqdm.write(f"GPU {gpu_id} starting task: {' '.join(cmd)}")
        # Run the face-swap subprocess and wait for it to finish, so
        # only one task runs at a time on this GPU.
        subprocess.run(cmd)
def calculate_device_id(vid_file, img_file):
    # Hash the pair of file paths and take it modulo the GPU count,
    # giving a deterministic, roughly even task-to-GPU assignment.
    hash_object = hashlib.md5(f"{vid_file}{img_file}".encode())
    return int(hash_object.hexdigest(), 16) % NUM_GPUS
def main():
    source_videos_dir = "/home/nvidia/data/video/HDTF/10s"
    source_images_dir = "/home/nvidia/data/image/CelebA-HQ/300/0"
    output_dir = source_images_dir

    # Keep only .mp4 files whose base name is purely numeric (e.g. "0001.mp4").
    video_files_list = [
        os.path.join(source_videos_dir, f)
        for f in os.listdir(source_videos_dir)
        if os.path.isfile(os.path.join(source_videos_dir, f))
        and f.endswith('.mp4')
        and not any(char.isalpha() for char in f.split('.')[0])
    ]
    image_files_list = [
        os.path.join(source_images_dir, f)
        for f in os.listdir(source_images_dir)
        if os.path.isfile(os.path.join(source_images_dir, f)) and f.endswith('.jpg')
    ]

    model_id = 'c'
    # One task queue per GPU. The Manager must outlive the workers, so
    # create it here inside main() rather than at module import time.
    manager = Manager()
    task_queues = [manager.Queue() for _ in range(NUM_GPUS)]

    # Fill the per-GPU task queues.
    for vid_file in video_files_list:
        for img_file in image_files_list:
            output_video = f"{os.path.splitext(os.path.basename(vid_file))[0]}_{os.path.splitext(os.path.basename(img_file))[0]}_{model_id}.mp4"
            output_video_path = os.path.join(output_dir, output_video)
            # Skip pairs whose output file already exists.
            if not os.path.exists(output_video_path):
                device_id = calculate_device_id(vid_file, img_file)
                cmd = [
                    "python", "multi_face_single_source.py",
                    "--retina_path", "retinaface/RetinaFace-Res50.h5",
                    "--arcface_path", "arcface_model/ArcFace-Res50.h5",
                    "--facedancer_path", "model_zoo/FaceDancer_config_c_HQ.h5",
                    "--vid_path", vid_file,
                    "--swap_source", img_file,
                    "--output", output_video_path,
                    "--compare", "False",
                    "--sample_rate", "1",
                    "--length", "1",
                    "--align_source", "True",
                    # CUDA_VISIBLE_DEVICES hides the other GPUs from the
                    # worker, so inside the subprocess the assigned GPU
                    # is always addressed as device 0.
                    "--device_id", "0",
                ]
                task_queues[device_id].put(cmd)
    # One worker process per GPU, each draining its own queue.
    workers = []
    for gpu_id in range(NUM_GPUS):
        p = Process(target=worker, args=(gpu_id, task_queues[gpu_id]))
        p.start()
        workers.append(p)

    # Wait for every worker to drain its queue and exit. Workers stop on
    # their own once their queue is empty, so no sentinel values or
    # queue join()/task_done() bookkeeping is needed.
    for p in workers:
        p.join()


if __name__ == '__main__':
    main()
"""
在这个版本中,我引入了一个calculate_device_id函数,它基于视频文件和图像文件的路径计算出一个哈希值,然后取模得到设备ID。
这样可以确保任务更均匀地分配到不同的GPU上,而不仅仅依赖于列表的索引。
同时,我添加了设置CUDA_VISIBLE_DEVICES的代码到worker函数中,虽然这不是严格必需的,但它强调了每个工作进程将只看到并使用分配给它的GPU。这有助于避免潜在的GPU资源冲突问题。
"""