Qwen-Image-Edit-2511 生成图片教程和源码

一、模型下载和使用

去modelscope官网上下载官方的权重文件:https://modelscope.ai/models/Qwen/Qwen-Image-Edit-2511

预计需要占用 60GB 以上的显存,如果出现 OOM(显存不足),请选择量化版本。

如果没有modelscope,使用pip装一下

复制代码
pip install modelscope

export MODELSCOPE_DOMAIN=www.modelscope.ai
modelscope download --model Qwen/Qwen-Image-Edit-2511 --local_dir ./dir

下载完成之后使用下面的源码运行。注意:脚本默认从 ./Qwen-Image-Edit-2511 加载模型,如果权重下载到了其他目录(例如上面命令中的 ./dir),运行时请用 --model_id 指定该目录。

二、源码运行

项目结构

复制代码
├── scene_defect_batch.py
├── 场景类文件夹/
│   └── *.jpg

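将下面的源码保存为 scene_defect_batch.py,放在与各场景文件夹相同的目录下,然后直接运行即可批量处理所有场景(默认启动 2 个进程、每个进程使用 2 张显卡,共需 4 张显卡,可通过 --num_workers 和 --gpus_per_worker 调整):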

复制代码
python scene_defect_batch.py

只处理某个场景文件夹中的一张图片(--scene 填场景文件夹名,例如 表计类;--name 填图片文件名):

复制代码
python scene_defect_batch.py --scene 场景类类别 --name 1040.jpg

指定使用的显卡并多进程并行(下面的例子使用 0~3 号共 4 张显卡,启动 2 个进程,每个进程占用 2 张显卡):

复制代码
python scene_defect_batch.py --gpu_ids 0,1,2,3 --num_workers 2 --gpus_per_worker 2

源码:(这里以电厂场景图片生成为例)

python 复制代码
import argparse
import json
import os
import subprocess
import sys
from pathlib import Path

def _preconfigure_visible_gpus():
    """Export CUDA_VISIBLE_DEVICES from a ``--gpu_ids`` option found in sys.argv.

    Both ``--gpu_ids VALUE`` and ``--gpu_ids=VALUE`` are recognised. Only the
    first occurrence that carries a value is considered; if that value is
    blank the environment is left untouched.
    """
    argv = sys.argv[1:]
    requested = None

    for position, token in enumerate(argv):
        if token == "--gpu_ids":
            if position + 1 >= len(argv):
                # Flag given as the very last token without a value: keep scanning.
                continue
            requested = argv[position + 1]
            break
        if token.startswith("--gpu_ids="):
            requested = token.partition("=")[2]
            break

    if requested is not None and requested.strip():
        os.environ["CUDA_VISIBLE_DEVICES"] = requested.strip()

# Runs at import time, before the remaining module-level imports below.
_preconfigure_visible_gpus()

from PIL import Image

# Directory containing this script; scene input folders are looked up directly under it.
ROOT_DIR = Path(__file__).resolve().parent
# Default root directory for generated outputs (used as the --output_root default).
DEFAULT_OUTPUT_ROOT = ROOT_DIR / "场景缺陷输出"
# File extensions accepted as input images; compared against the lower-cased suffix.
SUPPORTED_SUFFIXES = {".jpg", ".jpeg", ".png", ".bmp", ".webp"}

# Per-scene editing configuration, keyed by scene name (which is also the name
# of the input folder under ROOT_DIR and of the output sub-folder).
# Each entry holds:
#   "defect_name": the defect label inserted into the generated prompt;
#   "instruction": scene-specific editing instructions inserted into the prompt.
SCENE_CONFIGS = {
    "表计类": {
        "defect_name": "表计模糊",
        "instruction": (
            "请把图中的表计或表盘主体编辑成真实的表计模糊缺陷。"
            "重点制造更明显的表盘玻璃脏污、起雾、反光遮挡、焦点偏移、镜头失焦、数字刻度模糊、指针边缘虚化等效果,"
            "让表计读数区域出现较明显的难辨认、难读取、局部严重不清晰的异常状态。"
            "缺陷主体要集中在表计读数区域或表盘表面,可以让读数区域大部分内容都明显变糊,但不要把整个设备或整张图都变模糊。"
        ),
    },
    "地面油污": {
        "defect_name": "地面油污",
        "instruction": (
            "请把图中的地面主体区域编辑成真实的地面油污缺陷。"
            "重点生成深色、湿润、边缘不规则的油污、渗漏痕迹、积液反光、拖拽污染或片状污渍,"
            "使其看起来像设备附近地面发生了漏油或污染。"
            "缺陷应贴合地面透视和纹理,不要把油污做成立体漂浮物。"
        ),
    },
    "金属锈蚀": {
        "defect_name": "金属锈蚀",
        "instruction": (
            "请把图中的金属部件主体编辑成真实的金属锈蚀缺陷。"
            "重点生成锈斑、腐蚀、掉漆后露出的锈层、边缘氧化、局部粉化、褐红色锈迹扩散等效果,"
            "让缺陷看起来像长期受潮或老化导致的金属腐蚀。"
            "锈蚀应附着在金属表面,不要凭空新增不相关结构。"
        ),
    },
    "绝缘子": {
        "defect_name": "绝缘子破损",
        "instruction": (
            "请把图中的绝缘子主体编辑成真实的绝缘子破损缺陷。"
            "重点生成裂纹、破口、崩边、缺角、局部掉块、表面碎裂或破损后的结构不完整状态,"
            "让它看起来像绝缘子受到撞击、老化或损伤后的异常情况。"
            "破损要符合绝缘子的材质和结构,不要把整根绝缘子完全消失。"
        ),
    },
    "鸟巢": {
        "defect_name": "鸟巢",
        "instruction": (
            "请在画面中可能被鸟类筑巢的合理位置生成真实的鸟巢缺陷。"
            "优先选择瓦斯继电器、支架、横梁、平台边角、柜顶、设备上方凹槽、遮挡结构内侧等容易停留和筑巢的位置,"
            "自然增加由树枝、枯草、细枝、杂草等组成的鸟巢结构,必要时可伴随少量散落草梗。"
            "鸟巢尺寸、遮挡关系、受力方式和透视要合理,看起来像真实附着或卡在设备结构上的巢体,"
            "不要把鸟巢做成漂浮物,不要新增明显不合理的大量鸟类,也不要破坏原有设备主体结构。"
        ),
    },
    "箱门闭合异常": {
        "defect_name": "箱门开启",
        "instruction": (
            "请把图中原本处于闭合状态的柜体或箱门编辑成真实的箱门闭合异常缺陷,呈现箱门开启、未关严或门缝明显张开的效果。"
            "优先修改柜门、箱门、检修门、观察门等可开合部件,让门体出现向外打开、半开、偏斜闭合不严或锁扣未闭合的状态。"
            "门板转轴、边框、门缝阴影、内部暗部和遮挡关系要符合真实机械结构,"
            "尽量保持柜体本身、铭牌、周边设备和背景不变,不要凭空重构整个柜子。"
        ),
    },
    "挂空悬浮物": {
        "defect_name": "挂空悬浮物",
        "instruction": (
            "请在电厂设备相关的电线、导线、绝缘子串、金具连接处或邻近高处结构上生成真实的挂空悬浮物缺陷。"
            "重点表现塑料袋、条带、薄膜、布条、轻质杂物等被风挂住后悬挂在电线上或绝缘子附近的状态,"
            "让悬挂物具有自然的下垂、缠绕、拉扯、飘动趋势和重力方向,并与电线或设备产生合理接触。"
            "悬挂物应占比适中、材质轻薄、边缘自然,不要做成夸张的大块遮挡物,不要生成脱离接触点的真正漂浮状态。"
        ),
    },
    "呼吸器硅胶变色": {
        "defect_name": "呼吸器硅胶变色",
        "instruction": (
            "请把设备中部或主体上的呼吸器圆形观察窗口编辑成真实的硅胶变色异常。"
            "重点观察中间圆形窗口内的硅胶颜色,将原本红色、红橙色或偏暖色的硅胶区域自然改成发黑、黑褐色或明显变深的颜色状态,"
            "体现硅胶受潮老化后的异常变色效果。"
            "只修改圆形窗口内部可见硅胶填充物的颜色和局部质感,尽量保持窗口玻璃、外壳、边框、螺栓和周围设备完全不变,"
            "不要把整个呼吸器都涂黑,也不要改成其他无关颜色。"
        ),
    },
}

def parse_args():
    """Parse the command-line options of the batch defect-generation script.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``. The options
        ``--worker_rank``, ``--worker_count`` and ``--manifest_name`` are hidden
        from ``--help``; the dispatcher passes them to the worker processes.
    """
    parser = argparse.ArgumentParser(description="Batch-generate scene-specific defect images for scene folders.")
    add = parser.add_argument

    # Model location and input/output selection.
    add("--model_id", default="./Qwen-Image-Edit-2511")
    add("--output_root", default=str(DEFAULT_OUTPUT_ROOT), help="Root directory for generated outputs.")
    add("--scene", default="", help="Optional single scene folder name, for example 表计类")
    add("--name", default="", help="Optional single image filename, for example 1040.jpg")

    # Prompt customisation.
    add("--prompt_suffix", default="", help="Optional extra prompt text appended to the scene-specific prompt.")
    default_negative_prompt = (
        "不要保留蓝色描边、边框、标注线、文字、水印、箭头、额外符号,不要大幅改变非目标区域,不要改变视角和整体构图,不要新增不合理的夸张特效。"
    )
    add("--negative_prompt", default=default_negative_prompt)

    # Sampling parameters forwarded to the pipeline call.
    add("--seed", type=int, default=0)
    add("--num_inference_steps", type=int, default=40)
    add("--true_cfg_scale", type=float, default=4.0)
    add("--guidance_scale", type=float, default=1.0)
    add("--num_images_per_prompt", type=int, default=1)

    # Batch behaviour and GPU / worker layout.
    add("--overwrite", action="store_true", help="Overwrite existing outputs.")
    add("--gpu_ids", default="", help="Comma-separated GPU ids, for example 0,1,2,3.")
    add("--num_workers", type=int, default=2, help="Number of worker processes in batch mode.")
    add("--gpus_per_worker", type=int, default=2, help="Number of GPUs assigned to each worker.")

    # Internal options, hidden from --help.
    add("--worker_rank", type=int, default=-1, help=argparse.SUPPRESS)
    add("--worker_count", type=int, default=1, help=argparse.SUPPRESS)
    add("--manifest_name", default="", help=argparse.SUPPRESS)

    return parser.parse_args()

def configure_visible_gpus(gpu_ids: str):
    """Set CUDA_VISIBLE_DEVICES to *gpu_ids* unless it is blank or already equal.

    Args:
        gpu_ids: Comma-separated GPU ids; a blank or whitespace-only value
            leaves the environment unchanged.
    """
    if not gpu_ids.strip():
        return
    if os.environ.get("CUDA_VISIBLE_DEVICES") == gpu_ids:
        return
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids

def import_runtime_dependencies():
    """Import torch and the Qwen image-edit pipeline class on demand.

    The imports live inside the function rather than at module level, so they
    only run when this function is called.

    Returns:
        A ``(torch, QwenImageEditPlusPipeline)`` tuple.
    """
    import torch
    from diffusers import QwenImageEditPlusPipeline

    return torch, QwenImageEditPlusPipeline

def build_max_memory_map(torch_module, gpu_count: int):
    """Build a per-GPU memory budget mapping for the first *gpu_count* devices.

    For every device the total memory is read from
    ``torch_module.cuda.get_device_properties``; 6 GiB (device 0) or 3 GiB
    (other devices) are held back, the remainder is truncated to whole GiB and
    never reported lower than 8.

    Args:
        torch_module: The imported ``torch`` module (or a compatible object).
        gpu_count: Number of visible devices to include.

    Returns:
        A dict mapping device index to a string such as ``"18GiB"``.
    """
    bytes_per_gib = 1024 ** 3

    def usable_gib(device_index):
        total_gib = torch_module.cuda.get_device_properties(device_index).total_memory / bytes_per_gib
        headroom_gib = 3 if device_index else 6
        return max(int(total_gib - headroom_gib), 8)

    return {device_index: f"{usable_gib(device_index)}GiB" for device_index in range(gpu_count)}

def load_pipeline(model_id: str):
    """Load the image-edit pipeline in bfloat16 on the visible GPU(s).

    With exactly one visible GPU the pipeline is loaded and moved to
    ``cuda:0``. With several GPUs it is loaded with ``device_map="balanced"``
    and the per-device budget from ``build_max_memory_map``.

    Args:
        model_id: Value passed to ``from_pretrained``.

    Returns:
        ``(pipe, mode, max_memory, torch)`` where ``mode`` is a short
        description string and ``max_memory`` is ``None`` in single-GPU mode.

    Raises:
        RuntimeError: if no CUDA device is visible.
    """
    torch, qwen_pipeline_cls = import_runtime_dependencies()

    visible_gpus = torch.cuda.device_count()
    if visible_gpus <= 0:
        raise RuntimeError("CUDA is not available. This script requires at least one GPU.")

    if visible_gpus > 1:
        max_memory = build_max_memory_map(torch, visible_gpus)
        pipe = qwen_pipeline_cls.from_pretrained(
            model_id,
            device_map="balanced",
            max_memory=max_memory,
            torch_dtype=torch.bfloat16,
        )
        mode = f"multi-gpu({visible_gpus}, balanced)"
    else:
        max_memory = None
        pipe = qwen_pipeline_cls.from_pretrained(model_id, torch_dtype=torch.bfloat16)
        pipe.to("cuda:0")
        mode = "single-gpu"

    pipe.set_progress_bar_config(disable=None)
    return pipe, mode, max_memory, torch

def iter_input_images(input_dir: Path):
    """Return the sorted image files located directly inside *input_dir*.

    Only regular files whose lower-cased suffix is in ``SUPPORTED_SUFFIXES``
    are kept; sub-directories are not descended into.
    """
    images = []
    for entry in input_dir.iterdir():
        if not entry.is_file():
            continue
        if entry.suffix.lower() in SUPPORTED_SUFFIXES:
            images.append(entry)
    images.sort()
    return images

def build_scene_prompt(scene_name: str, image_name: str, prompt_suffix: str):
    """Assemble the full editing prompt for one scene.

    Args:
        scene_name: Key into ``SCENE_CONFIGS``; an unknown key raises ``KeyError``.
        image_name: Accepted for interface compatibility; not used in the prompt.
        prompt_suffix: Extra text appended (stripped) when it is not blank.

    Returns:
        The concatenated prompt string.
    """
    config = SCENE_CONFIGS[scene_name]

    segments = [
        f"请把这张设备图像编辑成一张真实、自然的缺陷图,当前场景为{scene_name},目标缺陷类型为{config['defect_name']}。",
        f"{config['instruction']}",
        "请根据当前场景和画面主体,自然地把缺陷放在最合理的目标位置上,优先采用局部编辑方式,不要把任务做成整图重绘。",
        "缺陷要与原图透视、尺度、材质、光照、阴影、遮挡关系保持一致,生成效果应像真实巡检照片中的局部异常。",
        "除缺陷相关区域外,背景、设备结构、视角、光照、色调、纹理和其他非目标内容尽量保持不变,不要做整图风格化修改。",
        "输出结果应真实克制,避免夸张特效、避免卡通感、避免新增文字、箭头、水印、边框或无关目标。",
    ]

    extra = prompt_suffix.strip()
    if extra:
        segments.append(extra)

    return "".join(segments)

def run_edit(torch_module, pipe, image: Image.Image, prompt: str, negative_prompt: str, seed: int, args):
    """Run one edit through the pipeline and return the first generated image.

    Args:
        torch_module: The imported ``torch`` module.
        pipe: The loaded pipeline object; called with keyword arguments.
        image: Input image; passed to the pipeline as a one-element list.
        prompt: Positive prompt text.
        negative_prompt: Negative prompt text.
        seed: Seed for the CUDA random generator.
        args: Parsed options supplying ``true_cfg_scale``, ``num_inference_steps``,
            ``guidance_scale`` and ``num_images_per_prompt``.

    Returns:
        ``output.images[0]`` of the pipeline call.
    """
    seeded_generator = torch_module.Generator(device="cuda").manual_seed(seed)

    with torch_module.inference_mode():
        output = pipe(
            image=[image],
            prompt=prompt,
            generator=seeded_generator,
            true_cfg_scale=args.true_cfg_scale,
            negative_prompt=negative_prompt,
            num_inference_steps=args.num_inference_steps,
            guidance_scale=args.guidance_scale,
            num_images_per_prompt=args.num_images_per_prompt,
        )

    return output.images[0]

def save_manifest(manifest_path: Path, rows):
    """Write *rows* to *manifest_path* as JSON Lines (UTF-8, one object per line).

    Parent directories are created when missing and an existing file is
    overwritten. Non-ASCII characters are written as-is, not escaped.
    """
    manifest_path.parent.mkdir(parents=True, exist_ok=True)
    with manifest_path.open("w", encoding="utf-8") as handle:
        handle.writelines(f"{json.dumps(row, ensure_ascii=False)}\n" for row in rows)

def resolve_gpu_groups(args):
    """Split the available GPU ids into one comma-separated group per worker.

    GPU ids are taken, in order of preference, from ``args.gpu_ids``, from the
    ``CUDA_VISIBLE_DEVICES`` environment variable, or, when neither is set,
    assumed to be ``0 .. num_workers * gpus_per_worker - 1``.

    Args:
        args: Parsed options providing ``gpu_ids``, ``num_workers`` and
            ``gpus_per_worker``.

    Returns:
        A list of ``args.num_workers`` strings such as ``["0,1", "2,3"]``.

    Raises:
        ValueError: if ``num_workers`` or ``gpus_per_worker`` is not positive.
        RuntimeError: if fewer GPU ids are available than the layout requires.
    """
    # Reject non-positive counts up front: they would otherwise produce empty
    # (or nonsensical) GPU groups, and a worker started with an empty
    # --gpu_ids would see every GPU instead of its own share.
    if args.num_workers < 1 or args.gpus_per_worker < 1:
        raise ValueError(
            "--num_workers and --gpus_per_worker must both be positive, "
            f"got num_workers={args.num_workers}, gpus_per_worker={args.gpus_per_worker}"
        )

    required_gpu_count = args.num_workers * args.gpus_per_worker

    raw_ids = args.gpu_ids.strip() or os.environ.get("CUDA_VISIBLE_DEVICES", "").strip()
    if raw_ids:
        flat_gpu_ids = [item.strip() for item in raw_ids.split(",") if item.strip()]
    else:
        flat_gpu_ids = [str(i) for i in range(required_gpu_count)]

    if len(flat_gpu_ids) < required_gpu_count:
        raise RuntimeError(
            f"Need at least {required_gpu_count} GPUs for {args.num_workers} workers x {args.gpus_per_worker} GPUs, "
            f"but only resolved {len(flat_gpu_ids)} GPU ids: {flat_gpu_ids}"
        )

    # Consecutive slices of the id list; ids beyond the required count are unused.
    return [
        ",".join(flat_gpu_ids[start:start + args.gpus_per_worker])
        for start in range(0, required_gpu_count, args.gpus_per_worker)
    ]

def resolve_scene_names(args):
    """Return the scene names to process.

    Returns ``[args.scene]`` when a scene was requested, otherwise every key
    of ``SCENE_CONFIGS`` in definition order.

    Raises:
        ValueError: if ``args.scene`` is set but not a key of ``SCENE_CONFIGS``.
    """
    requested = args.scene
    if not requested:
        return list(SCENE_CONFIGS.keys())
    if requested in SCENE_CONFIGS:
        return [requested]
    raise ValueError(f"Unknown scene: {requested}. Available scenes: {list(SCENE_CONFIGS)}")

def build_image_items(args):
    """List the work items assigned to this process.

    Each item is ``(global_index, scene_name, input_path, output_path)``.
    ``global_index`` numbers all items across scenes before sharding; when
    running as a worker (``worker_rank >= 0`` and ``worker_count > 1``) only
    the items whose index modulo ``worker_count`` equals ``worker_rank`` are kept.

    Raises:
        FileNotFoundError: if a scene directory does not exist under ``ROOT_DIR``.
        ValueError: propagated from ``resolve_scene_names`` for an unknown scene.
    """
    output_root = Path(args.output_root).resolve()
    image_items = []

    for scene_name in resolve_scene_names(args):
        scene_dir = ROOT_DIR / scene_name
        if not scene_dir.exists():
            raise FileNotFoundError(f"Scene directory not found: {scene_dir}")

        # With --name the single file is listed even if it does not exist.
        candidates = [scene_dir / args.name] if args.name else iter_input_images(scene_dir)

        for image_path in candidates:
            destination = output_root / scene_name / image_path.name
            image_items.append((len(image_items), scene_name, image_path, destination))

    is_sharded_worker = args.worker_rank >= 0 and args.worker_count > 1
    if not is_sharded_worker:
        return image_items

    return [item for item in image_items if item[0] % args.worker_count == args.worker_rank]

def build_subprocess_command(args, worker_rank: int, worker_count: int, gpu_group: str):
    """Build the argv list used to launch one worker process.

    The worker re-runs this script with the same generation options, pinned to
    *gpu_group* and told which shard (``worker_rank`` of ``worker_count``) to
    process and which per-worker manifest file to write.

    Args:
        args: Parsed options of the dispatching (parent) process.
        worker_rank: Zero-based rank of the worker.
        worker_count: Total number of workers.
        gpu_group: Comma-separated GPU ids passed as ``--gpu_ids``.

    Returns:
        A list of strings suitable for ``subprocess.Popen``.
    """
    # Workers are started with cwd=ROOT_DIR (see dispatch_workers), which may
    # differ from the directory the user launched the dispatcher from. Forward
    # absolute paths so workers write to the same output root the dispatcher
    # later merges manifests in, and load the same local model directory.
    output_root = str(Path(args.output_root).resolve())

    # model_id may be a hub identifier rather than a path; only rewrite it
    # when it actually names something on the local filesystem.
    model_id = args.model_id
    if model_id and Path(model_id).exists():
        model_id = str(Path(model_id).resolve())

    cmd = [
        sys.executable,
        str(Path(__file__).resolve()),
        "--model_id",
        model_id,
        "--output_root",
        output_root,
        "--prompt_suffix",
        args.prompt_suffix,
        "--negative_prompt",
        args.negative_prompt,
        "--seed",
        str(args.seed),
        "--num_inference_steps",
        str(args.num_inference_steps),
        "--true_cfg_scale",
        str(args.true_cfg_scale),
        "--guidance_scale",
        str(args.guidance_scale),
        "--num_images_per_prompt",
        str(args.num_images_per_prompt),
        # Each worker is itself a single process, so it must not dispatch again.
        "--num_workers",
        "1",
        "--gpu_ids",
        gpu_group,
        "--worker_rank",
        str(worker_rank),
        "--worker_count",
        str(worker_count),
        "--manifest_name",
        f"prompt_manifest.worker{worker_rank}.jsonl",
    ]

    if args.overwrite:
        cmd.append("--overwrite")
    if args.scene:
        cmd.extend(["--scene", args.scene])
    if args.name:
        cmd.extend(["--name", args.name])

    return cmd

def merge_worker_manifests(output_root: Path, worker_count: int):
    """Concatenate the per-worker manifests into ``prompt_manifest.jsonl``.

    Files named ``prompt_manifest.worker{rank}.jsonl`` are read in rank order
    from *output_root*; missing files are skipped and blank lines dropped.
    The merged file is always (re)written, even when no rows were found.
    """
    collected = []
    for rank in range(worker_count):
        part = output_root / f"prompt_manifest.worker{rank}.jsonl"
        if part.exists():
            collected += part.read_text(encoding="utf-8").splitlines()

    body = "\n".join(filter(None, collected))
    if collected:
        body += "\n"

    (output_root / "prompt_manifest.jsonl").write_text(body, encoding="utf-8")

def dispatch_workers(args):
    """Launch one worker subprocess per GPU group and wait for all of them.

    All workers are started first (with ``cwd`` set to ``ROOT_DIR``), then
    awaited in rank order. When every worker exits with code 0 their manifests
    are merged.

    Raises:
        RuntimeError: listing ``(worker_rank, return_code)`` for each worker
            that exited with a non-zero code; no merge happens in that case.
    """
    gpu_groups = resolve_gpu_groups(args)
    output_root = Path(args.output_root).resolve()

    running = []
    for rank in range(args.num_workers):
        assigned_gpus = gpu_groups[rank]
        command = build_subprocess_command(args, rank, args.num_workers, assigned_gpus)
        print(f"[dispatch] worker={rank} gpus={assigned_gpus}")
        child = subprocess.Popen(command, cwd=str(ROOT_DIR))
        running.append((rank, child))

    # Wait for every worker before deciding, so none is left un-awaited.
    exit_codes = [(rank, child.wait()) for rank, child in running]
    failed_workers = [(rank, code) for rank, code in exit_codes if code != 0]

    if failed_workers:
        raise RuntimeError(f"Worker failures: {failed_workers}")

    merge_worker_manifests(output_root, args.num_workers)

def run_single_process(args):
    """Process this process's share of images and write its manifest.

    Steps: pin the visible GPUs, enumerate the work items, load the pipeline,
    edit each image whose input exists and whose output is not already present
    (unless ``--overwrite``), then write one JSON Lines manifest row per saved
    image to ``args.manifest_name`` (default ``prompt_manifest.jsonl``) under
    the output root.

    Raises:
        FileNotFoundError / ValueError: from ``build_image_items`` for a
            missing scene directory or unknown scene, before any model loading.
        RuntimeError: if CUDA is not available.
    """
    configure_visible_gpus(args.gpu_ids)

    # Enumerate inputs before touching the model: argument and directory
    # errors surface immediately instead of after a long model load.
    image_items = build_image_items(args)
    manifest_name = args.manifest_name or "prompt_manifest.jsonl"
    manifest_path = Path(args.output_root).resolve() / manifest_name

    manifest_rows = []
    total = len(image_items)
    saved = 0
    skipped = 0

    if total == 0:
        # Nothing assigned to this process (e.g. a worker whose shard is
        # empty): still write the (empty) manifest, but skip the model load.
        save_manifest(manifest_path, manifest_rows)
        _print_run_summary(saved, skipped)
        return

    torch, _ = import_runtime_dependencies()
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available.")

    pipe, mode, max_memory, torch = load_pipeline(args.model_id)
    print(f"Running mode: {mode}")
    print(f"CUDA_VISIBLE_DEVICES={os.environ.get('CUDA_VISIBLE_DEVICES', '(default)')}")
    print(f"Visible GPU count: {torch.cuda.device_count()}")
    if max_memory is not None:
        print(f"Max memory map: {max_memory}")
    if hasattr(pipe, "hf_device_map"):
        print(f"Device map: {pipe.hf_device_map}")

    try:
        for local_index, (global_index, scene_name, input_path, output_path) in enumerate(image_items):
            if not input_path.exists():
                print(f"[missing] {input_path}")
                skipped += 1
                continue

            if output_path.exists() and not args.overwrite:
                print(f"[skip-exists] {scene_name}/{output_path.name}")
                skipped += 1
                continue

            prompt = build_scene_prompt(scene_name, input_path.name, args.prompt_suffix)
            # Seed depends on the global (pre-sharding) index, so a given image
            # gets the same seed regardless of how many workers are used.
            seed = args.seed + global_index

            print(f"[processing] {scene_name}/{input_path.name} ({local_index + 1}/{total})")
            print(f"[prompt] {prompt}")

            # Context manager closes the underlying file deterministically.
            with Image.open(input_path) as source_image:
                image = source_image.convert("RGB")
            result = run_edit(torch, pipe, image, prompt, args.negative_prompt, seed, args)

            output_path.parent.mkdir(parents=True, exist_ok=True)
            result.save(output_path)
            saved += 1

            manifest_rows.append(
                {
                    "scene_name": scene_name,
                    "image_name": input_path.name,
                    "input_path": str(input_path),
                    "output_path": str(output_path),
                    "seed": seed,
                    "prompt": prompt,
                    "negative_prompt": args.negative_prompt,
                    "worker_rank": args.worker_rank,
                }
            )

            print(f"[saved] {output_path}")
    finally:
        # Record what was actually saved even if a later image fails, so the
        # manifest always matches the files written by this run.
        save_manifest(manifest_path, manifest_rows)

    _print_run_summary(saved, skipped)

def _print_run_summary(saved: int, skipped: int):
    """Print the final processed/skipped counters."""
    print("")
    print(f"Processed: {saved}")
    print(f"Skipped: {skipped}")

def main():
    """Entry point: dispatch worker processes or process images directly.

    The process acts as dispatcher only when it is not itself a worker
    (``worker_rank < 0``) and more than one worker was requested.
    """
    args = parse_args()

    if args.num_workers > 1 and args.worker_rank < 0:
        dispatch_workers(args)
    else:
        run_single_process(args)

if __name__ == "__main__":
    main()

三、显存占用