[linux-sd-webui]之txt2img

从webui中抽离出txt2img的接口：

1.参数

复制代码

id_task="task",
prompt="a highly detailed tower designed by Zaha hadid with few metal and lots of glass,roads around with much traffic,in a city with a lot of greenery,aerial view, stunning sunny lighting, foggy atmosphere, vivid colors, Photo-grade rendering, Realistic style,8k,high res,highly detialed, ray tracing,vray render,masterpiece, best quality,rendered by Mir. and Brick visual",
negative_prompt="", # v2模型才支持
prompt_styles=[],
steps=20, # sampling steps,去噪过程的采样步骤数，25
sampler_index=0, # sampling method,采样方法
restore_faces=False, # 应用了额外模型，该模型可以恢复面部缺陷
tiling=False, # 生成可以平铺的周期性图像
n_iter=1, # batch count,推理的次数
batch_size=1, # 一次推理生成的图片，batch count*batch_size是总生成图片数目
cfg_scale=7, # classifier free guidance scale,控制模型遵循prompt的程度：1，基本忽视prompt；
# 3，更有创意；7，在prompt和freedom中取得平衡；15，更遵守prompt；30，严格按照prompt
seed=-1, # 用于在潜空间中生成初始随机张量的种子，控制图像的内容，每生成一个图像都有自己的种子值
subseed=-1, # 附加种子值，在extra中
subseed_strength=0, # seed和variation seed（subseed）之间的插值程度。如果生成了两张图，可以把第一张
# 图的种子作为seed，第二张图的种子作为subseed，然后让subseed strength在0-1之间变化，就会生成在
# 两张图之间逐渐过渡的图像。
seed_resize_from_h=0, # 即便使用相同的seed，如果更改图片大小，图像也会发生变化，
因此在这里调整图片，图片内容变化不会太大
seed_resize_from_w=0,
seed_enable_extras=False, # True,subseed到seed_resize_from_w之间的都要填值
height=512, # v1模型一边至少是512
width=512,
enable_hr=False, # 使用high-resolution fix放大图片。sd v1.4的原生分辨率是512，v2是768，生成的图片太小；
# 如果直接将宽度和高度设置得更高，比如1024，偏离原始分辨率会影响构图，例如生成带有两个头的图片
denoising_strength=0.7, # 仅用于latent upscalers,该参数与image-to-image中的含义相同，
# 它控制在执行hires采样步骤之前添加到潜空间中的噪声，必须大于0.5，否则会产生模糊的图像。
# 使用latent的好处是不会像esrgan这类放大器一样引入放大伪像，由sd的解码器生成图像，确保风格一致；
# 缺点是会在一定程度上改变图像，这取决于去噪强度的值
hr_scale=0,  # 放大的倍数
hr_upscaler="Latent",  # 在潜空间中缩放图像，它是在文本到图像生成的采样步骤之后完成的，
类似于图像到图像
hr_second_pass_steps=0, # 采样步数
hr_resize_x=0, # 指定尺寸
hr_resize_y=0,
override_settings_texts=[],

photo of woman, dress, city night background

photo of woman, dress, city night background, bracelet

seed 1

seed 3

2.代码流程

复制代码

modules.sd_models.setup_model()->
list_models()->cmd_ckpt=shared.cmd_opts.ckpt加载模型->
modules.txt2img.txt2img->
modules.scripts.ScriptRunner.run()-> 实际上在挑scripts，txt2img有三个scripts，一些工具
modules/txt2img.py->process_images(p) 58
modules/processing.py->process_images_inner(p) 503
modules/processing.py->sampler.sample() 871
modules/sd_samplers_kdiffusion.py->sample->launch_sampling() 359
repositories/k-diffusion/k_diffusion/sampling.py->model() 146
modules/sd_samplers_kdiffusion.py->CFGDenoiser->forward()->inner_model 126
repositories/k-diffusion/k_diffusion/external.py->DiscreteEpsDDPMDenoiser->forward() 112
repositories/k-diffusion/k_diffusion/external.py->
modules/sd_hijack_utils.py->
repositories/stable-diffusion-stability-ai/ldm/models/diffusion/ddpm.py->self.model() 858
ddpm.py DiffusionWrapper diffusion_model 1330
- modules/diffusionmodules/openaimodel.py->
repositories/k-diffusion/k_diffusion/external.py->forward()
modules/sd_samplers_kdiffusion.py->CFGDenoiser->forward()->denoised:[4,4,64,64]
modules/sampling/sample_euler_ancestral->self.model()
modules/processing/StableDiffusionProcessingText2Img().sample() return samples 874
modules/processing->decode_first_stage()->
repositories/stable-diffusion-stability-ai/ldm/models/diffusion/ddpm.py decode_first_stage
- ldm/models/autoencoder.py AutoencoderKL.decode->self.post_quant_conv()->
extensions-builtin/lora/lora.py->lora_conv2d_forward()->
repositories/stable-diffusion-stability-ai/ldm/modules/diffusionmodules/model.py Decoder 621->
x_sample_ddim

Euler a
Euler
LMS
Heun
DPM2
DPM2 a
DPM++ 2S a
DPM++ 2M
DPM++ SDE
DPM fast
DPM adaptive
LMS Karras
DPM2 Karras
DPM2 a Karras
DPM++ 2S a Karras
DPM++ 2M Karras
DPM++ SDE Karras
DDIM
PLMS

main.py

复制代码

import sys
import cv2
import torch
import logging

# Drop "xformers" log records whose message contains the missing-Triton notice;
# every other record from that logger passes through unchanged.
logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage())
import numpy as np
from modules import paths, timer, import_hook, errors

# Created before the remaining module imports; initialize() records each
# startup phase on this timer.
startup_timer = timer.Timer()

from modules import shared, devices, sd_samplers, upscaler, extensions, localization, ui_tempdir, ui_extra_networks
import modules.scripts
import modules.sd_models
import modules.txt2img

from modules.shared import cmd_opts
from torchvision.utils import save_image

def initialize():
    """Run the startup steps that must complete before txt2img is called.

    The steps run in a fixed order, each one recorded on ``startup_timer``:
    list extensions and localizations, set up the model list, load scripts,
    then load the Stable Diffusion checkpoint.

    Raises:
        SystemExit: with status 1 when loading the checkpoint raises; the
            error is displayed and a message is written to stderr first.
    """
    extensions.list_extensions()
    localization.list_localizations(cmd_opts.localizations_dir)
    startup_timer.record("list extensions")

    modules.sd_models.setup_model()  # model setup (recorded as "list SD models")
    startup_timer.record("list SD models")

    modules.scripts.load_scripts()
    startup_timer.record("load scripts")

    try:
        modules.sd_models.load_model()
    except Exception as e:
        # Top-level boundary: report the failure and stop, nothing can run
        # without a loaded model.
        errors.display(e, "loading stable diffusion model")
        print("", file=sys.stderr)
        print("Stable diffusion model failed to load, exiting", file=sys.stderr)
        # Use sys.exit: the bare exit() helper is injected by the `site`
        # module and is not guaranteed to exist (e.g. `python -S`).
        sys.exit(1)
    startup_timer.record("load SD checkpoint")


def webui():
    """Initialize the modules, run one txt2img generation and save the images.

    Each returned image is written to ``<index>.png`` (a relative path, so it
    lands in the current working directory), where ``index`` is the image's
    position in the returned list.
    """
    initialize()

    # txt2img returns a 4-tuple; only the list of images is needed here.
    images, _, _, _ = modules.txt2img.txt2img(
        id_task="task",
        prompt="a highly detailed tower designed by Zaha hadid with few metal and lots of glass,roads around with much traffic,in a city with a lot of greenery,aerial view, stunning sunny lighting, foggy atmosphere, vivid colors, Photo-grade rendering, Realistic style,8k,high res,highly detialed, ray tracing,vray render,masterpiece, best quality,rendered by Mir. and Brick visual",
        negative_prompt="",
        prompt_styles=[],
        steps=20,
        sampler_index=0,
        restore_faces=False,
        tiling=False,
        n_iter=1,
        batch_size=1,
        cfg_scale=7,
        seed=-1,
        subseed=-1,
        subseed_strength=0,
        seed_resize_from_h=0,
        seed_resize_from_w=0,
        seed_enable_extras=False,
        height=512,
        width=512,
        enable_hr=False,
        denoising_strength=0.7,
        hr_scale=0,
        hr_upscaler="Latent",
        hr_second_pass_steps=0,
        hr_resize_x=0,
        hr_resize_y=0,
        override_settings_texts=[],
    )

    for index, img in enumerate(images):
        img.save(f"{index}.png")

# Script entry point: run the generation only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    webui()

modules/txt2img.py

复制代码

import modules.scripts
from modules import sd_samplers
from modules.generation_parameters_copypaste import create_override_settings_dict
from modules.processing import StableDiffusionProcessing, Processed, StableDiffusionProcessingTxt2Img, \
    StableDiffusionProcessingImg2Img, process_images
from modules.shared import opts, cmd_opts
import modules.shared as shared
import modules.processing as processing
from modules.ui import plaintext_to_html


def txt2img(id_task: str, prompt: str, negative_prompt: str, prompt_styles, steps: int, sampler_index: int,
            restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, subseed: int,
            subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool,
            height: int, width: int, enable_hr: bool, denoising_strength: float, hr_scale: float, hr_upscaler: str,
            hr_second_pass_steps: int, hr_resize_x: int, hr_resize_y: int, override_settings_texts, *args):
    """Generate images from a text prompt and return them with their metadata.

    Builds a ``StableDiffusionProcessingTxt2Img`` from the arguments, hands it
    to the txt2img script runner, and falls back to ``process_images`` when
    the runner returns ``None``.

    Notes:
        * ``id_task`` is accepted but not used in this function.
        * Extra positional ``*args`` supplied by the caller are ignored; a
          fixed tuple of script arguments is always used instead.
        * ``sampler_index`` indexes into ``sd_samplers.samplers``.
        * ``denoising_strength`` is forwarded only when ``enable_hr`` is true;
          otherwise ``None`` is passed.

    Returns:
        A 4-tuple ``(images, generation_info_js, info_html, comments_html)``.
        ``images`` is an empty list when ``opts.do_not_show_images`` is set.
    """
    # Fixed script arguments used in place of the caller's *args.
    # NOTE(review): presumably these mirror the default script UI inputs —
    # confirm against the installed scripts.
    script_args = (
        0, False, False, 'positive', 'comma', 0, False, False, '', 1, '', 0, '', 0, '', True, False, False, False, 0)
    override_settings = create_override_settings_dict(override_settings_texts)

    p = StableDiffusionProcessingTxt2Img(
        sd_model=shared.sd_model,
        outpath_samples=opts.outdir_samples or opts.outdir_txt2img_samples,
        outpath_grids=opts.outdir_grids or opts.outdir_txt2img_grids,
        prompt=prompt,
        styles=prompt_styles,
        negative_prompt=negative_prompt,
        seed=seed,
        subseed=subseed,
        subseed_strength=subseed_strength,
        seed_resize_from_h=seed_resize_from_h,
        seed_resize_from_w=seed_resize_from_w,
        seed_enable_extras=seed_enable_extras,
        sampler_name=sd_samplers.samplers[sampler_index].name,
        batch_size=batch_size,
        n_iter=n_iter,
        steps=steps,
        cfg_scale=cfg_scale,
        width=width,
        height=height,
        restore_faces=restore_faces,
        tiling=tiling,
        enable_hr=enable_hr,
        denoising_strength=denoising_strength if enable_hr else None,
        hr_scale=hr_scale,
        hr_upscaler=hr_upscaler,
        hr_second_pass_steps=hr_second_pass_steps,
        hr_resize_x=hr_resize_x,
        hr_resize_y=hr_resize_y,
        override_settings=override_settings,
    )

    p.scripts = modules.scripts.scripts_txt2img
    p.script_args = script_args

    try:
        if cmd_opts.enable_console_prompts:
            print(f"\ntxt2img: {prompt}", file=shared.progress_print_out)

        processed = modules.scripts.scripts_txt2img.run(p, *script_args)

        # The runner returned no result, so run the regular pipeline.
        if processed is None:
            processed = process_images(p)
    finally:
        # Close the processing object even when generation raises.
        p.close()

    shared.total_tqdm.clear()

    generation_info_js = processed.js()
    if opts.samples_log_stdout:
        print(generation_info_js)

    if opts.do_not_show_images:
        processed.images = []

    return processed.images, generation_info_js, plaintext_to_html(processed.info), plaintext_to_html(
        processed.comments)