由于Fooocus的推理性能优秀,后续考虑从webui切换到Fooocus,因此需要对其代码进行深入分析:Fooocus的sdxl版本在11g显存上运行压力不大,而webui的sdxl版本至少需要12g。分析时尤其要对比它相对webui的优化点。不过在代码层面,Fooocus和webui并不是同一档次的框架:webui采用了支持hook的插件系统,而Fooocus因为对标midjourney的定位,并不走第三方插件的路线。
在autodl上:python launch.py --listen --port 6006
entry_with_update.py
python
# 启动
python entry_with_update.py --listen
launch.py ->
prepare_environment()->
ini_comfy_args()->
- args_manager.py -> args = comfy_cli.args ->backend/headless/comfy/cli_args.py
download_models()->
webui.py ->
- run_button.click().then(fn=generate_clicked)->
-- modules/async_worker.py->worker()->threading.Thread(target=worker).start()
-- handler()-> # 传入参数并配置
-- prompt_processing/vary/upscale/inpaint/controlnet/
-- imgs = pipeline.process_diffusion(...)->
modules/default_pipeline.py -> process_diffusion() 是主pipeline;webui中与之对应的是StableDiffusionProcessingTxt2Img和StableDiffusionProcessingImg2Img两个核心接口。
python
if latent is None:
empty_latent = core.generate_empty_latent(width,height,1)
else:
empty_latent = latent
sampled_latent = core.ksampler(final_unet,final_refiner,positive_cond,negative_cond,empty_latent,steps,denoise,callback,cfg_scale,sampler_name,scheduler_name,switch)
decoded_latent = core.decode_vae(vae,sampled_latent,...)
images = core.pytorch_to_numpy(decoded_latent)
默认方法:refresh_everything()
python
refresh_everything()->
refresh_refiner_model(refiner_model_name)
refresh_base_model(base_model_name)
refresh_loras(loras)
prepare_text_encoder(True)
core.py -> generate_empty_latent()
python
opEmptyLatentImage.generate(width,height,batch_size)[0]
- backend/headless/nodes.py -> EmptyLatentImage.generate()
- latent = torch.zeros([bs,4,height//8,width//8]) -> {'samples':latent}
core.py->ksampler()->backend/headless/comfy/sample.py
python
core->ksampler()
latent_image = latent['samples']
noise = comfy.sample.prepare_noise(latent_image,seed,...)
samples = comfy.sample.sample(model,noise,steps,cfg,sampler_name,scheduler,positive,negative,latent_image,...)
- backend/headless/comfy/sample.py->sample()
- real_model,positive_copy,negative_copy,noise_mask,models=prepare_sampling(model,noise.shape,positive...)
- sampler = comfy.samplers.KSampler(...)
- samples = sampler.sample(noise,positive_copy,negative_copy,cfg,latent_image...)
-- sampler = sampler_class(self.sampler)
-- sample(self.model,noise,positive,...)
sampler_class()->backend/headless/comfy/samplers.py
python
sampler_class(name)->
sampler = ksampler(name)->class KSAMPLER(Sampler)
sample->modules/sample_hijack.py:劫持了comfy中的sample(原实现位于backend/headless/comfy/samplers.py)
python
sample_hijack(model,noise,positive,negative,cfg,device,sampler,sigmas,model_options,latent_image,denoise_mask,callback,...) ->
positive = positive[:]
negative = negative[:]
model_wrap = wrap_model(model)
- model_denoise = CFGNoisePredictor(model)
- model_wrap = k_diffusion_external.CompVisDenoiser(model_denoise)
calculate_start_end_timesteps(model_wrap,negative)
calculate_start_end_timesteps(model_wrap,positive)
for c in positive/negative:
create_cond_with_same_area_if_none(negative/positive,c)
pre_run_control(model_wrap,negative+positive) # ControlNet相关:对cond中携带的control执行pre_run
latent_image = model.process_latent_in(latent_image)
samples = samplers.sample(model_wrap,sigmas,extra_args,...)
model.process_latent_out(samples)
backend/headless/comfy/samplers.py
python
class KSAMPLER->sample(model_wrap,sigmas,extra_args,callback,...)
model_k = KSamplerX0Inpaint(model_wrap)
if sampler_name == "dpm_fast":
samples = k_diffusion_sampling.sample_dpm_fast(model_k,noise,...)
elif sampler_name == "dpm_adaptive":
samples = k_diffusion_sampling.sample_dpm_adaptive(model_k,noise,...)
else:
samples = getattr(k_diffusion_sampling,"sample_{}".format(sampler_name))(model_k,noise,...)
backend/headless/comfy/k_diffusion/sampling.py
python
sample_dpmpp_2m_sde_gpu(model,x,sigmas,extra_args,callback,...)
noise_sampler = BrownianTreeNoiseSampler(x,sigma_min,..) if noise_sampler is None else noise_sampler
sample_dpmpp_2m_sde(model,x,...)
sample_dpmpp_2m_sde
python
for i in trange(len(sigmas)-1):
denoised = model(x,sigmas[i]*s_in,**extra_args)
if callback is not None:
callback({'x':x,'i':i,'sigma':sigmas[i],'sigma_hat':sigmas[i],'denoised':denoised})
if sigmas[i+1] == 0:
x = denoised
else:
# DPM-Solver++(2M) SDE
t, s = -sigmas[i].log(), -sigmas[i + 1].log()
h = s - t
eta_h = eta * h
x = sigmas[i + 1] / sigmas[i] * (-eta_h).exp() * x + (-h - eta_h).expm1().neg() * denoised
if old_denoised is not None:
r = h_last / h
if solver_type == 'heun':
x = x + ((-h - eta_h).expm1().neg() / (-h - eta_h) + 1) * (1 / r) * (denoised - old_denoised)
elif solver_type == 'midpoint':
x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - old_denoised)
if eta:
x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise
backend/headless/nodes.py 节点就是类
python
NODE_CLASS_MAPPINGS = {
"KSampler": KSampler,
"CheckpointLoaderSimple": CheckpointLoaderSimple,
"CLIPTextEncode": CLIPTextEncode,
"CLIPSetLastLayer": CLIPSetLastLayer,
"VAEDecode": VAEDecode,
"VAEEncode": VAEEncode,
"VAEEncodeForInpaint": VAEEncodeForInpaint,
"VAELoader": VAELoader,
"EmptyLatentImage": EmptyLatentImage,
"LatentUpscale": LatentUpscale,
"LatentUpscaleBy": LatentUpscaleBy,
"LatentFromBatch": LatentFromBatch,
"RepeatLatentBatch": RepeatLatentBatch,
"SaveImage": SaveImage,
"PreviewImage": PreviewImage,
"LoadImage": LoadImage,
"LoadImageMask": LoadImageMask,
"ImageScale": ImageScale,
"ImageScaleBy": ImageScaleBy,
"ImageInvert": ImageInvert,
"ImageBatch": ImageBatch,
"ImagePadForOutpaint": ImagePadForOutpaint,
"EmptyImage": EmptyImage,
"ConditioningAverage": ConditioningAverage,
"ConditioningCombine": ConditioningCombine,
"ConditioningConcat": ConditioningConcat,
"ConditioningSetArea": ConditioningSetArea,
"ConditioningSetAreaPercentage": ConditioningSetAreaPercentage,
"ConditioningSetMask": ConditioningSetMask,
"KSamplerAdvanced": KSamplerAdvanced,
"SetLatentNoiseMask": SetLatentNoiseMask,
"LatentComposite": LatentComposite,
"LatentBlend": LatentBlend,
"LatentRotate": LatentRotate,
"LatentFlip": LatentFlip,
"LatentCrop": LatentCrop,
"LoraLoader": LoraLoader,
"CLIPLoader": CLIPLoader,
"UNETLoader": UNETLoader,
"DualCLIPLoader": DualCLIPLoader,
"CLIPVisionEncode": CLIPVisionEncode,
"StyleModelApply": StyleModelApply,
"unCLIPConditioning": unCLIPConditioning,
"ControlNetApply": ControlNetApply,
"ControlNetApplyAdvanced": ControlNetApplyAdvanced,
"ControlNetLoader": ControlNetLoader,
"DiffControlNetLoader": DiffControlNetLoader,
"StyleModelLoader": StyleModelLoader,
"CLIPVisionLoader": CLIPVisionLoader,
"VAEDecodeTiled": VAEDecodeTiled,
"VAEEncodeTiled": VAEEncodeTiled,
"unCLIPCheckpointLoader": unCLIPCheckpointLoader,
"GLIGENLoader": GLIGENLoader,
"GLIGENTextBoxApply": GLIGENTextBoxApply,
"CheckpointLoader": CheckpointLoader,
"DiffusersLoader": DiffusersLoader,
"LoadLatent": LoadLatent,
"SaveLatent": SaveLatent,
"ConditioningZeroOut": ConditioningZeroOut,
"ConditioningSetTimestepRange": ConditioningSetTimestepRange,
}