folders
- ComfyUI/.gitignore +0 -1
- ComfyUI/models/.gitignore +1 -0
- ComfyUI/models/checkpoints/put_checkpoints_here +0 -0
- ComfyUI/models/clip/put_clip_or_text_encoder_models_here +0 -0
- ComfyUI/models/clip_vision/put_clip_vision_models_here +0 -0
- ComfyUI/models/configs/anything_v3.yaml +73 -0
- ComfyUI/models/configs/v1-inference.yaml +70 -0
- ComfyUI/models/configs/v1-inference_clip_skip_2.yaml +73 -0
- ComfyUI/models/configs/v1-inference_clip_skip_2_fp16.yaml +74 -0
- ComfyUI/models/configs/v1-inference_fp16.yaml +71 -0
- ComfyUI/models/configs/v1-inpainting-inference.yaml +71 -0
- ComfyUI/models/configs/v2-inference-v.yaml +68 -0
- ComfyUI/models/configs/v2-inference-v_fp32.yaml +68 -0
- ComfyUI/models/configs/v2-inference.yaml +67 -0
- ComfyUI/models/configs/v2-inference_fp32.yaml +67 -0
- ComfyUI/models/configs/v2-inpainting-inference.yaml +158 -0
- ComfyUI/models/controlnet/put_controlnets_and_t2i_here +0 -0
- ComfyUI/models/diffusers/put_diffusers_models_here +0 -0
- ComfyUI/models/embeddings/put_embeddings_or_textual_inversion_concepts_here +0 -0
- ComfyUI/models/gligen/put_gligen_models_here +0 -0
- ComfyUI/models/hypernetworks/put_hypernetworks_here +0 -0
- ComfyUI/models/layer_model/layers +0 -0
- ComfyUI/models/loras/put_loras_here +0 -0
- ComfyUI/models/photomaker/put_photomaker_models_here +0 -0
- ComfyUI/models/style_models/put_t2i_style_model_here +0 -0
- ComfyUI/models/unet/put_unet_files_here +0 -0
- ComfyUI/models/upscale_models/put_esrgan_and_other_upscale_models_here +0 -0
- ComfyUI/models/vae/put_vae_here +0 -0
- ComfyUI/models/vae_approx/put_taesd_encoder_pth_and_taesd_decoder_pth_here +0 -0
    	
ComfyUI/.gitignore  CHANGED

@@ -3,7 +3,6 @@ __pycache__/
 /output/
 /input/
 !/input/example.png
-/models/
 /temp/
 /custom_nodes/
 !custom_nodes/layerdiffuse/*
    	
ComfyUI/models/.gitignore  ADDED

@@ -0,0 +1 @@
+*.safetensors
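Taken together, these two .gitignore edits stop ignoring the whole models/ tree and instead ignore only *.safetensors files anywhere under ComfyUI/models/, so the put_*_here placeholder files added below can be tracked. A minimal sketch to verify the rules from a git checkout (the .safetensors filename is hypothetical):

```python
# Minimal check of the new ignore rules via `git check-ignore`.
# Run from the repository root; the model filename is hypothetical.
import subprocess

paths = [
    "ComfyUI/models/checkpoints/model.safetensors",    # expected: ignored by ComfyUI/models/.gitignore
    "ComfyUI/models/checkpoints/put_checkpoints_here", # expected: no longer ignored
]
for path in paths:
    result = subprocess.run(
        ["git", "check-ignore", "-v", path],
        capture_output=True, text=True,
    )
    # Exit code 0 means the path is ignored; -v prints the matching pattern.
    print(path, "->", result.stdout.strip() or "not ignored")
```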
    	
ComfyUI/models/checkpoints/put_checkpoints_here  ADDED

File without changes
    	
ComfyUI/models/clip/put_clip_or_text_encoder_models_here  ADDED

File without changes
    	
ComfyUI/models/clip_vision/put_clip_vision_models_here  ADDED

File without changes
    	
ComfyUI/models/configs/anything_v3.yaml  ADDED

@@ -0,0 +1,73 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false   # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+      params:
+        layer: "hidden"
+        layer_idx: -2
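These configs follow the CompVis latent-diffusion convention: every `target` is a dotted import path and `params` are the constructor arguments for that class. A minimal sketch of how ldm-style code typically turns such a file into a model, with the ldm `instantiate_from_config` helper reimplemented here so the snippet is self-contained:

```python
# Sketch: how ldm-style YAML configs map to objects.
# `target` is a dotted class path; `params` become constructor kwargs.
import importlib
from omegaconf import OmegaConf

def instantiate_from_config(config):
    module_path, cls_name = config["target"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_path), cls_name)
    return cls(**config.get("params", {}))

config = OmegaConf.load("ComfyUI/models/configs/anything_v3.yaml")
model = instantiate_from_config(config.model)  # ldm.models.diffusion.ddpm.LatentDiffusion
```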
    	
ComfyUI/models/configs/v1-inference.yaml  ADDED

@@ -0,0 +1,70 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false   # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
    	
ComfyUI/models/configs/v1-inference_clip_skip_2.yaml  ADDED

@@ -0,0 +1,73 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false   # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+      params:
+        layer: "hidden"
+        layer_idx: -2
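The only difference from v1-inference.yaml is the `cond_stage_config` params: `layer: "hidden"` with `layer_idx: -2` takes the penultimate hidden state of the CLIP text encoder ("clip skip 2") instead of the final layer output. A rough illustration using the transformers checkpoint that FrozenCLIPEmbedder wraps by default:

```python
# Sketch of "clip skip 2": use the penultimate CLIP text-encoder layer.
import torch
from transformers import CLIPTextModel, CLIPTokenizer

name = "openai/clip-vit-large-patch14"  # the default checkpoint behind FrozenCLIPEmbedder
tokenizer = CLIPTokenizer.from_pretrained(name)
encoder = CLIPTextModel.from_pretrained(name)

tokens = tokenizer("a photo of a cat", padding="max_length", return_tensors="pt")
with torch.no_grad():
    out = encoder(**tokens, output_hidden_states=True)

last = out.last_hidden_state         # layer: "last" (plain v1-inference.yaml behaviour)
penultimate = out.hidden_states[-2]  # layer: "hidden", layer_idx: -2
# Note: some pipelines additionally re-apply the encoder's final LayerNorm to `penultimate`.
```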
    	
ComfyUI/models/configs/v1-inference_clip_skip_2_fp16.yaml  ADDED

@@ -0,0 +1,74 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false   # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+      params:
+        layer: "hidden"
+        layer_idx: -2
    	
ComfyUI/models/configs/v1-inference_fp16.yaml  ADDED

@@ -0,0 +1,71 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false   # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
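The two _fp16 variants differ from their base configs only by `use_fp16: True` under `unet_config.params`. In the ldm `UNetModel` this selects torch.float16 as the working dtype for the UNet blocks; roughly (a sketch of the effect, not the actual class):

```python
# Rough sketch of what use_fp16 controls inside the ldm UNetModel.
import torch

use_fp16 = True
dtype = torch.float16 if use_fp16 else torch.float32

x = torch.randn(1, 4, 64, 64)  # latent input (in_channels: 4)
h = x.type(dtype)              # the forward pass casts activations to the model dtype
print(h.dtype)                 # torch.float16
```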
    	
ComfyUI/models/configs/v1-inpainting-inference.yaml  ADDED

@@ -0,0 +1,71 @@
+model:
+  base_learning_rate: 7.5e-05
+  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false   # Note: different from the one we trained before
+    conditioning_key: hybrid   # important
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    finetune_keys: null
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 9  # 4 data + 4 downscaled image + 1 mask
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+
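This config swaps in `LatentInpaintDiffusion` with `conditioning_key: hybrid`, and the `in_channels: 9` comment spells out the input layout: the UNet sees the noisy latent concatenated channel-wise with the masked image's latent and the downscaled mask. A shape-only sketch (tensor names are illustrative):

```python
# Sketch of the 9-channel UNet input for the inpainting model
# (in_channels: 9 = 4 latent + 4 masked-image latent + 1 mask).
import torch

b, h, w = 1, 64, 64
noisy_latent = torch.randn(b, 4, h, w)         # the diffused latent being denoised
masked_image_latent = torch.randn(b, 4, h, w)  # VAE encoding of the masked input image
mask = torch.rand(b, 1, h, w)                  # inpainting mask, downscaled to latent size

unet_input = torch.cat([noisy_latent, masked_image_latent, mask], dim=1)
assert unet_input.shape[1] == 9  # matches in_channels: 9
```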
    	
ComfyUI/models/configs/v2-inference-v.yaml  ADDED

@@ -0,0 +1,68 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    parameterization: "v"
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
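Relative to v2-inference.yaml, this config adds `parameterization: "v"`: the UNet is trained to predict v = alpha_t * eps - sigma_t * x0 rather than the noise eps (the v-parameterization of Salimans & Ho, 2022). A minimal sketch of the two conversions, assuming the usual alpha_t**2 + sigma_t**2 == 1 schedule:

```python
# v-parameterization: the network predicts v instead of the noise eps.
import torch

def v_from_eps_x0(alpha_t, sigma_t, eps, x0):
    # Definition of the v target.
    return alpha_t * eps - sigma_t * x0

def x0_from_v(alpha_t, sigma_t, x_t, v):
    # Recover the denoised sample from a v prediction
    # (uses x_t = alpha_t * x0 + sigma_t * eps and alpha_t**2 + sigma_t**2 == 1).
    return alpha_t * x_t - sigma_t * v

# Tiny numeric check:
alpha_t, sigma_t = torch.tensor(0.8), torch.tensor(0.6)
x0, eps = torch.randn(4), torch.randn(4)
x_t = alpha_t * x0 + sigma_t * eps
v = v_from_eps_x0(alpha_t, sigma_t, eps, x0)
assert torch.allclose(x0_from_v(alpha_t, sigma_t, x_t, v), x0, atol=1e-6)
```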
    	
ComfyUI/models/configs/v2-inference-v_fp32.yaml  ADDED

@@ -0,0 +1,68 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    parameterization: "v"
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: False
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
    	
ComfyUI/models/configs/v2-inference.yaml  ADDED

@@ -0,0 +1,67 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
    	
        ComfyUI/models/configs/v2-inference_fp32.yaml
    ADDED
    
    | @@ -0,0 +1,67 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: False
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
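This fp32 variant pins the UNet to full precision via `use_fp16: False`; everything else follows the standard v2 inference config. For orientation, these ldm-style configs are consumed by instantiating the class named in `target` with the nested `params` — a sketch using the upstream stable-diffusion helpers (ComfyUI has its own loader; the checkpoint filename below is a placeholder):

```python
import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config  # helper from the stable-diffusion repo

# Parse the YAML and build LatentDiffusion from target/params.
config = OmegaConf.load("ComfyUI/models/configs/v2-inference_fp32.yaml")
model = instantiate_from_config(config.model)

# Load matching weights; "v2-1_512-ema-pruned.ckpt" is a placeholder name.
state = torch.load("v2-1_512-ema-pruned.ckpt", map_location="cpu")
model.load_state_dict(state.get("state_dict", state), strict=False)
model.eval()
```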
    	
ComfyUI/models/configs/v2-inpainting-inference.yaml
ADDED

@@ -0,0 +1,158 @@
+model:
+  base_learning_rate: 5.0e-05
+  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: hybrid
+    scale_factor: 0.18215
+    monitor: val/loss_simple_ema
+    finetune_keys: null
+    use_ema: False
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        image_size: 32 # unused
+        in_channels: 9
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+            - 1
+            - 2
+            - 4
+            - 4
+          num_res_blocks: 2
+          attn_resolutions: [ ]
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
+
+
+data:
+  target: ldm.data.laion.WebDataModuleFromConfig
+  params:
+    tar_base: null  # for concat as in LAION-A
+    p_unsafe_threshold: 0.1
+    filter_word_list: "data/filters.yaml"
+    max_pwatermark: 0.45
+    batch_size: 8
+    num_workers: 6
+    multinode: True
+    min_size: 512
+    train:
+      shards:
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -"
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -"
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -"
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -"
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -"  #{00000-94333}.tar"
+      shuffle: 10000
+      image_key: jpg
+      image_transforms:
+      - target: torchvision.transforms.Resize
+        params:
+          size: 512
+          interpolation: 3
+      - target: torchvision.transforms.RandomCrop
+        params:
+          size: 512
+      postprocess:
+        target: ldm.data.laion.AddMask
+        params:
+          mode: "512train-large"
+          p_drop: 0.25
+    # NOTE use enough shards to avoid empty validation loops in workers
+    validation:
+      shards:
+        - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - "
+      shuffle: 0
+      image_key: jpg
+      image_transforms:
+      - target: torchvision.transforms.Resize
+        params:
+          size: 512
+          interpolation: 3
+      - target: torchvision.transforms.CenterCrop
+        params:
+          size: 512
+      postprocess:
+        target: ldm.data.laion.AddMask
+        params:
+          mode: "512train-large"
+          p_drop: 0.25
+
+lightning:
+  find_unused_parameters: True
+  modelcheckpoint:
+    params:
+      every_n_train_steps: 5000
+
+  callbacks:
+    metrics_over_trainsteps_checkpoint:
+      params:
+        every_n_train_steps: 10000
+
+    image_logger:
+      target: main.ImageLogger
+      params:
+        enable_autocast: False
+        disabled: False
+        batch_frequency: 1000
+        max_images: 4
+        increase_log_steps: False
+        log_first_step: False
+        log_images_kwargs:
+          use_ema_scope: False
+          inpaint: False
+          plot_progressive_rows: False
+          plot_diffusion_rows: False
+          N: 4
+          unconditional_guidance_scale: 5.0
+          unconditional_guidance_label: [""]
+          ddim_steps: 50  # todo check these out for depth2img,
+          ddim_eta: 0.0   # todo check these out for depth2img,
+
+  trainer:
+    benchmark: True
+    val_check_interval: 5000000
+    num_sanity_val_steps: 0
+    accumulate_grad_batches: 1
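The inpainting model's `in_channels: 9` follows from `conditioning_key: hybrid`: the UNet input concatenates the noised latent (4 channels), the downsampled inpaint mask (1), and the VAE encoding of the masked image (4), while the text embedding still enters through cross-attention. A shape-only sketch of that layout (hypothetical tensors; the channel order follows the upstream convention but is illustrative here):

```python
import torch

b, h, w = 1, 64, 64                       # latent grid for a 512x512 image
noisy_latent  = torch.randn(b, 4, h, w)   # x_t being denoised
mask          = torch.ones(b, 1, h, w)    # inpaint mask at latent resolution
masked_latent = torch.randn(b, 4, h, w)   # VAE encoding of image * (1 - mask)

# conditioning_key: hybrid -> concat extra channels onto the UNet input
unet_input = torch.cat([noisy_latent, mask, masked_latent], dim=1)
assert unet_input.shape[1] == 9           # matches in_channels: 9 above
```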
    	
ComfyUI/models/controlnet/put_controlnets_and_t2i_here
ADDED
File without changes

ComfyUI/models/diffusers/put_diffusers_models_here
ADDED
File without changes

ComfyUI/models/embeddings/put_embeddings_or_textual_inversion_concepts_here
ADDED
File without changes

ComfyUI/models/gligen/put_gligen_models_here
ADDED
File without changes

ComfyUI/models/hypernetworks/put_hypernetworks_here
ADDED
File without changes

ComfyUI/models/layer_model/layers
ADDED
File without changes

ComfyUI/models/loras/put_loras_here
ADDED
File without changes

ComfyUI/models/photomaker/put_photomaker_models_here
ADDED
File without changes

ComfyUI/models/style_models/put_t2i_style_model_here
ADDED
File without changes

ComfyUI/models/unet/put_unet_files_here
ADDED
File without changes

ComfyUI/models/upscale_models/put_esrgan_and_other_upscale_models_here
ADDED
File without changes

ComfyUI/models/vae/put_vae_here
ADDED
File without changes

ComfyUI/models/vae_approx/put_taesd_encoder_pth_and_taesd_decoder_pth_here
ADDED
File without changes
