|
The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`. |
|
Running 1 job |
|
0it [00:00, ?it/s]
0it [00:00, ?it/s] |
|
/usr/local/lib/python3.10/dist-packages/albumentations/__init__.py:13: UserWarning: A new version of Albumentations is available: 1.4.23 (you have 1.4.15). Upgrade using: pip install -U albumentations. To disable automatic update checks, set the environment variable NO_ALBUMENTATIONS_UPDATE to 1. |
|
check_for_updates() |
|
/usr/local/lib/python3.10/dist-packages/controlnet_aux/mediapipe_face/mediapipe_face_common.py:7: UserWarning: The module 'mediapipe' is not installed. The package will have limited functionality. Please install it using the command: pip install 'mediapipe' |
|
warnings.warn( |
|
/usr/local/lib/python3.10/dist-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers |
|
warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) |
|
/usr/local/lib/python3.10/dist-packages/timm/models/registry.py:4: FutureWarning: Importing from timm.models.registry is deprecated, please import via timm.models |
|
warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.models", FutureWarning) |
|
/usr/local/lib/python3.10/dist-packages/controlnet_aux/segment_anything/modeling/tiny_vit_sam.py:654: UserWarning: Overwriting tiny_vit_5m_224 in registry with controlnet_aux.segment_anything.modeling.tiny_vit_sam.tiny_vit_5m_224. This is because the name being registered conflicts with an existing name. Please check if this is not expected. |
|
return register_model(fn_wrapper) |
|
/usr/local/lib/python3.10/dist-packages/controlnet_aux/segment_anything/modeling/tiny_vit_sam.py:654: UserWarning: Overwriting tiny_vit_11m_224 in registry with controlnet_aux.segment_anything.modeling.tiny_vit_sam.tiny_vit_11m_224. This is because the name being registered conflicts with an existing name. Please check if this is not expected. |
|
return register_model(fn_wrapper) |
|
/usr/local/lib/python3.10/dist-packages/controlnet_aux/segment_anything/modeling/tiny_vit_sam.py:654: UserWarning: Overwriting tiny_vit_21m_224 in registry with controlnet_aux.segment_anything.modeling.tiny_vit_sam.tiny_vit_21m_224. This is because the name being registered conflicts with an existing name. Please check if this is not expected. |
|
return register_model(fn_wrapper) |
|
/usr/local/lib/python3.10/dist-packages/controlnet_aux/segment_anything/modeling/tiny_vit_sam.py:654: UserWarning: Overwriting tiny_vit_21m_384 in registry with controlnet_aux.segment_anything.modeling.tiny_vit_sam.tiny_vit_21m_384. This is because the name being registered conflicts with an existing name. Please check if this is not expected. |
|
return register_model(fn_wrapper) |
|
/usr/local/lib/python3.10/dist-packages/controlnet_aux/segment_anything/modeling/tiny_vit_sam.py:654: UserWarning: Overwriting tiny_vit_21m_512 in registry with controlnet_aux.segment_anything.modeling.tiny_vit_sam.tiny_vit_21m_512. This is because the name being registered conflicts with an existing name. Please check if this is not expected. |
|
return register_model(fn_wrapper) |
|
/workspace/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py:61: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. |
|
self.scaler = torch.cuda.amp.GradScaler() |
|
You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers |
|
{ |
|
"type": "sd_trainer", |
|
"training_folder": "output", |
|
"device": "cuda:0", |
|
"network": { |
|
"type": "lora", |
|
"linear": 16, |
|
"linear_alpha": 16 |
|
}, |
|
"save": { |
|
"dtype": "float16", |
|
"save_every": 500, |
|
"max_step_saves_to_keep": 4, |
|
"push_to_hub": false |
|
}, |
|
"datasets": [ |
|
{ |
|
"folder_path": "/workspace/ai-toolkit/images", |
|
"caption_ext": "txt", |
|
"caption_dropout_rate": 0.05, |
|
"shuffle_tokens": false, |
|
"cache_latents_to_disk": true, |
|
"resolution": [ |
|
512, |
|
768, |
|
1024 |
|
] |
|
} |
|
], |
|
"train": { |
|
"batch_size": 1, |
|
"steps": 2000, |
|
"gradient_accumulation_steps": 1, |
|
"train_unet": true, |
|
"train_text_encoder": false, |
|
"gradient_checkpointing": true, |
|
"noise_scheduler": "flowmatch", |
|
"optimizer": "adamw8bit", |
|
"lr": 0.0004, |
|
"ema_config": { |
|
"use_ema": true, |
|
"ema_decay": 0.99 |
|
}, |
|
"dtype": "bf16" |
|
}, |
|
"model": { |
|
"name_or_path": "black-forest-labs/FLUX.1-dev", |
|
"is_flux": true, |
|
"quantize": true |
|
}, |
|
"sample": { |
|
"sampler": "flowmatch", |
|
"sample_every": 500, |
|
"width": 1024, |
|
"height": 1024, |
|
"prompts": [ |
|
"Photo of xtina holding a sign that says 'I LOVE PROMPTS!'", |
|
"Professional headshot of xtina in a business suit.", |
|
"A happy pilot xtina of a Boeing 747.", |
|
"A doctor xtina talking to a patient.", |
|
"A chef xtina in the middle of a bustling kitchen, plating a beautifully arranged dish.", |
|
"A young xtina with a big grin, holding a large ice cream cone in front of an old-fashioned ice cream parlor.", |
|
"A person xtina in a tuxedo, looking directly into the camera with a confident smile, standing on a red carpet at a gala event.", |
|
"Person xtina with a bitchin' 80's mullet hairstyle leaning out the window of a pontiac firebird" |
|
], |
|
"neg": "", |
|
"seed": 42, |
|
"walk_seed": true, |
|
"guidance_scale": 4, |
|
"sample_steps": 20 |
|
}, |
|
"trigger_word": "xtina" |
|
} |
|
Using EMA |
|
|
|
############################################# |
|
# Running job: my_first_flux_lora_v1 |
|
############################################# |
|
|
|
|
|
Running 1 process |
|
Loading Flux model |
|
Loading transformer |
|
Quantizing transformer |
|
Loading vae |
|
Loading t5 |
|
Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]
Downloading shards: 50%|βββββ | 1/2 [00:04<00:04, 4.62s/it]
Downloading shards: 100%|ββββββββββ| 2/2 [00:09<00:00, 4.89s/it]
Downloading shards: 100%|ββββββββββ| 2/2 [00:09<00:00, 4.85s/it] |
|
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]
Loading checkpoint shards: 50%|βββββ | 1/2 [00:00<00:00, 5.62it/s]
Loading checkpoint shards: 100%|ββββββββββ| 2/2 [00:00<00:00, 6.28it/s]
Loading checkpoint shards: 100%|ββββββββββ| 2/2 [00:00<00:00, 6.17it/s] |
|
Quantizing T5 |
|
Loading clip |
|
making pipe |
|
preparing |
|
create LoRA network. base dim (rank): 16, alpha: 16 |
|
neuron dropout: p=None, rank dropout: p=None, module dropout: p=None |
|
create LoRA for Text Encoder: 0 modules. |
|
create LoRA for U-Net: 494 modules. |
|
enable LoRA for U-Net |
|
Dataset: /workspace/ai-toolkit/images |
|
- Preprocessing image dimensions |
|
0%| | 0/40 [00:00<?, ?it/s]
10%|β | 4/40 [00:00<00:00, 37.15it/s]
20%|ββ | 8/40 [00:00<00:01, 24.03it/s]
32%|ββββ | 13/40 [00:00<00:00, 28.82it/s]
57%|ββββββ | 23/40 [00:00<00:00, 49.26it/s]
75%|ββββββββ | 30/40 [00:00<00:00, 53.40it/s]
90%|βββββββββ | 36/40 [00:00<00:00, 46.91it/s]
100%|ββββββββββ| 40/40 [00:00<00:00, 46.25it/s] |
|
- Found 40 images |
|
Bucket sizes for /workspace/ai-toolkit/images: |
|
384x576: 18 files |
|
448x512: 1 files |
|
448x576: 13 files |
|
576x448: 5 files |
|
384x640: 1 files |
|
512x512: 2 files |
|
6 buckets made |
|
Caching latents for /workspace/ai-toolkit/images |
|
- Saving latents to disk |
|
Caching latents to disk: 0%| | 0/40 [00:00<?, ?it/s]
Caching latents to disk: 2%|β | 1/40 [00:00<00:13, 2.96it/s]
Caching latents to disk: 8%|β | 3/40 [00:00<00:05, 7.12it/s]
Caching latents to disk: 12%|ββ | 5/40 [00:00<00:04, 7.24it/s]
Caching latents to disk: 15%|ββ | 6/40 [00:00<00:05, 6.79it/s]
Caching latents to disk: 20%|ββ | 8/40 [00:01<00:03, 8.77it/s]
Caching latents to disk: 25%|βββ | 10/40 [00:01<00:02, 11.04it/s]
Caching latents to disk: 30%|βββ | 12/40 [00:01<00:02, 11.69it/s]
Caching latents to disk: 35%|ββββ | 14/40 [00:01<00:02, 12.78it/s]
Caching latents to disk: 42%|βββββ | 17/40 [00:01<00:01, 15.20it/s]
Caching latents to disk: 48%|βββββ | 19/40 [00:01<00:01, 13.76it/s]
Caching latents to disk: 52%|ββββββ | 21/40 [00:01<00:01, 14.47it/s]
Caching latents to disk: 57%|ββββββ | 23/40 [00:02<00:01, 15.54it/s]
Caching latents to disk: 62%|βββββββ | 25/40 [00:02<00:00, 16.31it/s]
Caching latents to disk: 68%|βββββββ | 27/40 [00:02<00:00, 14.53it/s]
Caching latents to disk: 72%|ββββββββ | 29/40 [00:02<00:00, 14.32it/s]
Caching latents to disk: 78%|ββββββββ | 31/40 [00:02<00:00, 13.07it/s]
Caching latents to disk: 82%|βββββββββ | 33/40 [00:02<00:00, 12.63it/s]
Caching latents to disk: 88%|βββββββββ | 35/40 [00:02<00:00, 13.71it/s]
Caching latents to disk: 95%|ββββββββββ| 38/40 [00:03<00:00, 15.45it/s]
Caching latents to disk: 100%|ββββββββββ| 40/40 [00:03<00:00, 16.01it/s]
Caching latents to disk: 100%|ββββββββββ| 40/40 [00:03<00:00, 12.62it/s] |
|
Dataset: /workspace/ai-toolkit/images |
|
- Preprocessing image dimensions |
|
0%| | 0/40 [00:00<?, ?it/s]
100%|ββββββββββ| 40/40 [00:00<00:00, 117569.84it/s] |
|
- Found 40 images |
|
Bucket sizes for /workspace/ai-toolkit/images: |
|
576x832: 12 files |
|
640x768: 6 files |
|
640x832: 8 files |
|
576x896: 7 files |
|
832x640: 3 files |
|
768x640: 2 files |
|
704x768: 1 files |
|
768x768: 1 files |
|
8 buckets made |
|
Caching latents for /workspace/ai-toolkit/images |
|
- Saving latents to disk |
|
Caching latents to disk: 0%| | 0/40 [00:00<?, ?it/s]
Caching latents to disk: 5%|β | 2/40 [00:00<00:03, 11.61it/s]
Caching latents to disk: 10%|β | 4/40 [00:00<00:03, 10.89it/s]
Caching latents to disk: 15%|ββ | 6/40 [00:00<00:05, 6.04it/s]
Caching latents to disk: 18%|ββ | 7/40 [00:00<00:05, 6.59it/s]
Caching latents to disk: 22%|βββ | 9/40 [00:01<00:03, 7.94it/s]
Caching latents to disk: 28%|βββ | 11/40 [00:01<00:03, 8.43it/s]
Caching latents to disk: 30%|βββ | 12/40 [00:01<00:03, 8.41it/s]
Caching latents to disk: 35%|ββββ | 14/40 [00:01<00:02, 9.25it/s]
Caching latents to disk: 40%|ββββ | 16/40 [00:01<00:02, 10.22it/s]
Caching latents to disk: 45%|βββββ | 18/40 [00:01<00:02, 10.93it/s]
Caching latents to disk: 50%|βββββ | 20/40 [00:02<00:01, 11.45it/s]
Caching latents to disk: 55%|ββββββ | 22/40 [00:02<00:01, 11.53it/s]
Caching latents to disk: 60%|ββββββ | 24/40 [00:02<00:01, 10.76it/s]
Caching latents to disk: 65%|βββββββ | 26/40 [00:02<00:01, 11.04it/s]
Caching latents to disk: 70%|βββββββ | 28/40 [00:02<00:01, 11.21it/s]
Caching latents to disk: 75%|ββββββββ | 30/40 [00:03<00:00, 10.44it/s]
Caching latents to disk: 80%|ββββββββ | 32/40 [00:03<00:00, 10.46it/s]
Caching latents to disk: 85%|βββββββββ | 34/40 [00:03<00:00, 9.28it/s]
Caching latents to disk: 90%|βββββββββ | 36/40 [00:03<00:00, 9.80it/s]
Caching latents to disk: 95%|ββββββββββ| 38/40 [00:03<00:00, 10.41it/s]
Caching latents to disk: 100%|ββββββββββ| 40/40 [00:04<00:00, 11.05it/s]
Caching latents to disk: 100%|ββββββββββ| 40/40 [00:04<00:00, 9.88it/s] |
|
Dataset: /workspace/ai-toolkit/images |
|
- Preprocessing image dimensions |
|
0%| | 0/40 [00:00<?, ?it/s]
100%|ββββββββββ| 40/40 [00:00<00:00, 111328.57it/s] |
|
- Found 40 images |
|
Bucket sizes for /workspace/ai-toolkit/images: |
|
832x1216: 12 files |
|
896x1088: 6 files |
|
896x1152: 5 files |
|
832x1152: 6 files |
|
768x1280: 2 files |
|
1152x832: 2 files |
|
768x1152: 1 files |
|
704x1024: 1 files |
|
1088x896: 2 files |
|
1152x896: 1 files |
|
960x1024: 1 files |
|
1024x1024: 1 files |
|
12 buckets made |
|
Caching latents for /workspace/ai-toolkit/images |
|
- Saving latents to disk |
|
Caching latents to disk: 0%| | 0/40 [00:00<?, ?it/s]
Caching latents to disk: 2%|β | 1/40 [00:00<00:05, 6.96it/s]
Caching latents to disk: 5%|β | 2/40 [00:00<00:05, 7.24it/s]
Caching latents to disk: 8%|β | 3/40 [00:00<00:06, 5.70it/s]
Caching latents to disk: 10%|β | 4/40 [00:00<00:05, 6.25it/s]
Caching latents to disk: 12%|ββ | 5/40 [00:00<00:06, 5.15it/s]
Caching latents to disk: 15%|ββ | 6/40 [00:01<00:07, 4.66it/s]
Caching latents to disk: 18%|ββ | 7/40 [00:01<00:06, 5.15it/s]
Caching latents to disk: 20%|ββ | 8/40 [00:01<00:05, 5.70it/s]
Caching latents to disk: 22%|βββ | 9/40 [00:01<00:05, 6.17it/s]
Caching latents to disk: 25%|βββ | 10/40 [00:01<00:04, 6.68it/s]
Caching latents to disk: 28%|βββ | 11/40 [00:01<00:04, 6.80it/s]
Caching latents to disk: 30%|βββ | 12/40 [00:01<00:04, 6.48it/s]
Caching latents to disk: 32%|ββββ | 13/40 [00:02<00:04, 6.75it/s]
Caching latents to disk: 35%|ββββ | 14/40 [00:02<00:03, 7.12it/s]
Caching latents to disk: 38%|ββββ | 15/40 [00:02<00:03, 7.51it/s]
Caching latents to disk: 40%|ββββ | 16/40 [00:02<00:03, 7.73it/s]
Caching latents to disk: 45%|βββββ | 18/40 [00:02<00:02, 8.14it/s]
Caching latents to disk: 48%|βββββ | 19/40 [00:02<00:02, 7.76it/s]
Caching latents to disk: 50%|βββββ | 20/40 [00:03<00:02, 7.66it/s]
Caching latents to disk: 52%|ββββββ | 21/40 [00:03<00:02, 7.90it/s]
Caching latents to disk: 55%|ββββββ | 22/40 [00:03<00:02, 7.62it/s]
Caching latents to disk: 57%|ββββββ | 23/40 [00:03<00:02, 7.40it/s]
Caching latents to disk: 60%|ββββββ | 24/40 [00:03<00:02, 7.36it/s]
Caching latents to disk: 62%|βββββββ | 25/40 [00:03<00:02, 7.36it/s]
Caching latents to disk: 65%|βββββββ | 26/40 [00:03<00:02, 6.48it/s]
Caching latents to disk: 68%|βββββββ | 27/40 [00:04<00:01, 6.77it/s]
Caching latents to disk: 70%|βββββββ | 28/40 [00:04<00:01, 6.48it/s]
Caching latents to disk: 72%|ββββββββ | 29/40 [00:04<00:01, 6.21it/s]
Caching latents to disk: 75%|ββββββββ | 30/40 [00:04<00:01, 5.74it/s]
Caching latents to disk: 78%|ββββββββ | 31/40 [00:04<00:01, 5.89it/s]
Caching latents to disk: 80%|ββββββββ | 32/40 [00:04<00:01, 5.80it/s]
Caching latents to disk: 82%|βββββββββ | 33/40 [00:05<00:01, 5.59it/s]
Caching latents to disk: 85%|βββββββββ | 34/40 [00:05<00:00, 6.04it/s]
Caching latents to disk: 88%|βββββββββ | 35/40 [00:05<00:00, 6.29it/s]
Caching latents to disk: 90%|βββββββββ | 36/40 [00:05<00:00, 6.70it/s]
Caching latents to disk: 92%|ββββββββββ| 37/40 [00:05<00:00, 7.03it/s]
Caching latents to disk: 95%|ββββββββββ| 38/40 [00:05<00:00, 7.05it/s]
Caching latents to disk: 98%|ββββββββββ| 39/40 [00:05<00:00, 7.34it/s]
Caching latents to disk: 100%|ββββββββββ| 40/40 [00:06<00:00, 7.40it/s]
Caching latents to disk: 100%|ββββββββββ| 40/40 [00:06<00:00, 6.64it/s] |
|
Generating baseline samples before training |
|
Generating Images: 0%| | 0/8 [00:00<?, ?it/s]
Generating Images: 12%|ββ | 1/8 [00:20<02:26, 20.93s/it]
Generating Images: 25%|βββ | 2/8 [00:41<02:05, 20.89s/it]
Generating Images: 38%|ββββ | 3/8 [01:02<01:44, 20.95s/it] |