---
# Training configuration with three top-level sections: model, data, training.
#
# NOTE(review): this file was recovered from a flattened copy in which all
# indentation had been lost and every line carried a stray trailing "|".
# The nesting below was reconstructed from the key names (target/params
# pairs and section headers); confirm it against the code that loads this
# file before relying on it.

model:
  # Transport / scheduler object: `target` is a dotted import path and
  # `params` holds the arguments set for it.
  transport:
    target: tim.schedulers.transports.OT_FM
    params:
      P_mean: 0.0
      P_std: 1.6
      sigma_d: 1.0

  # Loss settings.
  # NOTE(review): placed as a sibling of `transport`; verify it is not
  # expected inside `transport.params`.
  unified_dcm_loss:
    diffusion_ratio: 0.5
    consistency_ratio: 0.1
    derivative_type: dde
    differential_epsilon: 0.005
    weight_time_type: sqrt
    weight_time_tangent: true

  # Network object: same target/params layout as `transport`.
  network:
    target: tim.models.t2i.tim_model.TiM
    params:
      input_size: 16
      patch_size: 1
      in_channels: 32
      depth: 28
      hidden_size: 1152
      cap_feat_dim: 1152
      num_heads: 16
      encoder_depth: 8
      qk_norm: true
      z_dim: 768
      new_condition: t-r
      use_new_embed: true
      distance_aware: true
      lora_hidden_size: 384

  # NOTE(review): the remaining keys are assumed to belong to `model`
  # rather than to the document root; confirm with the consumer.

  # VAE location.
  vae_dir: mit-han-lab/dc-ae-f32c32-sana-1.1-diffusers

  # Text encoder location and its prompt-handling options.
  text_encoder_dir: google/gemma-3-1b-it
  proportion_empty_prompts: 0.1
  use_last_hidden_state: true
  max_seq_length: 256

  # Auxiliary encoder checkpoint and its coefficient.
  enc_dir: checkpoints/radio/radio-v2.5-b_half.pth.tar
  proj_coeff: 1.0

  # EMA settings.
  use_ema: true
  ema_decay: 0.9999

data:
  data_type: image_ms
  # All three dataset paths point into the same toy dataset directory.
  dataset:
    root_dir: datasets/t2i_toy_dataset
    packed_json: datasets/t2i_toy_dataset/bucket_sampler.json
    jsonl_dir: datasets/t2i_toy_dataset/data_info.jsonl
  dataloader:
    num_workers: 4
    batch_size: 128

training:
  tracker: null
  max_train_steps: 500000
  checkpointing_steps: 1000
  checkpoints_total_limit: 2
  resume_from_checkpoint: latest
  # Written with a decimal point and signed exponent so that YAML 1.1
  # parsers also read it as a float instead of a string.
  learning_rate: 1.0e-4
  # presumably the reference batch size used when `scale_lr` is enabled;
  # verify against the training script.
  learning_rate_base_batch_size: 512
  scale_lr: true
  lr_scheduler: constant
  lr_warmup_steps: 0
  gradient_accumulation_steps: 1
  # Optimizer object: same target/params layout as the model components.
  optimizer:
    target: torch.optim.AdamW
    params:
      betas: [0.9, 0.95]
      weight_decay: 1.0e-2
      eps: 1.0e-6
  # NOTE(review): the keys from here on are assumed to be direct children
  # of `training`, not optimizer params; confirm with the consumer.
  max_grad_norm: 1.0
  # Distinct from model.proportion_empty_prompts (0.1); kept as a separate
  # key with its own value.
  proportion_empty_prompts: 0.0
  mixed_precision: bf16
  allow_tf32: true
  validation_steps: 500
  checkpoint_list: [100000, 200000, 300000, 400000]