{
  "experiment": {
    "tokenizer_checkpoint": "tokenizer_titok_s128.bin",
    "generator_checkpoint": "generator_titok_s128.bin",
    "output_dir": "titok_s_128"
  },
  "model": {
    "vq_model": {
      "codebook_size": 4096,
      "token_size": 12,
      "use_l2_norm": true,
      "commitment_cost": 0.25,
      "vit_enc_model_size": "small",
      "vit_dec_model_size": "small",
      "vit_enc_patch_size": 16,
      "vit_dec_patch_size": 16,
      "num_latent_tokens": 128
    },
    "generator": {
      "model_type": "UViT",
      "hidden_size": 1024,
      "num_hidden_layers": 20,
      "num_attention_heads": 16,
      "intermediate_size": 4096,
      "dropout": 0.1,
      "attn_drop": 0.1,
      "num_steps": 64,
      "mask_schedule_strategy": "arccos",
      "class_label_dropout": 0.1,
      "image_seq_len": "${model.vq_model.num_latent_tokens}",
      "condition_num_classes": 1000,
      "randomize_temperature": 2.8,
      "guidance_scale": 6.9,
      "guidance_decay": "power-cosine"
    }
  },
  "dataset": {
    "preprocessing": {
      "crop_size": 256
    }
  }
}