{
  "experiment": {
    "tokenizer_checkpoint": "tokenizer_titok_b64.bin",
    "generator_checkpoint": "generator_titok_b64.bin",
    "output_dir": "titok_b_64"
  },
  "model": {
    "vq_model": {
      "codebook_size": 4096,
      "token_size": 12,
      "use_l2_norm": true,
      "commitment_cost": 0.25,
      "vit_enc_model_size": "base",
      "vit_dec_model_size": "base",
      "vit_enc_patch_size": 16,
      "vit_dec_patch_size": 16,
      "num_latent_tokens": 64
    },
    "generator": {
      "model_type": "ViT",
      "hidden_size": 768,
      "num_hidden_layers": 24,
      "num_attention_heads": 16,
      "intermediate_size": 3072,
      "dropout": 0.1,
      "attn_drop": 0.1,
      "num_steps": 8,
      "mask_schedule_strategy": "arccos",
      "class_label_dropout": 0.1,
      "image_seq_len": "${model.vq_model.num_latent_tokens}",
      "condition_num_classes": 1000,
      "randomize_temperature": 11.0,
      "guidance_scale": 3.0,
      "guidance_decay": "linear"
    }
  },
  "dataset": {
    "preprocessing": {
      "crop_size": 256
    }
  }
}