---
# Flat key/value run configuration.
# Key order is kept as originally written; the section comments below are only
# labels for neighbouring keys and carry no meaning for the consumer.
# NOTE(review): explicit `null` marks options that were left empty; confirm
# that the consumer treats null as "not set".

# Run identity and storage locations
config: configs/vq_8k_siglip_b_res_p02_pw15_enc.yaml
exp_index: null
# NOTE(review): absolute, machine-specific path; adjust per environment.
data_path: /mnt/bn/cloud-project-lq/code/liuyh/data/vq_data/train
cloud_save_path: experiments/tokenizer
no_local_save: true

# Model selection and weight loading
vq_model: VQ-16
vq_ckpt: null
finetune: false
finetune_decoder: false
model_weight_strict: true
ema: false

# Codebook and quantization/reconstruction loss terms
codebook_size: 8192
codebook_embed_dim: 8
codebook_l2_norm: true
codebook_weight: 1.0
entropy_loss_ratio: 0.0
vq_loss_ratio: 1.0
commit_loss_beta: 0.25
reconstruction_weight: 1.0
reconstruction_loss: l2
# Written with an explicit mantissa dot so YAML 1.1 loaders (e.g. PyYAML)
# resolve it as a float instead of the string "1e-06".
kl_loss_weight: 1.0e-6
tau: 0.07
num_codebooks: 1

# perceptual_* options
perceptual_weight: 1.0
perceptual_loss: vgg
perceptual_model: vgg
perceptual_dino_variants: depth12_no_train
perceptual_intermediate_loss: false
perceptual_logit_loss: false
perceptual_resize: false
perceptual_warmup: 10000

# disc_* / gen_loss options
disc_weight: 0.2
disc_start: 40000
disc_dim: 64
disc_type: dino
disc_loss: hinge
gen_loss: hinge
lecam_loss_weight: 0.001
use_diff_aug: true
disc_cr_loss_weight: 4.0
disc_adaptive_weight: false

# Miscellaneous model/runtime switches
compile: false
dropout_p: 0.0

# Output directory and dataset
results_dir: ./logs/task/detailflow_demo_task_256token
dataset: imagenet
image_size: 256

# Optimization schedule
epochs: 250
optimizer: adam
lr: 1.0e-4
lr_warmup_epochs: 1
lr_scheduler: cosine
weight_decay: 0.0001
beta1: 0.9
beta2: 0.95
max_grad_norm: 1.0

# Batching, seeding, logging and checkpoint cadence
global_batch_size: 256
global_seed: 42
num_workers: 16
log_every: 50
vis_every: 5000
ckpt_every: 5000
save_epochs: 1
gradient_accumulation_steps: 1
mixed_precision: bf16

# Encoder / decoder selection
enc_type: siglip2
dec_type: siglip2
num_latent_tokens: 256
encoder_model: siglip2_base
decoder_model: siglip2_base
encoder_tuning_method: full
decoder_tuning_method: full
encoder_pretrained: true
decoder_pretrained: false
encoder_patch_size: 16
decoder_patch_size: 16

# repa_* options (repa itself is switched off here)
repa: false
repa_model: siglip2
repa_patch_size: 16
repa_proj_dim: 1024
repa_loss_weight: 0.5
repa_align: global
repa_layer_indices: 1

# Resume, gradient checkpointing and debugging
resume_from_newest_ckpt: true
gradient_checkpointing_encoder: false
gradient_checkpointing_decoder: false
debug_mode: false

# degradation_* options
content_degradation: resolution_power
degradation_prob: 0.2
degradation_loss_res: 224
degradation_power: 1.5

# Causal switches
causal_encoder: true
causal_decoder: false

# Image-size range and dynamic-resolution options
max_image_size: 256
min_image_size: 256
dynamic_max_image_size: null
dynamic_resolution_prob: 0
max_resolution_prob: 0
adjust_bs_by_resolution: false

# Token grouping and related training options
group_size: 8
global_token_loss_weight: 1.0
correction_training: true
causal_num: null

# Distributed-launch values
# NOTE(review): rank/world_size/gpu look like per-process runtime values that
# were captured into this file; confirm whether the launcher overrides them.
rank: 0
world_size: 16
gpu: 0
# Quoted so the embedded ':' can never be misread as a mapping indicator.
dist_url: 'env://'
distributed: true
dist_backend: nccl
|