Spaces:

aswerdlow
/

unidisc

Paused

App Files Files Community

aswerdlow commited on Mar 18

Commit

131da64

0 Parent(s):

Initial commit

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +2 -0
.gitignore +37 -0
.gitmodules +15 -0
Dockerfile +79 -0
README.md +82 -0
__builtins__.pyi +7 -0
configs/config.yaml +451 -0
configs/config_empty.yaml +8 -0
configs/experiments/ar.yaml +10 -0
configs/experiments/elm.yaml +15 -0
configs/experiments/eval_model.yaml +21 -0
configs/experiments/eval_text.yaml +26 -0
configs/experiments/eval_text_only.yaml +30 -0
configs/experiments/eval_unified.yaml +27 -0
configs/experiments/fid_cc12m.yaml +22 -0
configs/experiments/fid_datacomp1b.yaml +22 -0
configs/experiments/fid_hf.yaml +25 -0
configs/experiments/jan_cub.yaml +51 -0
configs/experiments/large_maskdit_exp.yaml +7 -0
configs/experiments/large_scale_high_res_interleaved_inference.yaml +51 -0
configs/experiments/large_scale_train.yaml +151 -0
configs/experiments/large_scale_train_high_res.yaml +39 -0
configs/experiments/large_scale_train_high_res_inference.yaml +30 -0
configs/experiments/large_scale_train_high_res_interleaved.yaml +105 -0
configs/experiments/maskgit.yaml +6 -0
configs/experiments/master_eval.yaml +49 -0
configs/experiments/mscoco_fid.yaml +21 -0
configs/experiments/paired_standalone_fid_eval.yaml +29 -0
configs/experiments/small_scale_train.yaml +187 -0
configs/experiments/small_scale_train_caching.yaml +186 -0
configs/experiments/small_text_only.yaml +28 -0
configs/experiments/standalone_fid_eval.yaml +18 -0
configs/experiments/titok.yaml +8 -0
configs/experiments/titok_sl256.yaml +7 -0
configs/experiments/txt_only.yaml +21 -0
configs/experiments/unified.yaml +23 -0
configs/experiments/vq16.yaml +9 -0
configs/experiments/vq16_1024.yaml +8 -0
configs/experiments/vq16_magvit.yaml +9 -0
configs/experiments/vq16_t2i.yaml +10 -0
configs/experiments/webdataset.yaml +12 -0
configs/experiments/zero_shot_eval.yaml +29 -0
configs/lr_scheduler/constant_warmup.yaml +2 -0
configs/lr_scheduler/constant_warmup_cosine_decay.yaml +3 -0
configs/lr_scheduler/cosine_decay_warmup.yaml +7 -0
configs/lr_scheduler/cosine_with_hard_restarts_schedule_with_warmup.yaml +4 -0
configs/model/extra_large.yaml +10 -0
configs/model/large.yaml +14 -0
configs/model/medium.yaml +12 -0
configs/model/small-ar.yaml +11 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ *.jpg filter=lfs diff=lfs merge=lfs -text
2	+ *.webp filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,37 @@

+__pycache__/
+outputs/
+ckpts/
+vqgan/vqgan_pretrained/
+vqgan/vqgan_taming_ckpt/
+data/
+models/datasets/.cache/
+*.json
+output/
+tmp*
+multirun/
+.nfs*
+lightning_logs/
+static/
+archive/
+output_profile/
+logs/
+.history/
+.cache/
+output*/
+*.out
+*.parquet
+wandb/
+vqgan/
+*.csv
+.python-version
+ft_cache/
+alias.txt
+env.sh
+generated_image.png
+Untitled-1.ipynb
+*.log
+demo/old
+*.pem
+.sesskey
+icons.py
+generated/

.gitmodules ADDED Viewed

	@@ -0,0 +1,15 @@

+[submodule "third_party/LlamaGen"]
+	path = third_party/LlamaGen
+	url = https://github.com/alexanderswerdlow/LlamaGen.git
+	branch = wip_v1
+[submodule "third_party/Lumina-mGPT"]
+	path = third_party/Lumina-mGPT
+	url = https://github.com/alexanderswerdlow/Lumina-mGPT.git
+	branch = non_causal
+[submodule "third_party/Show-o"]
+	path = third_party/Show-o
+	url = https://github.com/showlab/Show-o.git
+[submodule "third_party/1d-tokenizer"]
+	path = third_party/1d-tokenizer
+	url = https://github.com/bytedance/1d-tokenizer.git
+	branch = main

Dockerfile ADDED Viewed

	@@ -0,0 +1,79 @@

+# Base image with CUDA 12.6.3 and cuDNN
+FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
+# Set environment variables
+# DEBIAN_FRONTEND is an ARG so it only affects the build, not the running container.
+ARG DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1 \
+    SYSTEM=spaces \
+    AM_I_IN_A_DOCKER_CONTAINER=Yes \
+    PYTHONPATH=/home/appuser/app \
+    HF_HOME=/home/appuser/.cache \
+    TORCH_HOME=/home/appuser/.cache \
+    TMP_DIR=/home/appuser/tmp \
+    TRANSFORMERS_CACHE=/home/appuser/.cache/transformers \
+    NVIDIA_VISIBLE_DEVICES=all \
+    NVIDIA_DRIVER_CAPABILITIES=compute,utility
+# Install system dependencies and set Python 3.10 as default
+# (packages are listed alphabetically to keep diffs small)
+RUN apt-get update && apt-get install --no-install-recommends -y \
+    build-essential \
+    ffmpeg \
+    git \
+    libgl1 \
+    libsm6 \
+    libxext6 \
+    openssh-client \
+    python3-pip \
+    python3.10 \
+    python3.10-distutils \
+    && ln -sf /usr/bin/python3.10 /usr/bin/python \
+    && ln -sf /usr/bin/pip3 /usr/bin/pip \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+# Install `uv` (--no-cache-dir keeps pip's download cache out of the image layer)
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir uv
+# Create a non-root user
+RUN useradd -m -u 1000 appuser
+# Set working directory
+WORKDIR /home/appuser/app
+# Copy dependency files and install dependencies
+COPY --chown=appuser pyproject.toml uv.lock README.md ./
+# ~/.ssh is a directory, so it needs the execute bit (0700, not 0600).
+# GitHub's host key is pre-trusted so the SSH-mounted steps below do not prompt
+# (presumably for git dependencies fetched over SSH - confirm against pyproject.toml).
+RUN mkdir -p -m 0700 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts
+# Resolve, install, clean up and fix ownership in ONE layer: files deleted in a later
+# layer still occupy space in the image, and a second recursive chown of .venv would
+# copy it into another layer. Also ensures the non-root user has write access to the
+# app, cache and tmp directories.
+RUN --mount=type=ssh uv sync --no-group dev \
+    && uv sync --frozen --no-cache \
+    && rm -rf /root/.cache /home/appuser/.cache \
+    && mkdir -p /home/appuser/.cache/transformers /home/appuser/tmp \
+    && chown -R appuser:appuser /home/appuser/app /home/appuser/.cache /home/appuser/tmp
+# Keep /tmp world-writable with the sticky bit (the standard 1777 mode) instead of a
+# recursive 777, which would drop the sticky bit and open up every file under /tmp.
+RUN chmod 1777 /tmp
+# Copy application code
+COPY --chown=appuser demo demo
+COPY --chown=appuser unidisc unidisc
+COPY --chown=appuser models models
+COPY --chown=appuser configs configs
+COPY --chown=appuser third_party third_party
+COPY --chown=appuser ckpts ckpts
+COPY --chown=appuser ./__* ./
+COPY --chown=appuser ./*.py ./
+COPY --chown=appuser ./archive/pytorch_model_fsdp.bin ./
+# Switch to non-root user
+USER appuser
+# Expose port for Gradio
+EXPOSE 5003
+# Command to run the application
+CMD ["bash", "demo/demo.sh"]
+# DOCKER_BUILDKIT=1 docker build --ssh default --network=host -t unidisc .
+# docker run --network=host -it -p 5003:5003 unidisc
+# Note: with --network=host the -p mapping is ignored; the app is reachable directly on host port 5003.

README.md ADDED Viewed

	@@ -0,0 +1,82 @@

+<div align="center">
+<br>
+<img src="docs/images/banner.webp" width="1000">
+<h3>Unified Multimodal Discrete Diffusion</h3>
+[Alexander Swerdlow](https://aswerdlow.com/)<sup>1&#42;</sup>&nbsp;
+[Mihir Prabhudesai](https://mihirp1998.github.io/)<sup>1&#42;</sup>&nbsp;
+[Siddharth Gandhi](https://www.ssgandhi.com/)<sup>1</sup>&nbsp;
+[Deepak Pathak](https://www.cs.cmu.edu/~dpathak/)<sup>1</sup>&nbsp;
+[Katerina Fragkiadaki](https://www.cs.cmu.edu/~katef/)<sup>1</sup>&nbsp;
+<br>
+<sup>1</sup> Carnegie Mellon University&nbsp;
+[![ArXiv](https://img.shields.io/badge/ArXiv-<0000.00000>-<COLOR>.svg)](https://arxiv.org/pdf/0000.00000) [![Webpage](https://img.shields.io/badge/Webpage-UniDisc-<COLOR>.svg)](https://unidisc.github.io/)
+<!-- [![Demo](https://img.shields.io/badge/Demo-Custom-<COLOR>.svg)](https://huggingface.co/spaces/todo) -->
+</div>
+## Hugging Face models and annotations
+The UniDisc checkpoints are available on [Hugging Face](https://huggingface.co/unidisc):
+* [unidisc/todo](https://huggingface.co/unidisc/todo)
+## Getting Started
+To install the dependencies, run:
+```bash
+git submodule update --init --recursive
+uv sync --no-group dev
+uv sync
+```
+For a more detailed installation guide, please refer to [INSTALL.md](docs/INSTALL.md).
+## Training
+See [TRAIN.md](docs/TRAIN.md) for details.
+## Inference
+<!-- Inference demo for **TODO**.
+```
+TODO
+``` -->
+<!-- <img src="docs/todo.png" width="1000"> -->
+Interactive demo for **TODO**.
+```
+python demo/server.py
+python demo/client_simple_fasthtml.py
+```
+## Training
+See [TRAINING.md](docs/TRAINING.md) for details.
+## Evaluation
+See [EVAL.md](docs/EVAL.md) for details.
+### Citation
+To cite our work, please use the following:
+```
+@article{TODO,
+  title={TODO},
+  author={TODO},
+  journal={arXiv preprint arXiv:TODO},
+  year={TODO}
+}
+```
+## Credits
+This repository is built on top of the following repositories:
+- [MDLM](https://github.com/kuleshov-group/mdlm)
+- [Lumina-T2X](https://github.com/Alpha-VLLM/Lumina-T2X)

__builtins__.pyi ADDED Viewed

	@@ -0,0 +1,7 @@

+# Type stub listing extra global names for type checkers and editors.
+# NOTE(review): the file name suggests these names are injected into `builtins`
+# at runtime elsewhere in the project - confirm where that happens.
+from ipdb import set_trace as st
+from decoupled_utils import start_timing as start_timing
+from decoupled_utils import end_timing as end_timing
+# Declared by type only; no values are assigned in this stub.
+ENABLE_TIMING: bool
+ENABLE_TIMING_SYNC: bool
+DEVICE_BACKEND_TYPE: str
+# `exists(v)` evaluates to True when `v` is not None.
+exists = lambda v: v is not None

configs/config.yaml ADDED Viewed

	@@ -0,0 +1,451 @@

+defaults:
+  - _self_
+  - /model: small
+  - /noise: loglinear
+  - /lr_scheduler: constant_warmup
+  - /experiments: []
+  # - override hydra/launcher: submitit_slurm
+slurm: False
+debug: False
+mode: train  # train / eval
+diffusion: absorbing_state
+backbone: dit  # dit / dimamba / ar
+parameterization: subs  # subs / d3pm / sedd
+time_conditioning: False
+T: 0  # 0 (continuous time) / 1000
+subs_masking: False
+seed: 42
+profile: False
+# These belong in trainer.* and hydra.launcher.* but are put here for CLI convenience
+devices: ${device_count:}
+nodes: 1
+partition: ${find_partition:}
+constraint: ${find_constraint:}
+ckpt: null
+loader:
+  desired_global_batch_size: 512
+  global_batch_size: null
+  eval_global_batch_size: ${.global_batch_size}
+  batch_size: ${div_up:${.desired_global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
+  eval_batch_size: ${div_up:${.desired_global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
+  num_workers: ${eval:"max(len(__import__('os').sched_getaffinity(0)) // 16, 4)"}
+  pin_memory: True
+  persistent_workers: True
+sampling:
+  predictor: ddpm_cache # analytic, ddpm, ddpm_cache
+  steps: 1000
+  max_sampling_steps: 500 # The highest level we use for sampling
+  noise_removal: True
+  num_sample_log: 2
+  semi_ar: False
+  stride_length: 1
+  num_strides: 1
+eval:
+  checkpoint_path: ''  # Used to evaluate a checkpoint after training.
+  disable_ema: False
+  compute_generative_perplexity: False
+  perplexity_batch_size: 8
+  gen_ppl_eval_model_name_or_path: gpt2-large  # gpt2-large, meta-llama/Llama-2-7b-hf
+  generate_samples: True
+  cfg: null
+  num_masking_viz_batches: 1
+  num_sample_batches: 2  # Total samples: `num_gpus` * `loader.eval_batch_size` * num_sample_batches
+  test_eval_speed: False
+  standalone_fid: False
+  visualize_data_only: false
+  val_with_train_data: false
+  max_num_fid_batches_per_device: null
+  class_conditional_fid: false
+  compute_entropy: false
+  compute_standalone_mauve: false
+  compute_standalone_entropy: false
+  compute_img_to_txt_mauve_clip: false
+  compute_img_to_txt_mauve_during_unconditional_fid: false
+  mauve_num_samples: 5000
+  mauve_divergence_curve_discretization_size: 25 # default in mauve repo
+  mauve_average_over_seeds: 3
+  mauve_scaling_factor: 5 # default in mauve repo
+  txt_conditional_fid: false
+  unconditional_fid: false
+  fid_mode: inline
+  calculate_clip_score: false
+  clean_fid_use_precomputed_stats: false
+  clean_fid_precomputed_name: null
+  clean_fid_precomputed_split: null
+  clean_fid_precomputed_res: null
+  attention_caching: false
+  set_random_gen_seed: false
+  compute_val_metrics_standalone: false
+  num_val_metrics_standalone_batches_per_device: ${eval:'max(${eval.num_val_metrics_standalone_samples} // (${trainer.devices} * ${loader.eval_batch_size}), 1)'}
+  num_val_metrics_standalone_samples: -1
+  return_unweighed_sim: false
+  compute_chameleon_perplexity: false
+  global_disable_mauve: false
+  bypass_normal_validation: false
+  auto_enhance: false
+  num_auto_enhance_iter: 2
+  ar_inpainting_min_val: 0.5
+  ar_inpainting_max_val: 1.0
+  ar_inpainting_force_val: null
+optim:
+  weight_decay: 0
+  lr: 3e-4
+  beta1: 0.9
+  beta2: 0.999
+  eps: 1e-8
+  fused: true
+model:
+  use_custom_vae_config: false
+  use_custom_vae_ckpt: null
+  downscale_ratio: null
+  image_vocab_size: null
+  vae_type: null
+  use_attention_mask: false
+  cond_use_custom_vae_config: false
+  cond_use_custom_vae_ckpt: null
+  cond_downscale_ratio: null
+  cond_image_vocab_size: null
+  cond_vae_type: null
+  text_model: true
+  attn_type: flash
+  force_varlen_attn: false
+  force_cast_bf16: false
+  norm_type: layernorm
+  mup: false
+  qk_norm: false
+  distillation: false
+  force_argmax_valid_indices: false
+  use_flash_attn_3: false
+  use_spda_attn: false # Spelled wrong...
+  rope_2d: false
+  modality_embed: false
+  zero_linear_init: true
+  full_attention: true
+  use_lora: false
+  use_kv_cache: false
+  force_optimized_native_attn: false
+  use_pretrained_img_emb: true
+  use_flex_attention: false
+  add_labels: null
+  flex_attention_txt_masking_prob: null
+  flex_attention_img_masking_prob: null
+trainer:
+  _target_: lightning.Trainer
+  accelerator: cuda
+  num_nodes: ${nodes}
+  devices: ${devices}
+  # Given a desired global batch size (e.g., how many batches we see before an optim.step, summed over all nodes/gpus/accum_steps), we find the number of gradient accumulations that gets us closest given our current configuration. We assume that loader.batch_size is the largest that can fit in a single fwd/bwd.
+  accumulate_grad_batches: ${find_grad_accum:${loader.desired_global_batch_size}, ${eval:${trainer.devices} * ${loader.batch_size} * ${trainer.num_nodes}}}
+  gradient_clip_val: 1.0
+  precision: 'bf16'
+  max_steps: 1_000_000_000
+  num_epochs: 1_000_000_000
+  optimizer_cls: adamw
+  set_grads_to_none: true
+  eval_on_start: true
+  eval_decay_steps: false
+  eval_epochs: null
+  ckpt_steps: 100000
+  fsdp: false
+  force_enable_checkpointing: false
+  limit_val_batches: null
+  ckpt_every_n_minutes: 60
+  ckpt_recent_timeout_minutes: 10
+  checkpoint_all_ranks: true
+  force_null_sigma: false
+  log_every_n_steps: 10
+  limit_train_batches: 1.0   # train on full dataset, can be used to toggle quick run
+  val_check_interval: 100
+  ema: 0.9999
+  antithetic_sampling: True
+  importance_sampling: False
+  sampling_eps: 1e-3
+  change_of_variables: False
+  benchmark: true
+  backward_pass: true
+  forward_pass: true
+  profile_memory: false
+  pytorch_profile: false
+  nvtx_profile: false
+  custom_ddp_bf16: true
+  log_seperate_modal_losses: true
+  use_gradient_checkpointing: false
+  text_loss_weight: null
+  img_loss_weight: null
+  disable_strict_load: false
+  attach_oom_observer_eval: false
+  find_unused_parameters: false
+  restart_on_failure: false
+  skip_early_checkpointing: true
+  log_flops: true
+  sync_timing: false
+  use_custom_ema: false
+  scale_lr_by_batch_size: false
+  tpu_eager: false
+  allow_dynamic_nodes: false
+  force_disable_signal_handler: false
+  tpu_profile: false
+  tpu_cache: false
+  enable_jax_smi: false
+  tpu_compile_debug: false
+  xla_spmd: false
+  log_grad_norm: true
+  tpu_profile_markers: true
+  compile: false
+  disable_all_checkpointing: false
+  tpu_force_mark_step: false
+  ar_shift: false
+  ar_llm_loss: false
+  ar_print_loss: false
+  chameleon_z_loss: null
+  image_mode: discrete # continuous / discrete
+  chameleon_use_ce_loss: false
+  low_precision_loss: false
+  low_precision_params: false
+  scratch: false
+  use_spmd_distributed_checkpointing: null
+  use_simple_spmd_distributed_checkpointing: false
+  load_from_state_dict: null
+  load_from_optimizer_state_dict: null
+  multimodal_batches: false
+  sync_dataloader_timing: false
+  compile_flag_pos_emb: false
+  compile_fullgraph: false
+  compile_mode: max-autotune-no-cudagraphs
+  joint_ar_nar_prob: null
+  joint_ar_nar_prob_warmup_steps: null
+  joint_ar_nar_timestep_warmup_steps: null
+  spmd_mesh: null
+  detect_anomaly: false
+  freeze_chameleon_embeddings: false
+  ckpt_model_only: false
+  use_orig_params: null
+  disable_adjust_num_warmup_steps: false
+  mask_entire_modality: null
+  iterate_dataloader_only: false
+  force_bf16_eval: false
+  disable_all_eval_generation: false
+  debug_xla_sept: false
+  ignore_text_in_unified: false
+  allow_null_sigma: false
+  disable_forward_autocast_during_eval: false
+  viz_images_only: false
+  add_label: false
+  first_token_dropout: null
+  disable_ddp_optimizer: false
+  rand_flip_ar_prob: null
+  rand_ar_modality_dropout: null
+  use_linear_warmup_cosine_annealing: false
+  no_ce_weighting: false
+  interleaved: false
+  interleaved_training_flex_attention: false
+  awr: false
+  ar_inpainting: false
+wandb:
+  entity: grads
+  project: ${eval:'"unidisc-debug" if ${debug} else "unidisc"'}
+  resume: ${eval:'"allow" if ${slurm} else None'}
+  id: null
+  group: null
+  job_type: null
+  name: null
+  tags:
+    - ${data.train}
+checkpointing_root_dir: ${oc.env:UNIDISC_CHECKPOINTING_ROOT_DIR,null}
+root_output_dir: ${oc.env:UNIDISC_ROOT_OUTPUT_DIR,outputs}
+python_orig: |
+              accelerate launch \
+              --num_machines $SLURM_NNODES \
+              --num_processes $NUM_PROCESSES \
+              --rdzv_backend c10d \
+              --main_process_ip $MASTER_ADDR \
+              --main_process_port $MASTER_PORT \
+              --machine_rank $SLURM_PROCID \
+              --mixed_precision bf16 \
+              --dynamo_backend no \
+              --enable_cpu_affinity \
+              --max_restarts 0 \
+mem_per_gpu: 40
+cpus_per_gpu: 8
+slurm_name: null
+timeout_min: ${partition_limit:${partition}}
+hydra:
+  run:
+    dir: ${oc.env:HYDRA_RUN_DIR,${root_output_dir}/outputs/${get_dir_name:}/${oc.env:HYDRA_RUN_DIR_NAME,${now:%Y_%m_%d}/${now:%H_%M_%S}}}
+  sweep:
+    dir: ${oc.env:HYDRA_RUN_DIR,${root_output_dir}/outputs/${get_dir_name:}/${oc.env:HYDRA_RUN_DIR_NAME,${now:%Y_%m_%d}/${now:%H_%M_%S}}}
+    subdir: ${hydra.job.id}
+  job:
+    chdir: true
+  # launcher:
+  #   name: ${get_slurm_name:}
+  #   # See https://hydra.cc/docs/configure_hydra/workdir/
+  #   submitit_folder: ${hydra.sweep.dir}/%j
+  #   nodes: ${nodes} # Number of nodes. This value is *per* node
+  #   mem_gb: ${eval:'${mem_per_gpu} * ${trainer.devices}'} # 40GB per gpu. This value is *per* node
+  #   gpus_per_node: ${trainer.devices}
+  #   partition: ${partition}
+  #   constraint: ${constraint}
+  #   exclude: ${exclude_nodes:}
+  #   timeout_min: ${timeout_min}
+  #   max_num_timeout: 12 # Num requeue excluding pre-emptions
+  #   comment: aswerdlo
+  #   stderr_to_stdout: true
+  #   # Be careful with changing anything below.
+  #   # see: https://github.com/stas00/ml-engineering/tree/master/training/fault-tolerance#approach-b2-choosing-which-process-to-send-the-signal-to
+  #   # see: https://github.com/huggingface/accelerate/issues/1918
+  #   # The accelerate launcher w/1 initial process and then spawn 1 per GPU
+  #   tasks_per_node: 1
+  #   cpus_per_task: ${eval:'${cpus_per_gpu} * ${trainer.devices}'}
+  #   python: |
+  #           bash -c "torchrun --nnodes $SLURM_NNODES --nproc_per_node $SLURM_GPUS_PER_NODE --role \$(hostname -s|tr -dc '0-9'): --node_rank \$SLURM_PROCID --max-restarts=2 --rdzv_id $RANDOM --rdzv_backend c10d --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \
+  #   # python: "${getpythoncmd:}"
+  #   # tasks_per_node: ${devices}
+  #   # cpus_per_task: 8
+  #   # python: 'python'
+  #   python_suffix: ' --dummy-arg $SLURM_JOB_ID" &'
+  #   signal: 'B:USR2@360'
+  #   post_srun_commands:
+  #     - ''
+  #     - wait
+  #   srun_args:
+  #     - '--jobid $SLURM_JOB_ID'
+  #   setup:
+  #     - |
+  #       export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
+  #       export MASTER_PORT=$(( ($SLURM_JOB_ID % 20001) + 30000 ))
+  #       export NUM_PROCESSES=$((SLURM_NNODES * SLURM_GPUS_PER_NODE))
+  #       export NCCL_DEBUG=INFO
+  #       export NCCL_NSOCKS_PERTHREAD=4
+  #       export NCCL_SOCKET_NTHREADS=2
+  #       export OMP_NUM_THREADS=2
+  #       export PYTHONUNBUFFERED=1
+  #       export STDOUT_PATH=$(scontrol show job $SLURM_JOB_ID | grep -oP "StdOut=\K[^ ]+")
+  #       export LOCAL_JOB_FOLDER=$(dirname $STDOUT_PATH)
+  #       export NCCL_TOPO_DUMP_FILE="$LOCAL_JOB_FOLDER/nccl_topo.xml"
+  #       if [ -n "$SLURM_RESTART_COUNT" ]; then
+  #         export RESTART_COUNT=$SLURM_RESTART_COUNT
+  #       else
+  #         export RESTART_COUNT=0
+  #       fi
+  #       export MAIN_LOG_PATH="$LOCAL_JOB_FOLDER/log_$RESTART_COUNT.txt"
+  #       mkdir -p $LOCAL_JOB_FOLDER
+  #       printenv > "$LOCAL_JOB_FOLDER"/env_"$SLURM_LOCALID_$RESTART_COUNT.txt"
+  #       echo "ibstatus: $(ibstatus)"
+  #       echo "ibdev2netdev: $(ibdev2netdev)"
+  #       echo "rdma device: $(rdma link)"
+  #       echo "environment: $(env | grep NCCL)"
+  #       echo "NUM_PROCESSES: $NUM_PROCESSES, SLURM_NNODES: $SLURM_NNODES SLURM_GPUS_PER_NODE: $SLURM_GPUS_PER_NODE"
+  #       echo "NODE_ID: $SLURM_NODEID, SLURM_PROCID: $SLURM_PROCID, MASTER_ADDR: $MASTER_ADDR, MASTER_PORT: $MASTER_PORT"
+  #       echo "PWD: $PWD, LOCAL_JOB_FOLDER: $LOCAL_JOB_FOLDER, MAIN_LOG_PATH: $MAIN_LOG_PATH"
+  #       trap 'echo "SIGUSR2 received for $SLURM_JOB_ID"; \
+  #       if [ -n "$SLURM_ARRAY_JOB_ID" ]; then echo "SLURM_ARRAY_JOB_ID: $SLURM_ARRAY_JOB_ID"; fi; \
+  #       if [ -n "$SLURM_ARRAY_TASK_ID" ]; then echo "SLURM_ARRAY_TASK_ID: $SLURM_ARRAY_TASK_ID"; fi; \
+  #       # ps auxww | grep $USER; \
+  #       pid=$(pgrep -u $USER -f "python.*(accelerate|torchrun|deepspeed|distributed\.run).*dummy-arg $SLURM_JOB_ID"); \
+  #       echo "Found parent PIDs: $pid"; \
+  #       for p in $pid; do \
+  #         echo "Parent PID has cmd: $(ps -p $p -o cmd=)"; \
+  #         children=$(pgrep -P $p); \
+  #         echo "Children: $children"; \
+  #         if [ -n "$children" ]; then \
+  #           for child in $children; do \
+  #             ppid=$(ps -o ppid= -p $child | tr -d " ")
+  #             if [ "$ppid" -eq "$p" ]; then
+  #               echo "Killing direct child process: PID $child with cmd: $(ps -p $child -o cmd=)"
+  #               kill -USR2 $child &
+  #             else
+  #               echo "Skipping non-direct child process: PID $child with PPID $ppid"
+  #             fi
+  #           done; \
+  #           echo "Sent kill signals to children of $p"; \
+  #         else \
+  #           echo "No children found for $p"; \
+  #         fi; \
+  #       done; \
+  #       wait;' SIGUSR2
+checkpointing:
+  # Use custom `save_dir` if, e.g., saving to S3 bucket, otherwise leave this parameter as is
+  save_dir: ${cwd:}/checkpoints
+  # Note: `checkpoints` path should correspond to `checkpoint_every_n_steps.dirpath`
+  resume_from_ckpt: true
+  resume_ckpt_path: ${cwd:}/checkpoints
+  initial_resume_ckpt_path: null
+  resume_wandb: true
+  checkpoints_total_limit: 2
+  use_automatic_naming: false
+data:
+  cache_dir: ${oc.env:HF_DATASETS_CACHE,/grogu/user/mprabhud/aswerdlo/huggingface/datasets}
+  num_proc: ${eval:"max(len(__import__('os').sched_getaffinity(0)) // 4, 16)"}
+  cond_resolution: null
+  iterable: false
+  force_disable_shuffle: false
+  pin_dataset_to_gpu: false
+  webdataset_iterable: false
+  webdataset_train_data: null
+  webdataset_val_data: null
+  webdataset_train_num_samples: null
+  webdataset_val_num_samples: null
+  webdataset_indexed: false
+  dataset_type: null
+  keep_tensordict_on_disk: false
+  use_token_dataset: false
+  use_custom_tensordict_collate: false
+  use_weighted_tensordict_sampler: false
+  enable_cuda_in_tensordict_collate: true
+  data_dir_train: null
+  data_dir_val: null
+  token_output_dir: null
+  wrap_dataloaders: true
+  force_shuffle_train: false
+  move_tensordict_to_shm: false
+  keep_hf_dataset_in_memory: false
+  use_chameleon: false
+  tokenize_vqvae_in_dataloader: false
+  force_mp_spawn: false
+  force_raw_images_in_multiple_tensordict: false
+  disable_text_modality: false
+  txt_only: false
+  disable_mask_after_eos: false
+  allow_label: false
+  split_dataset: false
+  img_token_shift: ${model.text_vocab_size}
+  zero_shot_eval_dataset: null
+  require_sample_ids: false
+  use_packing_collate: false
+  dynamic_packing_lengths: false
+  remove_txt_img_padding: false
+  add_image_gen_tokens: false
+  use_slow_tokenizer: false
+  add_image_token: false
+dummyarg: null

configs/config_empty.yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+defaults:
+  - _self_
+  - /model: small
+  - /experiments: []
+# from omegaconf import OmegaConf
+# with open("config.yaml", "w") as fp:
+#   OmegaConf.save(config=config, f=fp.name)

configs/experiments/ar.yaml ADDED Viewed

	@@ -0,0 +1,10 @@

+# @package _global_
+parameterization: ar
+trainer:
+  ar_shift: true
+model:
+  full_attention: false
+  use_flex_attention: false

configs/experiments/elm.yaml ADDED Viewed

	@@ -0,0 +1,15 @@

+# @package _global_
+backbone: elm
+data:
+  tokenizer_name_or_path: NousResearch/Llama-2-7b-hf
+model:
+  use_lora: false
+  full_attention: true
+  model_id: apple/OpenELM-270M # apple/OpenELM-1_1B
+trainer:
+  use_gradient_checkpointing: false
+  sd3_compile_config: false

configs/experiments/eval_model.yaml ADDED Viewed

	@@ -0,0 +1,21 @@

+# @package _global_
+mode: eval
+loader:
+  batch_size: 16
+  eval_batch_size: 16
+trainer:
+  disable_all_eval_generation: false
+eval:
+  compute_generative_perplexity: true
+  generate_samples: true
+  num_sample_batches: 20
+  log_every_n_fid: 1
+  log_every_n_evals: 1
+  compute_standalone_mauve: true
+  mauve_num_samples: 5000
+  # mauve_divergence_curve_discretization_size: 200 # works well for our repo
+  # mauve_scaling_factor: 2 # works well for our repo

configs/experiments/eval_text.yaml ADDED Viewed

	@@ -0,0 +1,26 @@

+# @package _global_
+mode: eval
+sampling:
+  steps: 100
+  max_sampling_steps: 100
+loader:
+  batch_size: 2
+  eval_batch_size: 2
+trainer:
+  fsdp: false
+eval:
+  perplexity_batch_size: 2
+  num_masking_viz_batches: 2
+  log_every_n_evals: 1
+  num_uncond_sample_batches: 2
+  num_sample_batches: 2
+  num_random_masking: 1
+  masking_batch_size: 2
+  cfg: null
+  generate_samples: true
+  compute_generative_perplexity: false

configs/experiments/eval_text_only.yaml ADDED Viewed

	@@ -0,0 +1,30 @@

+# @package _global_
+mode: eval
+debug: true
+sampling:
+  steps: 100
+  max_sampling_steps: 100
+loader:
+  batch_size: 2
+  eval_batch_size: 2
+trainer:
+  fsdp: false
+model:
+  image_model_fid_eval: false
+eval:
+  log_every_n_evals: 1
+  perplexity_batch_size: 2
+  num_uncond_sample_batches: 2
+  num_sample_batches: 2
+  num_masking_viz_batches: -1
+  num_random_masking: -1
+  masking_batch_size: -1
+  cfg: null
+  generate_samples: true
+  compute_generative_perplexity: true

configs/experiments/eval_unified.yaml ADDED Viewed

	@@ -0,0 +1,27 @@

+# @package _global_
+mode: eval
+devices: ${device_count:}
+sampling:
+  steps: 500
+  max_sampling_steps: 1000
+loader:
+  batch_size: 6
+  eval_batch_size: 6
+trainer:
+  fsdp: false
+  disable_all_eval_generation: false
+eval:
+  perplexity_batch_size: 6
+  num_masking_viz_batches: 12
+  log_every_n_evals: 1
+  num_uncond_sample_batches: 5
+  num_sample_batches: 2
+  num_random_masking: 3
+  masking_batch_size: 6
+  cfg: 6.0
+  generate_samples: false

configs/experiments/fid_cc12m.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+# @package _global_
+data:
+  keep_hf_dataset_in_memory: true
+  aggressive_aug: false
+  n_duplicate_train: null
+  n_duplicate_val: null
+  tokenize_vqvae_in_dataloader: false
+  enable_cuda_in_tensordict_collate: false
+  force_mp_spawn: false
+  keep_tensordict_on_disk: false
+  move_tensordict_to_shm: false
+  fid_dataset: cc12m_tokens_val_256
+  image_data_train: null
+  image_data_val: null
+  data_dir_train: ${data.data_dir_val}
+  data_dir_val:
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/cc12m_tokens_val_256
+      weight: 1
+      name: ${data.fid_dataset}

configs/experiments/fid_datacomp1b.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+# @package _global_
+data:
+  keep_hf_dataset_in_memory: true
+  aggressive_aug: false
+  n_duplicate_train: null
+  n_duplicate_val: null
+  tokenize_vqvae_in_dataloader: false
+  enable_cuda_in_tensordict_collate: false
+  force_mp_spawn: false
+  keep_tensordict_on_disk: false
+  move_tensordict_to_shm: false
+  fid_dataset: datacomp1b_8_magvit_val
+  image_data_train: null
+  image_data_val: null
+  data_dir_train: ${data.data_dir_val}
+  data_dir_val:
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/datacomp1b_8_magvit_val
+      weight: -1
+      name: ${data.fid_dataset}

configs/experiments/fid_hf.yaml ADDED Viewed

	@@ -0,0 +1,25 @@

+# @package _global_
+data:
+  disable_text_modality: false
+  keep_hf_dataset_in_memory: true
+  aggressive_aug: false
+  n_duplicate_train: null
+  n_duplicate_val: null
+  data_dir_train: []
+  data_dir_val: []
+  fid_dataset: sayakpaul/coco-30-val-2014
+  train: combined_tokens
+  # Relative interpolation to the sibling `train` key. Without the leading `$`,
+  # YAML parses `{.train}` as a flow mapping rather than an interpolation.
+  val: ${.train}
+  image_data_val:
+    # `${.val}` resolves to this list item's own `val` key.
+    - val: ${data.fid_dataset}
+      weight: -1
+      name: ${.val}
+      tokenize_vqvae_in_dataloader: false
+      raw_images: true
+  image_data_train:
+    # `${.train}` resolves to this list item's own `train` key.
+    - train: ${data.fid_dataset}
+      weight: -1
+      name: ${.train}
+      tokenize_vqvae_in_dataloader: false
+      raw_images: true

configs/experiments/jan_cub.yaml ADDED Viewed

	@@ -0,0 +1,51 @@

+# @package _global_
+defaults:
+  - override /model: medium
+  - override /lr_scheduler: cosine_with_hard_restarts_schedule_with_warmup
+loader:
+  batch_size: 16
+  eval_batch_size: 16
+  desired_global_batch_size: 128
+  num_workers: 4
+trainer:
+  ckpt_steps: 5000
+  val_check_interval: 100
+  use_legacy_update_batch_fn: true
+  mask_txt_only: true
+  mask_entire_modality: 0.15
+  ema: 0.9999
+  use_custom_ema: true
+  force_enable_checkpointing: true
+  skip_early_checkpointing: false
+  force_after_eos_padding: false
+checkpointing:
+  checkpoints_total_limit: 20
+lr_scheduler:
+  num_warmup_steps: 10000
+  num_training_steps: 400000
+  num_cycles: 80
+data:
+  resolution: 256
+  train: cub2011_custom
+  use_weighted_tensordict_sampler: false
+model:
+  vae_type: titok128
+  txt_length: 18
+  img_length: 128
+  rope_2d: false
+  force_text_vocab_size: 5450
+  text_vocab_size: 5451
+  image_vocab_size: 8192
+  attn_dropout: 0.1
+optim:
+  lr: 1.0e-04
+  weight_decay: 0.2
+  beta2: 0.99

configs/experiments/large_maskdit_exp.yaml ADDED Viewed

	@@ -0,0 +1,7 @@

+# @package _global_
+defaults:
+  - override /model: large_maskdit
+backbone: maskdit

configs/experiments/large_scale_high_res_interleaved_inference.yaml ADDED Viewed

	@@ -0,0 +1,51 @@

+# @package _global_
+debug: true
+seed: 163
+loader:
+  eval_batch_size: 1
+  batch_size: 1
+data:
+  move_tensordict_to_shm: false
+  resolution: 1024
+  disable_mask_after_eos: true
+  disable_packing: true
+  data_dir_val:
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/matrix/HPDv2_image_reward_v1_v2_v3/train
+      weight: 1.0
+      name: HPDv2_image_reward_512
+model:
+  img_length: 4096
+  txt_length: 1024
+  length: 5120
+trainer:
+  compile: false
+  limit_val_batches: 2
+  fsdp: false
+  force_full_attention_mask: true
+  force_null_sigma: true
+  allow_null_sigma: true
+eval:
+  num_sample_batches: 1
+  num_random_masking: 0
+  num_masking_viz_batches: 0
+  limit_val_batches_manual: 1
+  num_uncond_sample_batches: 10
+  eval_large_batch: 10
+  val_with_train_data: false
+  maskgit_r_temp: 4.5
+  half_uncond: false
+  cfg: 3.0
+  return_interleaved_modalities_split: true
+  static_img_txt_demo: true
+  visualize_sample: true
+sampling:
+  steps: 50
+  max_sampling_steps: 50
+  predictor: "maskgit"

configs/experiments/large_scale_train.yaml ADDED Viewed

	@@ -0,0 +1,151 @@

+# @package _global_
+defaults:
+  - vq16_t2i
+  - override /model: extra_large
+data:
+  train: combined_tokens
+  valid: ${.train}
+  precache: false
+  streaming: false
+  resolution: 256
+  block_size: 128
+  tokenizer_name_or_path: NousResearch/Llama-2-7b-hf
+  wrap: true
+  iterable: false
+  webdataset_iterable: false
+  webdataset_indexed: false
+  unpaired: false
+  dataset_type: null
+  tokens_flip_collate: false
+  n_val_samples: null
+  n_train_samples: null
+  n_duplicate_train: null
+  n_duplicate_val: null
+  raw_data_dir: null
+  save_train_dataloader: true
+  save_validation_dataloader: true
+  tokenizers_parallelism: false
+  token_data_dir: null
+  force_disable_shuffle: false
+  use_custom_tensordict_collate: true
+  use_weighted_tensordict_sampler: true
+  force_mp_spawn: false
+  enable_cuda_in_tensordict_collate: false
+  use_token_dataset: true
+  keep_tensordict_on_disk: true
+  move_tensordict_to_shm: false
+  add_text_to_weighted_sampler: false
+  data_dir_train:
+  # - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/matrix/HPDv2_image_reward_v1_v2_v3/train
+  #   weight: 15.0
+  #   name: hpdv2
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/pixelprose_tokens
+    weight: 1.0
+    name: pixelprose
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/journeydb_train
+    weight: 10.0
+    name: journeydb_train
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_0_tokens
+    weight: 1.0
+    name: datacomp0
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_1_tokens
+    weight: 1.0
+    name: datacomp1
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_2_tokens
+    weight: 1.0
+    name: datacomp2
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_3_tokens
+    weight: 1.0
+    name: datacomp3
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_4_tokens
+    weight: 1.0
+    name: datacomp4
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_5_tokens
+    weight: 1.0
+    name: datacomp5
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_6_tokens
+    weight: 1.0
+    name: datacomp6
+  data_dir_val:
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/pixelprose_tokens
+    weight: 1.0
+    name: dummy_1
+model:
+  img_length: ${eval:'(${data.resolution} // ${model.downscale_ratio})**2'}
+  txt_length: ${eval:'${data.block_size} if ${.unified_model} else 0'}
+  length: ${eval:'${.txt_length} + ${.img_length}'}
+  unified_model: true
+  image_model: true
+  text_model: true
+  image_model_fid_eval: false
+  force_argmax_valid_indices: true
+  use_pretrained_img_emb: false
+  rope_2d: true
+  modality_embed: true
+  norm_type: rms
+  qk_norm: true
+  sandwich_normalization: true
+  text_vocab_size: 32001
+loader:
+  batch_size: 8
+  eval_batch_size: ${eval:'${.batch_size} // 2'}
+  desired_global_batch_size: 512
+  persistent_workers: true
+  pin_memory: false
+  num_workers: 0
+  num_eval_workers: 0
+eval:
+  log_every_n_evals: -1
+  log_every_n_fid: -1
+  limit_val_batches_manual: 16
+  generate_samples: true
+  compute_generative_perplexity: false
+  perplexity_batch_size: ${loader.eval_batch_size}
+  cfg: 5.0
+  num_val_metrics_standalone_samples: -1
+  num_val_metrics_standalone_batches_per_device: -1
+  auto_enhance_reward_config:
+    dfn_score: 1.0
+    laion_aesthetic_score: 1.0
+trainer:
+  log_flops: false
+  log_every_n_steps: 10
+  custom_ddp_bf16: true
+  log_seperate_modal_losses: true
+  limit_val_batches: 16
+  softmin_snr: 5
+  text_loss_weight: 1.0
+  img_loss_weight: 0.6
+  use_gradient_checkpointing: false
+  ckpt_steps: 20000
+  ckpt_every_n_minutes: 180
+  ckpt_recent_timeout_minutes: 10
+  use_custom_ema: false
+  ema: 0.0
+  fsdp: true
+  restart_on_failure: true
+  eval_on_start: false
+  val_check_interval: 100000000000
+  scale_lr_by_batch_size: false
+  watch_gradients: false
+  compile: true
+  mask_entire_modality: 0.15
+  compile_flag_pos_emb: true
+  multimodal_batches: true
+optim:
+  lr: 0.0001
+sampling:
+  steps: 128
+  num_sample_batches: 2
+wandb:
+  mode: online
+checkpointing:
+  checkpoints_total_limit: 10
+  use_automatic_naming: false
+lr_scheduler:
+  num_warmup_steps: 10000

configs/experiments/large_scale_train_high_res.yaml ADDED Viewed

	@@ -0,0 +1,39 @@

+# @package _global_
+data:
+  resolution: 512
+  data_dir_train:
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/matrix/HPDv2_image_reward_v1_v2_v3/train
+      weight: 1
+      name: HPDv2_image_reward_512
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/pick_score_sac_prompts_v1_v2_v3_512
+      weight: 2
+      name: pick_score_sac_prompts_v1_v2_v3_512
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/datacomp1b_7_512
+      weight: 0.5
+      name: datacomp1b_7_512
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/text/slimpajama6b
+      weight: 2.5
+      name: slimpajama6b
+  data_dir_val:
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/matrix/gecko_eval_512
+      weight: 1.0
+      name: gecko_eval_512
+trainer:
+  text_loss_weight: 1.0
+  img_loss_weight: 0.5
+  force_full_attention_mask: true
+  mask_entire_modality: 0.1
+loader:
+  pin_memory: false
+  num_workers: 4
+  num_eval_workers: 4
+lr_scheduler:
+  num_warmup_steps: 5000
+model:
+  linear_factor: 2

configs/experiments/large_scale_train_high_res_inference.yaml ADDED Viewed

	@@ -0,0 +1,30 @@

+# @package _global_
+data:
+  use_token_dataset: true
+  disable_mask_after_eos: true
+  move_tensordict_to_shm: false
+trainer:
+  compile_flag_pos_emb: true
+  multimodal_batches: true
+  allow_null_sigma: true
+eval:
+  num_sample_batches: 1
+  num_random_masking: 0
+  num_masking_viz_batches: 0
+  limit_val_batches_manual: 1
+  num_uncond_sample_batches: 10
+  eval_large_batch: 10
+  val_with_train_data: false
+  maskgit_r_temp: 4.5
+  half_uncond: false
+  cfg: 3.0
+  static_img_txt_demo: true
+  visualize_sample: true
+sampling:
+  steps: 50
+  max_sampling_steps: 50
+  predictor: "maskgit"

configs/experiments/large_scale_train_high_res_interleaved.yaml ADDED Viewed

	@@ -0,0 +1,105 @@

+# @package _global_
+data:
+  move_tensordict_to_shm: false
+  enable_cuda_in_tensordict_collate: false
+  force_mp_spawn: false
+  resolution: 512
+  add_text_to_weighted_sampler: false
+  add_image_gen_tokens: true
+  use_packing_collate: true
+  dynamic_packing_lengths: true
+  remove_txt_img_padding: true
+  require_sample_ids: true
+  block_size: ${model.length}
+  disable_mask_after_eos: true
+  add_image_token: true
+  use_slow_tokenizer: true
+  force_seed: true
+  data_dir_train:
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/matrix/HPDv2_image_reward_v1_v2_v3/train
+      weight: 0.5
+      name: HPDv2_image_reward_v1_v2_v3 # 3593248
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/pick_score_sac_prompts_v1_v2_v3_512
+      weight: 1.0
+      name: pick_score_sac_prompts_v1_v2_v3_512 # 9330810
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/pixelprose_tokens
+      weight: 1.0
+      name: pixelprose_tokens # 6627589
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/babel/cambrian_10m_v5
+      weight: 1.0
+      name: cambrian_10m_v5 # 8215264
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/datacomp1b_7_512
+      weight: 1.0
+      name: datacomp1b_7_512 # 23955209
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_2_tokens
+      weight: 0.5
+      name: datacomp_1b_datacomp1b_2_tokens # 10161505
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_4_tokens
+      weight: 0.5
+      name: datacomp_1b_datacomp1b_4_tokens # 27895717
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/babel/mmc4_fewer_faces_v0
+      weight: 2.0
+      name: mmc4_fewer_faces_v0 # 22605524
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_5_tokens
+      weight: 0.5
+      name: datacomp_1b_datacomp1b_5_tokens
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_0_tokens
+      weight: 0.5
+      name: datacomp_1b_datacomp1b_0_tokens
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_1_tokens
+      weight: 0.5
+      name: datacomp_1b_datacomp1b_1_tokens
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/babel/cosmopedia_2_v0
+      weight: 1.0
+      name: cosmopedia_v2
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/babel/fineweb_edu_dedup_v0
+      weight: 1.0
+      name: fineweb_edu_dedup
+  data_dir_val:
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/matrix/gecko_eval_512
+      weight: 1.0
+      name: gecko_eval_512
+trainer:
+  text_loss_weight: 1.0
+  img_loss_weight: 0.2
+  mask_entire_modality: 0.2
+  force_full_attention_mask: false
+  force_full_attention_mask_loss_only: false
+  disable_all_eval_generation: true
+  interleaved: true
+  interleaved_training_flex_attention: true
+  force_convert_to_dict: true
+  val_check_interval: -1
+  use_gradient_checkpointing: true
+  disable_all_checkpointing: false
+  set_max_txt_loss_ratio: true
+  gradient_clip_val: 1.0
+  skip_early_checkpointing: false
+  bypass_load_from_state_dicts_if_resuming: true
+loader:
+  num_workers: 4
+  num_eval_workers: 4
+lr_scheduler:
+  num_warmup_steps: 5000
+model:
+  linear_factor: 2
+  use_flex_attention: true
+  use_spda_attn: true
+  length: 1536
+  txt_length: ${.length}
+  img_length: ${.length}
+eval:
+  generate_samples: false
+  disable_visualization: true

configs/experiments/maskgit.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+# @package _global_
+model:
+  downscale_ratio: 16
+  image_vocab_size: 1024
+  vae_type: maskgit

configs/experiments/master_eval.yaml ADDED Viewed

	@@ -0,0 +1,49 @@

+# @package _global_
+mode: eval
+eval:
+  fid_samples: 4096
+  max_num_fid_batches_per_device: ${eval:'max(${eval.fid_samples} // (${trainer.devices} * ${loader.eval_batch_size}), 1)'}
+  compute_generative_perplexity: true
+  generate_samples: true
+  log_every_n_fid: 1
+  log_every_n_evals: 1
+  class_conditional_fid: false
+  txt_conditional_fid: true
+  calculate_clip_score: true
+  cfg: 5
+  num_sample_batches: 2
+  compute_standalone_mauve: false
+  mauve_num_samples: -1
+  set_random_gen_seed: true
+  # gen_ppl_eval_model_name_or_path: 'meta-llama/Meta-Llama-3-8B'
+  compute_img_to_txt_mauve_clip: true
+  compute_img_to_txt_mauve_during_unconditional_fid: true
+  force_eval_uncond: true
+  ablation_config: true
+  compute_val_metrics_standalone: true
+  num_val_metrics_standalone_samples: 2000
+trainer:
+  disable_all_eval_generation: false
+  force_after_eos_padding: true
+model:
+  image_model_fid_eval: true
+  use_kv_cache: ${is_ar:${parameterization}}
+loader:
+  batch_size: 64
+  eval_batch_size: 64
+  num_workers: 0
+  num_eval_workers: 1
+sampling:
+  steps: ${model.length}
+  max_sampling_steps: ${sampling.steps}
+  sampling_step_frac: null
+data:
+  fid_dataset: null

configs/experiments/mscoco_fid.yaml ADDED Viewed

	@@ -0,0 +1,21 @@

+# @package _global_
+data:
+  disable_text_modality: false
+  keep_hf_dataset_in_memory: true
+  aggressive_aug: false
+  n_duplicate_train: null
+  n_duplicate_val: null
+  data_dir_train: []
+  data_dir_val: []
+  image_data_train: ${data.image_data_val}
+  image_data_val:
+    - val: sayakpaul/coco-30-val-2014
+      weight: -1
+      name: mscoco_val
+      tokenize_vqvae_in_dataloader: false
+      raw_images: true
+eval:
+  compute_generative_perplexity: true
+  generate_samples: true

configs/experiments/paired_standalone_fid_eval.yaml ADDED Viewed

	@@ -0,0 +1,29 @@

+# @package _global_
+mode: eval
+debug: true
+eval:
+  fid_samples: 4096
+  max_num_fid_batches_per_device: ${eval:'max(${eval.fid_samples} // (${trainer.devices} * ${loader.eval_batch_size}), 1)'}
+  compute_generative_perplexity: false
+  generate_samples: false
+  log_every_n_fid: 1
+  log_every_n_evals: 1
+  class_conditional_fid: false
+  txt_conditional_fid: true
+  calculate_clip_score: true
+  cfg: 5
+model:
+  image_model_fid_eval: true
+loader:
+  eval_batch_size: 32
+sampling:
+  steps: ${model.length}
+  max_sampling_steps: ${model.length}
+data:
+  keep_hf_dataset_in_memory: false

configs/experiments/small_scale_train.yaml ADDED Viewed

	@@ -0,0 +1,187 @@

+# @package _global_
+defaults:
+  - vq16_magvit
+  - override /model: small
+  - override /lr_scheduler: constant_warmup_cosine_decay
+model:
+  img_length: ${eval:'(${data.resolution} // ${model.downscale_ratio})**2'}
+  txt_length: ${eval:'${data.block_size} if ${.unified_model} else 0'}
+  length: ${eval:'${.txt_length} + ${.img_length}'}
+  image_model: true
+  text_model: true
+  unified_model: true
+  image_model_fid_eval: false
+  force_argmax_valid_indices: true
+  use_pretrained_img_emb: false
+  codebook_embed_dim: 256
+  qk_norm: true
+  norm_type: rms
+  sandwich_normalization: true
+  zero_linear_init: false
+  modality_embed: true
+  rope_2d: false
+  use_spda_attn: true
+  force_optimized_native_attn: true
+  freeze_txt_emb: false
+  add_labels: null
+  txt_dropout: null
+  text_vocab_size: 32001
+data:
+  train: combined_tokens
+  valid: ${.train}
+  n_duplicate_train: null
+  wrap: true
+  streaming: false
+  precache: false
+  tokenizer_name_or_path: NousResearch/Llama-2-7b-hf
+  resolution: 256
+  block_size: 128
+  n_val_samples: null
+  unpaired: false
+  n_duplicate_val: null
+  save_train_dataloader: true
+  save_validation_dataloader: true
+  iterable: false
+  webdataset_iterable: false
+  webdataset_indexed: false
+  dataset_type: null
+  tokens_flip_collate: false
+  n_train_samples: null
+  raw_data_dir: null
+  tokenizers_parallelism: false
+  token_data_dir: null
+  force_disable_shuffle: false
+  keep_tensordict_on_disk: true
+  use_custom_tensordict_collate: true
+  force_mp_spawn: false
+  enable_cuda_in_tensordict_collate: false
+  use_weighted_tensordict_sampler: true
+  fraction_txt_data: 0.0
+  tokenize_vqvae_in_dataloader: false
+  use_token_dataset: true
+  image_dataset: tglcourse/lsun_church_train
+  image_data_train: null
+  image_data_val: null
+  keep_hf_dataset_in_memory: true
+  allow_label: false
+  disable_text_modality: true
+  force_raw_train_images: false
+  aggressive_aug: true
+  allow_aug_vqvae_dataloader: true
+  move_tensordict_to_shm: false
+  data_dir_train:
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/datacomp1b_8_magvit
+      weight: -1
+      name: datacomp1b_8_magvit_train
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/cc12m_tokens_train_256
+      weight: -1
+      name: cc12m_tokens_train_256
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/HPDv2_image_reward_v1_v2_v3_magvit
+      weight: -1
+      name: HPDv2_image_reward_v1_v2_v3_magvit
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/pick_score_sac_prompts_v1_v2_v3_magvit
+      weight: -1
+      name: pick_score_sac_prompts_v1_v2_v3_magvit
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/datacomp1b_0_1_6_magvit
+      weight: -1
+      name: datacomp1b_0_1_6_magvit
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/laion400m_magvit_part_0
+      weight: -1
+      name: laion400m_magvit_part_0
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/laion400m_magvit_part_1
+      weight: -1
+      name: laion400m_magvit_part_1
+  data_dir_val:
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/datacomp1b_8_magvit_val
+      weight: 1
+      name: datacomp1b_8_magvit_val
+    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/cc12m_tokens_val_256
+      weight: 1
+      name: cc12m_tokens_val_256
+eval:
+  generate_samples: true
+  compute_generative_perplexity: true
+  log_every_n_evals: 10
+  log_every_n_fid: 20
+  limit_val_batches_manual: 16
+  perplexity_batch_size: ${loader.eval_batch_size}
+  num_masking_viz_batches: -1
+  cfg: null
+  class_conditional_fid: false
+  force_cfg_value: true
+  split_cfg_batches: true
+  max_num_fid_batches_per_device: ${eval:'8192 // (${trainer.devices} * ${loader.eval_batch_size})'}
+  fid_mode: clean
+  clean_fid_precomputed_name: lsun_church
+  clean_fid_precomputed_split: trainfull
+  clean_fid_precomputed_res: 256
+trainer:
+  log_every_n_steps: 10
+  val_check_interval: 1000
+  custom_ddp_bf16: true
+  scale_lr_by_batch_size: false
+  limit_val_batches: 16
+  use_gradient_checkpointing: false
+  log_seperate_modal_losses: true
+  softmin_snr: 5
+  text_loss_weight: 1.0
+  img_loss_weight: null
+  low_precision_loss: false
+  compile: true
+  multimodal_batches: true
+  compile_fullgraph: false
+  log_grad_norm_every_n_steps: 10
+  mask_entire_modality: 0.1
+  force_shift_image_batches: false
+  ckpt_steps: 10000
+  ckpt_every_n_minutes: -1
+  ignore_text_in_unified: false
+  disable_all_eval_generation: true
+  eval_on_start: false
+  ckpt_model_only: false
+  ema: 0.0
+  use_custom_ema: false
+  log_flops: false
+  disable_distributed_torchmetrics: true
+  restart_on_failure: true
+  force_null_sigma: true
+  allow_null_sigma: true
+  compile_flag_pos_emb: true
+  add_label: false
+  first_token_dropout: null
+  force_shift_raw_image_batches: true
+  txt_dropout: 0.1
+  force_full_attention_mask_loss_only: true
+optim:
+  lr: 0.0003
+  weight_decay: 0.05
+loader:
+  batch_size: 64
+  eval_batch_size: ${loader.batch_size}
+  num_workers: 4
+  desired_global_batch_size: 512
+  persistent_workers: true
+  pin_memory: true
+  num_eval_workers: 1
+sampling:
+  steps: ${model.length}
+  num_sample_batches: 2
+  max_sampling_steps: ${model.length}
+wandb:
+  mode: online
+lr_scheduler:
+  num_warmup_steps: 5000
+  num_training_steps: ${trainer.max_steps}
+checkpointing:
+  checkpoints_total_limit: 10

configs/experiments/small_scale_train_caching.yaml ADDED Viewed

	@@ -0,0 +1,186 @@

+# @package _global_
+defaults:
+  - /model: small
+model:
+  downscale_ratio: 16
+  image_vocab_size: 8192
+  vae_type: magvit
+  use_custom_vae_ckpt: null
+  custom_vae_name: null
+  img_length: 256
+  txt_length: 128
+  image_model: true
+  text_model: true
+  unified_model: true
+  image_model_fid_eval: false
+  force_argmax_valid_indices: true
+  use_pretrained_img_emb: false
+  codebook_embed_dim: 256
+  qk_norm: true
+  norm_type: rms
+  sandwich_normalization: true
+  zero_linear_init: false
+  modality_embed: true
+  rope_2d: false
+  use_spda_attn: true
+  force_optimized_native_attn: true
+  freeze_txt_emb: false
+  add_labels: null
+  txt_dropout: null
+  text_vocab_size: 32001
+  use_flex_attention: true
+  flex_attention_txt_masking_prob: 0.1
+  flex_attention_img_masking_prob: 0.1
+  linear_factor: 1
+data:
+  train: combined_tokens
+  valid: ${.train}
+  n_duplicate_train: null
+  wrap: true
+  streaming: false
+  precache: false
+  tokenizer_name_or_path: NousResearch/Llama-2-7b-hf
+  resolution: 256
+  block_size: 128
+  n_val_samples: null
+  unpaired: false
+  n_duplicate_val: null
+  save_train_dataloader: true
+  save_validation_dataloader: true
+  iterable: false
+  webdataset_iterable: false
+  webdataset_indexed: false
+  dataset_type: null
+  tokens_flip_collate: false
+  n_train_samples: null
+  raw_data_dir: null
+  tokenizers_parallelism: false
+  token_data_dir: null
+  force_disable_shuffle: false
+  keep_tensordict_on_disk: true
+  use_custom_tensordict_collate: true
+  force_mp_spawn: false
+  enable_cuda_in_tensordict_collate: false
+  use_weighted_tensordict_sampler: true
+  fraction_txt_data: 0.0
+  data_dir_train:
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/datacomp1b_8_magvit
+    weight: -1
+    name: datacomp1b_8_magvit_train
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/cc12m_tokens_train_256
+    weight: -1
+    name: cc12m_tokens_train_256
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/HPDv2_image_reward_v1_v2_v3_magvit
+    weight: -1
+    name: HPDv2_image_reward_v1_v2_v3_magvit
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/pick_score_sac_prompts_v1_v2_v3_magvit
+    weight: -1
+    name: pick_score_sac_prompts_v1_v2_v3_magvit
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/datacomp1b_0_1_6_magvit
+    weight: -1
+    name: datacomp1b_0_1_6_magvit
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/laion400m_magvit_part_0
+    weight: -1
+    name: laion400m_magvit_part_0
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/laion400m_magvit_part_1
+    weight: -1
+    name: laion400m_magvit_part_1
+  data_dir_val:
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/datacomp1b_8_magvit_val
+    weight: 1
+    name: datacomp1b_8_magvit_val
+  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/cc12m_tokens_val_256
+    weight: 1
+    name: cc12m_tokens_val_256
+  tokenize_vqvae_in_dataloader: false
+  val:
+    .train: null
+  use_token_dataset: true
+  image_dataset: tglcourse/lsun_church_train
+  image_data_train: null
+  image_data_val: null
+  keep_hf_dataset_in_memory: true
+  allow_label: false
+  disable_text_modality: true
+  force_raw_train_images: false
+  aggressive_aug: true
+  allow_aug_vqvae_dataloader: true
+  move_tensordict_to_shm: false
+  force_full_attention_mask: false
+eval:
+  generate_samples: false
+  compute_generative_perplexity: false
+  log_every_n_evals: 10
+  log_every_n_fid: 20
+  limit_val_batches_manual: 16
+  perplexity_batch_size: ${loader.eval_batch_size}
+  num_masking_viz_batches: -1
+  max_num_fid_batches_per_device: ${eval:'8192 // (${trainer.devices} * ${loader.eval_batch_size})'}
+  cfg: null
+  class_conditional_fid: false
+  force_cfg_value: true
+  split_cfg_batches: true
+  fid_mode: clean
+  clean_fid_precomputed_name: lsun_church
+  clean_fid_precomputed_split: trainfull
+  clean_fid_precomputed_res: 256
+trainer:
+  log_every_n_steps: 10
+  val_check_interval: 1000
+  custom_ddp_bf16: true
+  scale_lr_by_batch_size: false
+  limit_val_batches: 16
+  use_gradient_checkpointing: false
+  log_seperate_modal_losses: true
+  softmin_snr: 5
+  text_loss_weight: 1.0
+  img_loss_weight: null
+  low_precision_loss: false
+  compile: false
+  multimodal_batches: true
+  compile_fullgraph: false
+  log_grad_norm_every_n_steps: 10
+  mask_entire_modality: 0.1
+  force_shift_image_batches: false
+  ckpt_steps: 10000
+  ckpt_every_n_minutes: -1
+  ignore_text_in_unified: false
+  disable_all_eval_generation: false
+  eval_on_start: false
+  ckpt_model_only: false
+  ema: 0.0
+  use_custom_ema: false
+  log_flops: false
+  disable_distributed_torchmetrics: true
+  restart_on_failure: true
+  force_null_sigma: true
+  allow_null_sigma: true
+  compile_flag_pos_emb: true
+  add_label: false
+  first_token_dropout: null
+  force_shift_raw_image_batches: true
+  txt_dropout: 0.1
+  disable_ddp_optimizer: true
+optim:
+  lr: 0.0003
+  weight_decay: 0.05
+loader:
+  batch_size: 64
+  eval_batch_size: ${loader.batch_size}
+  num_workers: 1
+  desired_global_batch_size: 512
+  persistent_workers: true
+  pin_memory: true
+  num_eval_workers: 1
+sampling:
+  steps: ${model.length}
+  num_sample_batches: 2
+  max_sampling_steps: ${model.length}
+wandb:
+  mode: online
+lr_scheduler:
+  num_warmup_steps: 5000
+checkpointing:
+  checkpoints_total_limit: 4

configs/experiments/small_text_only.yaml ADDED Viewed

	@@ -0,0 +1,28 @@

+# @package _global_
+defaults:
+  - lsun_text8_exp_2
+  - owt_only
+  - override /model: small
+backbone: dit
+loader:
+  batch_size: 64
+trainer:
+  val_check_interval: 10000
+  ckpt_steps: 10000
+  softmin_snr: null
+optim:
+  fused: true
+  weight_decay: 0.03
+sampling:
+  num_sample_batches: 4
+  max_sampling_steps: 256
+model:
+  txt_length: 1024

configs/experiments/standalone_fid_eval.yaml ADDED Viewed

	@@ -0,0 +1,18 @@

+# @package _global_
+mode: eval
+debug: true
+eval:
+  max_num_fid_batches_per_device: ${eval:'4096 // (${trainer.devices} * ${loader.eval_batch_size})'}
+  compute_generative_perplexity: false
+  generate_samples: false
+  log_every_n_fid: 1
+  log_every_n_evals: 1
+loader:
+  eval_batch_size: 32
+sampling:
+  steps: 500
+  max_sampling_steps: 500

configs/experiments/titok.yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+# @package _global_
+data:
+  resolution: 256
+  downscale_ratio: 16
+model:
+  vae_type: titok

configs/experiments/titok_sl256.yaml ADDED Viewed

	@@ -0,0 +1,7 @@

+# @package _global_
+data:
+  resolution: 256
+model:
+  vae_type: titok

configs/experiments/txt_only.yaml ADDED Viewed

	@@ -0,0 +1,21 @@

+# @package _global_
+data:
+  streaming: False
+  unpaired: false
+trainer:
+  img_loss_weight: null
+  text_loss_weight: null
+model:
+  use_pretrained_img_emb: false
+  image_model_fid_eval: false
+  unified_model: false
+  image_model: false
+  txt_length: 256
+  img_length: 0
+eval:
+  log_every_n_evals: -1
+  log_every_n_fid: -1

configs/experiments/unified.yaml ADDED Viewed

	@@ -0,0 +1,23 @@

+# @package _global_
+data:
+  zero_shot_eval_dataset: "nlphuji/flickr30k"
+  precache: False
+  tokenizers_parallelism: False # tokenizer parallelism triggers an unexplained error, so keep it disabled
+  n_val_samples: 2048
+  block_size: 128
+model:
+  unified_model: True
+  text_model: true
+checkpointing:
+  resume_from_ckpt: True
+  load_from_text_model: "ckpts/unidisc-owt/model.safetensors"
+loader:
+  batch_size: 12
+trainer:
+  val_check_interval: 2000
+  log_seperate_modal_losses: true

configs/experiments/vq16.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+# @package _global_
+model:
+  downscale_ratio: 16
+  image_vocab_size: 16384
+  vae_type: VQ-16
+  use_custom_vae_ckpt: null
+  custom_vae_name: null
+  img_length: ${eval:'(${data.resolution} // ${model.downscale_ratio})**2'}

configs/experiments/vq16_1024.yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+# @package _global_
+model:
+  downscale_ratio: 16
+  image_vocab_size: 1024
+  codebook_embed_dim: 256
+  vae_type: VQ-16
+  use_custom_vae_ckpt: ${oc.env:DIFFUSION_DATA_DIR}/ckpts/2024-07-03-01-10-53_022-VQ-16_0042000.pt

configs/experiments/vq16_magvit.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+# @package _global_
+model:
+  downscale_ratio: 16
+  image_vocab_size: 8192
+  vae_type: magvit
+  use_custom_vae_ckpt: null
+  custom_vae_name: null
+  img_length: ${eval:'(${data.resolution} // ${model.downscale_ratio})**2'}

configs/experiments/vq16_t2i.yaml ADDED Viewed

	@@ -0,0 +1,10 @@

+# @package _global_
+model:
+  downscale_ratio: 16
+  image_vocab_size: 16384
+  vae_type: VQ-16
+  use_custom_vae_ckpt: ${get_repo_dir:}/ckpts/vq_ds16_t2i.pt
+  custom_vae_name: _t2i
+  codebook_embed_dim: 8
+  img_length: ${eval:'(${data.resolution} // ${model.downscale_ratio})**2'}

configs/experiments/webdataset.yaml ADDED Viewed

	@@ -0,0 +1,12 @@

+# @package _global_
+data:
+  train: datacomp1b_indexed
+  valid: ${.train}
+  iterable: false
+  webdataset_iterable: false
+  webdataset_indexed: true
+  unpaired: false
+  dataset_type: null
+  tokens_flip_collate: false

configs/experiments/zero_shot_eval.yaml ADDED Viewed

	@@ -0,0 +1,29 @@

+# @package _global_
+mode: zero-shot-eval
+data:
+  # train: "nlphuji/flickr30k"
+  train: "facebook/winoground"
+  precache: False
+  tokenizers_parallelism: False # tokenizer parallelism triggers an unexplained error, so keep it disabled
+  n_val_samples: 2048
+  block_size: 128
+  disable_text_modality: false
+eval:
+  cfg: 5
+  compute_val_metrics_standalone: false
+  compute_img_to_txt_mauve_clip: false
+loader:
+  batch_size: 16
+  eval_batch_size: 16
+model:
+  unified_model: True
+  text_model: true
+  image_model: true
+  vae_type: magvit
+  force_optimized_native_attn: false

configs/lr_scheduler/constant_warmup.yaml ADDED Viewed

	@@ -0,0 +1,2 @@


+_target_: transformers.get_constant_schedule_with_warmup
+num_warmup_steps: 2500

configs/lr_scheduler/constant_warmup_cosine_decay.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+_target_: transformers.get_cosine_schedule_with_warmup
+num_warmup_steps: 2500
+num_training_steps: 1000000

configs/lr_scheduler/cosine_decay_warmup.yaml ADDED Viewed

	@@ -0,0 +1,7 @@

+_target_: utils.CosineDecayWarmupLRScheduler
+t_in_epochs: False
+t_initial: ${eval:${trainer.max_steps}-${.warmup_t}}
+warmup_prefix: True
+warmup_lr_init: 1e-6
+warmup_t: ${eval:0.1*${trainer.max_steps}}
+lr_min: 1e-6

configs/lr_scheduler/cosine_with_hard_restarts_schedule_with_warmup.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+_target_: transformers.get_cosine_with_hard_restarts_schedule_with_warmup
+num_warmup_steps: 2500
+num_training_steps: 1000000
+num_cycles: 1

configs/model/extra_large.yaml ADDED Viewed

	@@ -0,0 +1,10 @@

+name: extra_large
+type: ddit
+hidden_size: 2048
+cond_dim: 128
+length: 1024
+n_blocks: 24
+n_heads: 16
+scale_by_sigma: True
+dropout: 0.1
+tie_word_embeddings: False

configs/model/large.yaml ADDED Viewed

	@@ -0,0 +1,14 @@

+name: large
+type: ddit
+hidden_size: 1280
+cond_dim: 128
+length: 1024
+base_n_blocks: 28
+# We try to roughly match parameter count
+n_blocks: ${adjust_n_blocks:}
+n_heads: 20
+scale_by_sigma: True
+dropout: 0.1
+tie_word_embeddings: False
+# NOTE(review): 36 1280 20 -- presumably an alternative n_blocks / hidden_size / n_heads setting; confirm

configs/model/medium.yaml ADDED Viewed

	@@ -0,0 +1,12 @@

+name: medium
+type: ddit
+hidden_size: 1024
+cond_dim: 128
+length: 1024
+base_n_blocks: 24
+# We try to roughly match parameter count
+n_blocks: ${adjust_n_blocks:}
+n_heads: 16
+scale_by_sigma: True
+dropout: 0.1
+tie_word_embeddings: False

configs/model/small-ar.yaml ADDED Viewed

	@@ -0,0 +1,11 @@

+name: small
+type: ddit
+hidden_size: 768
+cond_dim: 128
+length: 1024
+n_blocks: 12
+n_heads: 12
+scale_by_sigma: True
+dropout: 0.1
+causal: True
+tie_word_embeddings: False