{
  "model_type": "llama",  // Assuming the base model type is LLaMA; replace if different
  "input_resampler": {
    "grid_size": 8,
    "embed_dim": 4096,
    "num_heads": 32,
    "kv_dim": 4096
  },
  "output_resampler": {
    "grid_size": 16,
    "embed_dim": 4096,
    "num_heads": 32,
    "kv_dim": 4096
  },
  "lm_loss_scale": 1.0,
  "rec_loss_scale": 1.0
}