{
  "model_type": "llama",
  "input_resampler": {
    "grid_size": 8,
    "embed_dim": 4096,
    "num_heads": 32,
    "kv_dim": 4096
  },
  "output_resampler": {
    "grid_size": 16,
    "embed_dim": 4096,
    "num_heads": 32,
    "kv_dim": 4096
  },
  "lm_loss_scale": 1.0,
  "rec_loss_scale": 1.0
}