---
{}
---

## Training Configuration

```yaml
# Serialized training-run configuration.
# Mappings named `_config_type`, `target` and `model_cls` identify a Python
# object through `_is_type`, `_module` and `_qualname`.
_config_type:
  _is_type: true
  _module: capsules.train
  _qualname: TrainConfig

# Run identity and output locations.
run_dir: /data/sabri/capsules/2025-05-10-14-56-42-train_longhealth_simple/68e4c064-dc5a-46c8-a726-b3c7977e9e1a
output_dir: /data/sabri/capsules
run_id: 68e4c064-dc5a-46c8-a726-b3c7977e9e1a
launch_id: 2025-05-10-14-56-42-train_longhealth_simple
script_id: train_longhealth_simple
name: train_longhealth_simple_p10_lr0.02_toks2048

# Base model. The `peft` settings are recorded but `enabled` is false;
# `tuning_method` is `custom_prefix`.
model:
  _config_type:
    _is_type: true
    _module: capsules.config
    _qualname: HFModelConfig
  checkpoint_path: null
  pretrained_model_name_or_path: meta-llama/Llama-3.2-3B-Instruct
  load_kwargs: {}
  peft:
    _config_type:
      _is_type: true
      _module: capsules.config
      _qualname: PeftConfig
    enabled: false
    method: lora
    r: 8
    alpha: 16
    dropout: 0.0
    bias: none
    task_type: CAUSAL_LM
    num_virtual_tokens: 20
    encoder_hidden_size: null
    prefix_projection: false
    prompt_tuning_init: null
    prompt_tuning_init_text: null
    encoder_reparameterization_type: MLP
    encoder_dropout: 0.0
    adapter_reduction_factor: 16
    adapter_non_linearity: relu
    target_modules: null
    extra_params: {}
  tuning_method: custom_prefix
  model_cls:
    _is_type: true
    _module: capsules.models.llama
    _qualname: LlamaForCausalLM
  attn_implementation: einsum

# Weights & Biases logging target.
wandb:
  _config_type:
    _is_type: true
    _module: capsules.utils.wandb
    _qualname: WandBConfig
  project: capsules
  entity: hazy-research
  name: train_longhealth_simple_p10_lr0.02_toks2048
  tags:
    - train
    - longhealth
    - patientsp10
  notes: null
  group: null

# Training data: two sources (`:v0` and `:v1`), with `is_wandb: true` and
# `label_type: logits`.
dataset:
  _config_type:
    _is_type: true
    _module: capsules.datasets
    _qualname: CapsuleDatasetLatest.Config
  target:
    _is_type: true
    _module: capsules.datasets
    _qualname: CapsuleDatasetLatest
  kwargs: {}
  # `!!python/tuple` is a Python-specific tag: safe YAML loaders reject it, so
  # this document needs a loader that defines the tag.
  data_sources:
    - !!python/tuple
      - hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v0
      - null
    - !!python/tuple
      - hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v1
      - null
  is_wandb: true
  label_type: logits
  top_k_logits: 20
  dataset_weights: null
  user_prompt_prefix: null
  convo_transforms: null
  max_sequence_length: 1024

# NOTE(review): placed at the top level as a sibling of `dataset`; the
# flattened source does not show its nesting depth, so confirm against
# TrainConfig.
context:
  _config_type:
    _is_type: true
    _module: capsules.tasks.longhealth.context
    _qualname: LongHealthStructuredContextConfig
  patient_ids:
    - patient_01
    - patient_02
    - patient_03
    - patient_04
    - patient_05
    - patient_06
    - patient_07
    - patient_08
    - patient_09
    - patient_10

# Evaluation datasets and their schedule (`eval_every_n_steps`).
eval_every_n_steps: 256
eval_datasets:
  - _config_type:
      _is_type: true
      _module: capsules.train
      _qualname: EvalDatasetConfig
    local_batch_size: 16
    dataset:
      _config_type:
        _is_type: true
        _module: capsules.tasks.longhealth
        _qualname: LongHealthEvalDataset.Config
      target:
        _is_type: true
        _module: capsules.tasks.longhealth
        _qualname: LongHealthEvalDataset
      kwargs: {}
      data_sources: []
      is_wandb: false
      label_type: tokens
      top_k_logits: 20
      dataset_weights: null
      user_prompt_prefix: null
      convo_transforms: null
      patient_ids:
        - patient_01
        - patient_02
        - patient_03
        - patient_04
        - patient_05
        - patient_06
        - patient_07
        - patient_08
        - patient_09
        - patient_10
      max_questions: 256
    name_for_wandb: longhealth_mc
    only_eval_rank_0: false
    dataloader_num_workers: 0
eval_log_table: true
eval_max_samples: null

# Generation datasets and their schedule (`generate_every_n_steps`).
generate_every_n_steps: 512
generate_datasets:
  - _config_type:
      _is_type: true
      _module: capsules.train
      _qualname: GenerateDatasetConfig
    dataset:
      _config_type:
        _is_type: true
        _module: capsules.tasks.longhealth
        _qualname: LongHealthMultipleChoiceGenerateDataset.Config
      target:
        _is_type: true
        _module: capsules.tasks.longhealth
        _qualname: LongHealthMultipleChoiceGenerateDataset
      kwargs: {}
      patient_ids:
        - patient_01
        - patient_02
        - patient_03
        - patient_04
        - patient_05
        - patient_06
        - patient_07
        - patient_08
        - patient_09
        - patient_10
      max_questions: null
      include_diagnosis: true
      cot: true
    name_for_wandb: longhealth_mc
    dataloader_num_workers: 0
    num_samples: 4
    num_samples_final: 8
    temperature: 0.3
    batch_size: 16
    override_max_tokens: null
generate_max_new_tokens: 512

# Batching, tokenizer and runtime.
global_batch_size: 64
local_batch_size: 4
use_batch_sampler: false
# NOTE(review): the tokenizer repo (Llama-3.2-1B-Instruct) differs from
# `model.pretrained_model_name_or_path` (Llama-3.2-3B-Instruct); confirm this
# is intended.
tokenizer: meta-llama/Llama-3.2-1B-Instruct
epochs: 2
device: cuda
# NOTE(review): `gloo` is recorded alongside `device: cuda`; confirm this
# backend choice was deliberate.
distributed_backend: gloo

# Optimizer.
optimizer: adam
lr: 0.02
lr_scheduler: null

# KV-cache initialization strategy; its own `context` is null.
kv_cache_initializer:
  _config_type:
    _is_type: true
    _module: capsules.kv_initialization.strategies.first_n_tokens
    _qualname: KVCacheInitFromFirstNTokensOfContext.Config
  target:
    _is_type: true
    _module: capsules.kv_initialization.strategies.first_n_tokens
    _qualname: KVCacheInitFromFirstNTokensOfContext
  kwargs: {}
  num_frozen_tokens: 1
  max_tokens: 2048
  context: null
pretrained_cache_path: null
loss_type: logits

# Checkpointing.
save_every_n_steps: 512
save_after_training: true
keep_last_n_saved: 1
save_to_wandb: true

# Remaining training switches.
online_model: true
ema_cache: false
cache_ema_alpha: 0.9
# NOTE(review): -1 presumably means "no step limit"; confirm against the
# trainer.
max_optimizer_steps: -1
seed: 42
log_logprob_viz: false
```