|
_config_type: |
|
_is_type: true |
|
_module: capsules.train |
|
_qualname: TrainConfig |
|
cache_ema_alpha: 0.9 |
|
context: |
|
_config_type: |
|
_is_type: true |
|
_module: capsules.tasks.longhealth.context |
|
_qualname: LongHealthStructuredContextConfig |
|
patient_ids: |
|
- patient_01 |
|
- patient_02 |
|
- patient_03 |
|
- patient_04 |
|
- patient_05 |
|
- patient_06 |
|
- patient_07 |
|
- patient_08 |
|
- patient_09 |
|
- patient_10 |
|
dataset: |
|
_config_type: |
|
_is_type: true |
|
_module: capsules.datasets |
|
_qualname: CapsuleDatasetLatest.Config |
|
convo_transforms: null |
|
data_sources: |
|
- !!python/tuple |
|
- hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v0 |
|
- null |
|
- !!python/tuple |
|
- hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v1 |
|
- null |
|
dataset_weights: null |
|
is_wandb: true |
|
kwargs: {} |
|
label_type: logits |
|
max_sequence_length: 1024 |
|
target: |
|
_is_type: true |
|
_module: capsules.datasets |
|
_qualname: CapsuleDatasetLatest |
|
top_k_logits: 20 |
|
user_prompt_prefix: null |
|
device: cuda |
|
distributed_backend: gloo |
|
ema_cache: false |
|
epochs: 2 |
|
eval_datasets: |
|
- _config_type: |
|
_is_type: true |
|
_module: capsules.train |
|
_qualname: EvalDatasetConfig |
|
dataloader_num_workers: 0 |
|
dataset: |
|
_config_type: |
|
_is_type: true |
|
_module: capsules.tasks.longhealth |
|
_qualname: LongHealthEvalDataset.Config |
|
convo_transforms: null |
|
data_sources: [] |
|
dataset_weights: null |
|
is_wandb: false |
|
kwargs: {} |
|
label_type: tokens |
|
max_questions: 256 |
|
patient_ids: |
|
- patient_01 |
|
- patient_02 |
|
- patient_03 |
|
- patient_04 |
|
- patient_05 |
|
- patient_06 |
|
- patient_07 |
|
- patient_08 |
|
- patient_09 |
|
- patient_10 |
|
target: |
|
_is_type: true |
|
_module: capsules.tasks.longhealth |
|
_qualname: LongHealthEvalDataset |
|
top_k_logits: 20 |
|
user_prompt_prefix: null |
|
local_batch_size: 16 |
|
name_for_wandb: longhealth_mc |
|
only_eval_rank_0: false |
|
eval_every_n_steps: 256 |
|
eval_log_table: true |
|
eval_max_samples: null |
|
generate_datasets: |
|
- _config_type: |
|
_is_type: true |
|
_module: capsules.train |
|
_qualname: GenerateDatasetConfig |
|
batch_size: 16 |
|
dataloader_num_workers: 0 |
|
dataset: |
|
_config_type: |
|
_is_type: true |
|
_module: capsules.tasks.longhealth |
|
_qualname: LongHealthMultipleChoiceGenerateDataset.Config |
|
cot: true |
|
include_diagnosis: true |
|
kwargs: {} |
|
max_questions: null |
|
patient_ids: |
|
- patient_01 |
|
- patient_02 |
|
- patient_03 |
|
- patient_04 |
|
- patient_05 |
|
- patient_06 |
|
- patient_07 |
|
- patient_08 |
|
- patient_09 |
|
- patient_10 |
|
target: |
|
_is_type: true |
|
_module: capsules.tasks.longhealth |
|
_qualname: LongHealthMultipleChoiceGenerateDataset |
|
name_for_wandb: longhealth_mc |
|
num_samples: 4 |
|
num_samples_final: 8 |
|
override_max_tokens: null |
|
temperature: 0.3 |
|
generate_every_n_steps: 512 |
|
generate_max_new_tokens: 512 |
|
global_batch_size: 64 |
|
keep_last_n_saved: 1 |
|
kv_cache_initializer: |
|
_config_type: |
|
_is_type: true |
|
_module: capsules.kv_initialization.strategies.first_n_tokens |
|
_qualname: KVCacheInitFromFirstNTokensOfContext.Config |
|
context: null |
|
kwargs: {} |
|
max_tokens: 2048 |
|
num_frozen_tokens: 1 |
|
target: |
|
_is_type: true |
|
_module: capsules.kv_initialization.strategies.first_n_tokens |
|
_qualname: KVCacheInitFromFirstNTokensOfContext |
|
launch_id: 2025-05-10-14-56-42-train_longhealth_simple |
|
local_batch_size: 4 |
|
log_logprob_viz: false |
|
loss_type: logits |
|
lr: 0.02 |
|
lr_scheduler: null |
|
max_optimizer_steps: -1 |
|
model: |
|
_config_type: |
|
_is_type: true |
|
_module: capsules.config |
|
_qualname: HFModelConfig |
|
attn_implementation: einsum |
|
checkpoint_path: null |
|
load_kwargs: {} |
|
model_cls: |
|
_is_type: true |
|
_module: capsules.models.llama |
|
_qualname: LlamaForCausalLM |
|
peft: |
|
_config_type: |
|
_is_type: true |
|
_module: capsules.config |
|
_qualname: PeftConfig |
|
adapter_non_linearity: relu |
|
adapter_reduction_factor: 16 |
|
alpha: 16 |
|
bias: none |
|
dropout: 0.0 |
|
enabled: false |
|
encoder_dropout: 0.0 |
|
encoder_hidden_size: null |
|
encoder_reparameterization_type: MLP |
|
extra_params: {} |
|
method: lora |
|
num_virtual_tokens: 20 |
|
prefix_projection: false |
|
prompt_tuning_init: null |
|
prompt_tuning_init_text: null |
|
r: 8 |
|
target_modules: null |
|
task_type: CAUSAL_LM |
|
pretrained_model_name_or_path: meta-llama/Llama-3.2-3B-Instruct |
|
tuning_method: custom_prefix |
|
name: train_longhealth_simple_p10_lr0.02_toks2048 |
|
online_model: true |
|
optimizer: adam |
|
output_dir: /data/sabri/capsules |
|
pretrained_cache_path: null |
|
run_dir: /data/sabri/capsules/2025-05-10-14-56-42-train_longhealth_simple/68e4c064-dc5a-46c8-a726-b3c7977e9e1a |
|
run_id: 68e4c064-dc5a-46c8-a726-b3c7977e9e1a |
|
save_after_training: true |
|
save_every_n_steps: 512 |
|
save_to_wandb: true |
|
script_id: train_longhealth_simple |
|
seed: 42 |
|
tokenizer: meta-llama/Llama-3.2-1B-Instruct |
|
use_batch_sampler: false |
|
wandb: |
|
_config_type: |
|
_is_type: true |
|
_module: capsules.utils.wandb |
|
_qualname: WandBConfig |
|
entity: hazy-research |
|
group: null |
|
name: train_longhealth_simple_p10_lr0.02_toks2048 |
|
notes: null |
|
project: capsules |
|
tags: |
|
- train |
|
- longhealth |
|
- patientsp10 |
|
|