hazyresearch
/

cartridge-wauoq23f

Model card Files and versions Community

sabrieyuboglu committed on Jun 11

Commit

56eec05

verified ·

1 Parent(s): 0e72f89

Upload README.md with huggingface_hub

Browse files

Files changed (1) hide show

README.md +222 -0

README.md ADDED Viewed

	@@ -0,0 +1,222 @@

+---
+{}
+---
+## Training Configuration
+```yaml
+_config_type:
+  _is_type: true
+  _module: capsules.train
+  _qualname: TrainConfig
+run_dir: /data/sabri/capsules/2025-05-10-14-56-42-train_longhealth_simple/68e4c064-dc5a-46c8-a726-b3c7977e9e1a
+output_dir: /data/sabri/capsules
+run_id: 68e4c064-dc5a-46c8-a726-b3c7977e9e1a
+launch_id: 2025-05-10-14-56-42-train_longhealth_simple
+script_id: train_longhealth_simple
+name: train_longhealth_simple_p10_lr0.02_toks2048
+model:
+  _config_type:
+    _is_type: true
+    _module: capsules.config
+    _qualname: HFModelConfig
+  checkpoint_path: null
+  pretrained_model_name_or_path: meta-llama/Llama-3.2-3B-Instruct
+  load_kwargs: {}
+  peft:
+    _config_type:
+      _is_type: true
+      _module: capsules.config
+      _qualname: PeftConfig
+    enabled: false
+    method: lora
+    r: 8
+    alpha: 16
+    dropout: 0.0
+    bias: none
+    task_type: CAUSAL_LM
+    num_virtual_tokens: 20
+    encoder_hidden_size: null
+    prefix_projection: false
+    prompt_tuning_init: null
+    prompt_tuning_init_text: null
+    encoder_reparameterization_type: MLP
+    encoder_dropout: 0.0
+    adapter_reduction_factor: 16
+    adapter_non_linearity: relu
+    target_modules: null
+    extra_params: {}
+  tuning_method: custom_prefix
+  model_cls:
+    _is_type: true
+    _module: capsules.models.llama
+    _qualname: LlamaForCausalLM
+  attn_implementation: einsum
+wandb:
+  _config_type:
+    _is_type: true
+    _module: capsules.utils.wandb
+    _qualname: WandBConfig
+  project: capsules
+  entity: hazy-research
+  name: train_longhealth_simple_p10_lr0.02_toks2048
+  tags:
+  - train
+  - longhealth
+  - patientsp10
+  notes: null
+  group: null
+dataset:
+  _config_type:
+    _is_type: true
+    _module: capsules.datasets
+    _qualname: CapsuleDatasetLatest.Config
+  target:
+    _is_type: true
+    _module: capsules.datasets
+    _qualname: CapsuleDatasetLatest
+  kwargs: {}
+  data_sources:
+  - !!python/tuple
+    - hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v0
+    - null
+  - !!python/tuple
+    - hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v1
+    - null
+  is_wandb: true
+  label_type: logits
+  top_k_logits: 20
+  dataset_weights: null
+  user_prompt_prefix: null
+  convo_transforms: null
+  max_sequence_length: 1024
+context:
+  _config_type:
+    _is_type: true
+    _module: capsules.tasks.longhealth.context
+    _qualname: LongHealthStructuredContextConfig
+  patient_ids:
+  - patient_01
+  - patient_02
+  - patient_03
+  - patient_04
+  - patient_05
+  - patient_06
+  - patient_07
+  - patient_08
+  - patient_09
+  - patient_10
+eval_every_n_steps: 256
+eval_datasets:
+- _config_type:
+    _is_type: true
+    _module: capsules.train
+    _qualname: EvalDatasetConfig
+  local_batch_size: 16
+  dataset:
+    _config_type:
+      _is_type: true
+      _module: capsules.tasks.longhealth
+      _qualname: LongHealthEvalDataset.Config
+    target:
+      _is_type: true
+      _module: capsules.tasks.longhealth
+      _qualname: LongHealthEvalDataset
+    kwargs: {}
+    data_sources: []
+    is_wandb: false
+    label_type: tokens
+    top_k_logits: 20
+    dataset_weights: null
+    user_prompt_prefix: null
+    convo_transforms: null
+    patient_ids:
+    - patient_01
+    - patient_02
+    - patient_03
+    - patient_04
+    - patient_05
+    - patient_06
+    - patient_07
+    - patient_08
+    - patient_09
+    - patient_10
+    max_questions: 256
+  name_for_wandb: longhealth_mc
+  only_eval_rank_0: false
+  dataloader_num_workers: 0
+eval_log_table: true
+eval_max_samples: null
+generate_every_n_steps: 512
+generate_datasets:
+- _config_type:
+    _is_type: true
+    _module: capsules.train
+    _qualname: GenerateDatasetConfig
+  dataset:
+    _config_type:
+      _is_type: true
+      _module: capsules.tasks.longhealth
+      _qualname: LongHealthMultipleChoiceGenerateDataset.Config
+    target:
+      _is_type: true
+      _module: capsules.tasks.longhealth
+      _qualname: LongHealthMultipleChoiceGenerateDataset
+    kwargs: {}
+    patient_ids:
+    - patient_01
+    - patient_02
+    - patient_03
+    - patient_04
+    - patient_05
+    - patient_06
+    - patient_07
+    - patient_08
+    - patient_09
+    - patient_10
+    max_questions: null
+    include_diagnosis: true
+    cot: true
+  name_for_wandb: longhealth_mc
+  dataloader_num_workers: 0
+  num_samples: 4
+  num_samples_final: 8
+  temperature: 0.3
+  batch_size: 16
+  override_max_tokens: null
+generate_max_new_tokens: 512
+global_batch_size: 64
+local_batch_size: 4
+use_batch_sampler: false
+tokenizer: meta-llama/Llama-3.2-1B-Instruct
+epochs: 2
+device: cuda
+distributed_backend: gloo
+optimizer: adam
+lr: 0.02
+lr_scheduler: null
+kv_cache_initializer:
+  _config_type:
+    _is_type: true
+    _module: capsules.kv_initialization.strategies.first_n_tokens
+    _qualname: KVCacheInitFromFirstNTokensOfContext.Config
+  target:
+    _is_type: true
+    _module: capsules.kv_initialization.strategies.first_n_tokens
+    _qualname: KVCacheInitFromFirstNTokensOfContext
+  kwargs: {}
+  num_frozen_tokens: 1
+  max_tokens: 2048
+  context: null
+pretrained_cache_path: null
+loss_type: logits
+save_every_n_steps: 512
+save_after_training: true
+keep_last_n_saved: 1
+save_to_wandb: true
+online_model: true
+ema_cache: false
+cache_ema_alpha: 0.9
+max_optimizer_steps: -1
+seed: 42
+log_logprob_viz: false
+```