sabrieyuboglu committed on
Commit
56eec05
·
verified ·
1 Parent(s): 0e72f89

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +222 -0
README.md ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ {}
3
+ ---
4
+
5
+ ## Training Configuration
6
+ ```yaml
7
+ _config_type:
8
+ _is_type: true
9
+ _module: capsules.train
10
+ _qualname: TrainConfig
11
+ run_dir: /data/sabri/capsules/2025-05-10-14-56-42-train_longhealth_simple/68e4c064-dc5a-46c8-a726-b3c7977e9e1a
12
+ output_dir: /data/sabri/capsules
13
+ run_id: 68e4c064-dc5a-46c8-a726-b3c7977e9e1a
14
+ launch_id: 2025-05-10-14-56-42-train_longhealth_simple
15
+ script_id: train_longhealth_simple
16
+ name: train_longhealth_simple_p10_lr0.02_toks2048
17
+ model:
18
+ _config_type:
19
+ _is_type: true
20
+ _module: capsules.config
21
+ _qualname: HFModelConfig
22
+ checkpoint_path: null
23
+ pretrained_model_name_or_path: meta-llama/Llama-3.2-3B-Instruct
24
+ load_kwargs: {}
25
+ peft:
26
+ _config_type:
27
+ _is_type: true
28
+ _module: capsules.config
29
+ _qualname: PeftConfig
30
+ enabled: false
31
+ method: lora
32
+ r: 8
33
+ alpha: 16
34
+ dropout: 0.0
35
+ bias: none
36
+ task_type: CAUSAL_LM
37
+ num_virtual_tokens: 20
38
+ encoder_hidden_size: null
39
+ prefix_projection: false
40
+ prompt_tuning_init: null
41
+ prompt_tuning_init_text: null
42
+ encoder_reparameterization_type: MLP
43
+ encoder_dropout: 0.0
44
+ adapter_reduction_factor: 16
45
+ adapter_non_linearity: relu
46
+ target_modules: null
47
+ extra_params: {}
48
+ tuning_method: custom_prefix
49
+ model_cls:
50
+ _is_type: true
51
+ _module: capsules.models.llama
52
+ _qualname: LlamaForCausalLM
53
+ attn_implementation: einsum
54
+ wandb:
55
+ _config_type:
56
+ _is_type: true
57
+ _module: capsules.utils.wandb
58
+ _qualname: WandBConfig
59
+ project: capsules
60
+ entity: hazy-research
61
+ name: train_longhealth_simple_p10_lr0.02_toks2048
62
+ tags:
63
+ - train
64
+ - longhealth
65
+ - patientsp10
66
+ notes: null
67
+ group: null
68
+ dataset:
69
+ _config_type:
70
+ _is_type: true
71
+ _module: capsules.datasets
72
+ _qualname: CapsuleDatasetLatest.Config
73
+ target:
74
+ _is_type: true
75
+ _module: capsules.datasets
76
+ _qualname: CapsuleDatasetLatest
77
+ kwargs: {}
78
+ data_sources:
79
+ - !!python/tuple
80
+ - hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v0
81
+ - null
82
+ - !!python/tuple
83
+ - hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v1
84
+ - null
85
+ is_wandb: true
86
+ label_type: logits
87
+ top_k_logits: 20
88
+ dataset_weights: null
89
+ user_prompt_prefix: null
90
+ convo_transforms: null
91
+ max_sequence_length: 1024
92
+ context:
93
+ _config_type:
94
+ _is_type: true
95
+ _module: capsules.tasks.longhealth.context
96
+ _qualname: LongHealthStructuredContextConfig
97
+ patient_ids:
98
+ - patient_01
99
+ - patient_02
100
+ - patient_03
101
+ - patient_04
102
+ - patient_05
103
+ - patient_06
104
+ - patient_07
105
+ - patient_08
106
+ - patient_09
107
+ - patient_10
108
+ eval_every_n_steps: 256
109
+ eval_datasets:
110
+ - _config_type:
111
+ _is_type: true
112
+ _module: capsules.train
113
+ _qualname: EvalDatasetConfig
114
+ local_batch_size: 16
115
+ dataset:
116
+ _config_type:
117
+ _is_type: true
118
+ _module: capsules.tasks.longhealth
119
+ _qualname: LongHealthEvalDataset.Config
120
+ target:
121
+ _is_type: true
122
+ _module: capsules.tasks.longhealth
123
+ _qualname: LongHealthEvalDataset
124
+ kwargs: {}
125
+ data_sources: []
126
+ is_wandb: false
127
+ label_type: tokens
128
+ top_k_logits: 20
129
+ dataset_weights: null
130
+ user_prompt_prefix: null
131
+ convo_transforms: null
132
+ patient_ids:
133
+ - patient_01
134
+ - patient_02
135
+ - patient_03
136
+ - patient_04
137
+ - patient_05
138
+ - patient_06
139
+ - patient_07
140
+ - patient_08
141
+ - patient_09
142
+ - patient_10
143
+ max_questions: 256
144
+ name_for_wandb: longhealth_mc
145
+ only_eval_rank_0: false
146
+ dataloader_num_workers: 0
147
+ eval_log_table: true
148
+ eval_max_samples: null
149
+ generate_every_n_steps: 512
150
+ generate_datasets:
151
+ - _config_type:
152
+ _is_type: true
153
+ _module: capsules.train
154
+ _qualname: GenerateDatasetConfig
155
+ dataset:
156
+ _config_type:
157
+ _is_type: true
158
+ _module: capsules.tasks.longhealth
159
+ _qualname: LongHealthMultipleChoiceGenerateDataset.Config
160
+ target:
161
+ _is_type: true
162
+ _module: capsules.tasks.longhealth
163
+ _qualname: LongHealthMultipleChoiceGenerateDataset
164
+ kwargs: {}
165
+ patient_ids:
166
+ - patient_01
167
+ - patient_02
168
+ - patient_03
169
+ - patient_04
170
+ - patient_05
171
+ - patient_06
172
+ - patient_07
173
+ - patient_08
174
+ - patient_09
175
+ - patient_10
176
+ max_questions: null
177
+ include_diagnosis: true
178
+ cot: true
179
+ name_for_wandb: longhealth_mc
180
+ dataloader_num_workers: 0
181
+ num_samples: 4
182
+ num_samples_final: 8
183
+ temperature: 0.3
184
+ batch_size: 16
185
+ override_max_tokens: null
186
+ generate_max_new_tokens: 512
187
+ global_batch_size: 64
188
+ local_batch_size: 4
189
+ use_batch_sampler: false
190
+ tokenizer: meta-llama/Llama-3.2-1B-Instruct
191
+ epochs: 2
192
+ device: cuda
193
+ distributed_backend: gloo
194
+ optimizer: adam
195
+ lr: 0.02
196
+ lr_scheduler: null
197
+ kv_cache_initializer:
198
+ _config_type:
199
+ _is_type: true
200
+ _module: capsules.kv_initialization.strategies.first_n_tokens
201
+ _qualname: KVCacheInitFromFirstNTokensOfContext.Config
202
+ target:
203
+ _is_type: true
204
+ _module: capsules.kv_initialization.strategies.first_n_tokens
205
+ _qualname: KVCacheInitFromFirstNTokensOfContext
206
+ kwargs: {}
207
+ num_frozen_tokens: 1
208
+ max_tokens: 2048
209
+ context: null
210
+ pretrained_cache_path: null
211
+ loss_type: logits
212
+ save_every_n_steps: 512
213
+ save_after_training: true
214
+ keep_last_n_saved: 1
215
+ save_to_wandb: true
216
+ online_model: true
217
+ ema_cache: false
218
+ cache_ema_alpha: 0.9
219
+ max_optimizer_steps: -1
220
+ seed: 42
221
+ log_logprob_viz: false
222
+ ```