zerofata committed on
Commit
4c4b72b
·
verified ·
1 Parent(s): ddde70b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +65 -139
README.md CHANGED
@@ -98,6 +98,40 @@ body {font-family: sans-serif; background-color: #080c14; color: #e1e9f0; line-h
98
  .data-arrow {color: #33ff99; width: 20px; display: inline-block;}
99
  .data-label {color: #00c3ff; width: 80px; display: inline-block;}
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  /* Subheading styling */
102
  .subheading {color: #00c3ff; font-size: 1.1rem; margin-top: 20px; margin-bottom: 15px; font-weight: 400; border-bottom: 1px dashed rgba(0, 195, 255, 0.3); display: inline-block; text-transform: uppercase; letter-spacing: 1px; font-family: 'Orbitron', sans-serif;}
103
 
@@ -214,21 +248,16 @@ a:hover {text-decoration: underline;}
214
  <div class="section-content">
215
  <p>The model first went through SFT with a small synthetic dataset of 2.9 million tokens (approximately 750 conversations), consisting primarily of RP data with small amounts of random instruct / assistant data and creative writing.</p>
216
  <p>The model then went through DPO training using approximately 1,100 chosen examples from the SFT dataset that were of exceptional quality or showed verifiable instruction following. Rejected samples were generated using another Llama 3.3 finetune that is known for poor instruction following.</p>
217
- <h3 class="subheading">SFT 1*H200</h3>
218
- <div class="data-box">
219
- <pre style="overflow-x: auto; color: #e1e9f0; margin: 0;"># ====================
220
- # MODEL CONFIGURATION
221
- # ====================
222
- base_model: zerofata/L3.3-GeneticLemonade-Unleashed-70B
223
  model_type: AutoModelForCausalLM
224
  tokenizer_type: AutoTokenizer
225
  special_tokens:
226
  pad_token: "<|finetune_right_pad_id|>"
227
  chat_template: llama3
228
 
229
- # ====================
230
- # DATASET CONFIGURATION
231
- # ====================
232
  datasets:
233
  - path: ./dataset.jsonl
234
  type: chat_template
@@ -243,109 +272,44 @@ datasets:
243
  assistant: ["assistant"]
244
  system: ["system"]
245
 
246
- test_datasets:
247
- - path: ./validate_dataset.jsonl
248
- type: chat_template
249
- split: train
250
- chat_template_strategy: tokenizer
251
- field_messages: messages
252
- message_property_mappings:
253
- role: role
254
- content: content
255
- roles:
256
- user: ["user"]
257
- assistant: ["assistant"]
258
- system: ["system"]
259
 
260
- dataset_prepared_path:
261
- train_on_inputs: false # Only train on assistant responses
 
 
 
 
262
 
263
- # ====================
264
- # QLORA CONFIGURATION
265
- # ====================
266
  adapter: qlora
267
  load_in_4bit: true
268
  lora_r: 64
269
  lora_alpha: 128
270
  lora_dropout: 0.1
271
  lora_target_linear: true
272
- # lora_modules_to_save: # Uncomment only if you added NEW tokens
273
 
274
- # ====================
275
- # TRAINING PARAMETERS
276
- # ====================
277
- num_epochs: 2
278
- micro_batch_size: 4
279
- gradient_accumulation_steps: 2
280
- learning_rate: 1.5e-5
281
- optimizer: paged_adamw_8bit
282
- lr_scheduler: rex
283
- warmup_ratio: 0.05
284
- weight_decay: 0.01
285
- max_grad_norm: 1.0
286
-
287
- # ====================
288
- # SEQUENCE & PACKING
289
- # ====================
290
  sequence_len: 8192
291
  sample_packing: true
292
- eval_sample_packing: false
293
  pad_to_sequence_len: true
294
 
295
- # ====================
296
- # HARDWARE OPTIMIZATIONS
297
- # ====================
298
  bf16: auto
299
  flash_attention: true
300
- gradient_checkpointing: true
301
-
302
- # ====================
303
- # EVALUATION & CHECKPOINTING
304
- # ====================
305
- evaluation_strategy: steps
306
- eval_steps: 5
307
- save_strategy: steps
308
- save_steps: 5
309
- save_total_limit: 5 # Keep best + last few checkpoints
310
- load_best_model_at_end: true
311
- metric_for_best_model: eval_loss
312
- greater_is_better: false
313
- early_stopping_patience: 5
314
-
315
- # ====================
316
- # LOGGING & OUTPUT
317
- # ====================
318
- output_dir: ./output_model
319
- logging_steps: 2
320
- save_safetensors: true
321
-
322
- # ====================
323
- # WANDB TRACKING
324
- # ====================
325
- wandb_project: project_name
326
- # wandb_entity: your_entity # Uncomment and set if needed
327
- # wandb_name: your_run_name # Uncomment and set if needed</pre>
328
  </div>
329
- <h3 class="subheading">DPO 2*H200</h3>
330
- <div class="data-box">
331
- <pre style="overflow-x: auto; color: #e1e9f0; margin: 0;"># ====================
332
- # MODEL CONFIGURATION
333
- # ====================
334
- base_model: ApocalypseParty/unleashed-fulldata30
335
  model_type: AutoModelForCausalLM
336
  tokenizer_type: AutoTokenizer
337
- special_tokens: {}
338
  chat_template: tokenizer_default
339
 
340
- # ====================
341
- # RL/DPO CONFIGURATION
342
- # ====================
343
  rl: dpo
344
  rl_beta: 0.07
345
 
346
- # ====================
347
- # DATASET CONFIGURATION
348
- # ====================
349
  datasets:
350
  - path: ./dpo_cleaned-v3_deduplicated.jsonl
351
  type: chat_template.default
@@ -359,71 +323,33 @@ datasets:
359
  system: ["system"]
360
  user: ["user"]
361
  assistant: ["assistant"]
362
- dataset_prepared_path:
363
- train_on_inputs: false # Only train on assistant responses
364
 
365
- # ====================
366
- # QLORA CONFIGURATION
367
- # ====================
 
 
 
 
 
368
  adapter: qlora
369
  load_in_4bit: true
370
  lora_r: 32
371
  lora_alpha: 64
372
  lora_dropout: 0.05
373
  lora_target_linear: true
374
- # lora_modules_to_save: # Uncomment only if you added NEW tokens
375
 
376
- # ====================
377
- # TRAINING PARAMETERS
378
- # ====================
379
- num_epochs: 1
380
- micro_batch_size: 4
381
- gradient_accumulation_steps: 2
382
- learning_rate: 2e-6
383
- optimizer: adamw_8bit
384
- lr_scheduler: cosine
385
- warmup_steps: 5
386
- weight_decay: 0.01
387
- max_grad_norm: 1.0
388
-
389
- # ====================
390
- # SEQUENCE CONFIGURATION
391
- # ====================
392
  sequence_len: 4096
393
  pad_to_sequence_len: true
394
 
395
- # ====================
396
- # HARDWARE OPTIMIZATIONS
397
- # ====================
398
  bf16: auto
399
- tf32: false
400
  flash_attention: true
401
  gradient_checkpointing: offload
402
- deepspeed: deepspeed_configs/zero1.json
403
-
404
- # ====================
405
- # CHECKPOINTING
406
- # ====================
407
- save_steps: 10
408
- save_total_limit: 10
409
- load_best_model_at_end: true
410
- metric_for_best_model: eval_loss
411
- greater_is_better: false
412
-
413
- # ====================
414
- # LOGGING & OUTPUT
415
- # ====================
416
- output_dir: ./dpo_model
417
- logging_steps: 2
418
- save_safetensors: true
419
-
420
- # ====================
421
- # WANDB TRACKING
422
- # ====================
423
- wandb_project: project_name
424
- # wandb_entity: your_entity # Uncomment and set if needed
425
- # wandb_name: your_run_name # Uncomment and set if needed</pre>
426
  </div>
 
427
  </div>
428
  </div>
429
  </div>
 
98
  .data-arrow {color: #33ff99; width: 20px; display: inline-block;}
99
  .data-label {color: #00c3ff; width: 80px; display: inline-block;}
100
 
101
+ /* Code display styling */
102
+ .code-section {
103
+ margin: 15px 0;
104
+ border-left: 2px solid #33ff99;
105
+ background-color: rgba(0, 0, 0, 0.3);
106
+ overflow-x: auto;
107
+ }
108
+ .code-header {
109
+ background-color: rgba(51, 255, 153, 0.1);
110
+ padding: 8px 15px;
111
+ font-family: 'Orbitron', sans-serif;
112
+ color: #33ff99;
113
+ font-size: 0.9rem;
114
+ letter-spacing: 1px;
115
+ border-bottom: 1px solid rgba(51, 255, 153, 0.2);
116
+ }
117
+ .code-content {
118
+ padding: 15px;
119
+ font-family: 'JetBrains Mono', monospace;
120
+ font-size: 0.85rem;
121
+ line-height: 1.4;
122
+ color: #e1e9f0;
123
+ white-space: pre;
124
+ }
125
+ .code-comment {
126
+ color: #5f8baa;
127
+ }
128
+ .code-key {
129
+ color: #00c3ff;
130
+ }
131
+ .code-value {
132
+ color: #e1e9f0;
133
+ }
134
+
135
  /* Subheading styling */
136
  .subheading {color: #00c3ff; font-size: 1.1rem; margin-top: 20px; margin-bottom: 15px; font-weight: 400; border-bottom: 1px dashed rgba(0, 195, 255, 0.3); display: inline-block; text-transform: uppercase; letter-spacing: 1px; font-family: 'Orbitron', sans-serif;}
137
 
 
248
  <div class="section-content">
249
  <p>The model first went through SFT with a small synthetic dataset of 2.9 million tokens (approximately 750 conversations), consisting primarily of RP data with small amounts of random instruct / assistant data and creative writing.</p>
250
  <p>The model then went through DPO training using approximately 1,100 chosen examples from the SFT dataset that were of exceptional quality or showed verifiable instruction following. Rejected samples were generated using another Llama 3.3 finetune that is known for poor instruction following.</p>
251
+ <h3 class="subheading">Axolotl Configurations</h3>
252
+ <p>SFT Configuration:</p>
253
+ <div style="background-color: #0a0e16; padding: 15px; border-radius: 4px; border-left: 2px solid #33ff99; margin-bottom: 20px; overflow-x: auto;">
254
+ <code style="font-family: 'JetBrains Mono', monospace; font-size: 0.85rem; display: block; white-space: pre;">base_model: zerofata/L3.3-GeneticLemonade-Unleashed-70B
 
 
255
  model_type: AutoModelForCausalLM
256
  tokenizer_type: AutoTokenizer
257
  special_tokens:
258
  pad_token: "<|finetune_right_pad_id|>"
259
  chat_template: llama3
260
 
 
 
 
261
  datasets:
262
  - path: ./dataset.jsonl
263
  type: chat_template
 
272
  assistant: ["assistant"]
273
  system: ["system"]
274
 
275
+ test_datasets: [...]
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
+ # Training configuration
278
+ train_on_inputs: false
279
+ num_epochs: 2
280
+ micro_batch_size: 4
281
+ gradient_accumulation_steps: 2
282
+ learning_rate: 1.5e-5
283
 
284
+ # LoRA parameters
 
 
285
  adapter: qlora
286
  load_in_4bit: true
287
  lora_r: 64
288
  lora_alpha: 128
289
  lora_dropout: 0.1
290
  lora_target_linear: true
 
291
 
292
+ # Sequence handling
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
  sequence_len: 8192
294
  sample_packing: true
 
295
  pad_to_sequence_len: true
296
 
297
+ # Hardware optimizations
 
 
298
  bf16: auto
299
  flash_attention: true
300
+ gradient_checkpointing: true</code>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  </div>
302
+ <p>DPO Configuration:</p>
303
+ <div style="background-color: #0a0e16; padding: 15px; border-radius: 4px; border-left: 2px solid #33ff99; margin-bottom: 20px; overflow-x: auto;">
304
+ <code style="font-family: 'JetBrains Mono', monospace; font-size: 0.85rem; display: block; white-space: pre;">base_model: ApocalypseParty/unleashed-fulldata30
 
 
 
305
  model_type: AutoModelForCausalLM
306
  tokenizer_type: AutoTokenizer
 
307
  chat_template: tokenizer_default
308
 
309
+ # DPO specific
 
 
310
  rl: dpo
311
  rl_beta: 0.07
312
 
 
 
 
313
  datasets:
314
  - path: ./dpo_cleaned-v3_deduplicated.jsonl
315
  type: chat_template.default
 
323
  system: ["system"]
324
  user: ["user"]
325
  assistant: ["assistant"]
 
 
326
 
327
+ # Training configuration
328
+ train_on_inputs: false
329
+ num_epochs: 1
330
+ micro_batch_size: 4
331
+ gradient_accumulation_steps: 2
332
+ learning_rate: 2e-6
333
+
334
+ # LoRA parameters
335
  adapter: qlora
336
  load_in_4bit: true
337
  lora_r: 32
338
  lora_alpha: 64
339
  lora_dropout: 0.05
340
  lora_target_linear: true
 
341
 
342
+ # Sequence handling
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
  sequence_len: 4096
344
  pad_to_sequence_len: true
345
 
346
+ # Hardware optimizations
 
 
347
  bf16: auto
 
348
  flash_attention: true
349
  gradient_checkpointing: offload
350
+ deepspeed: deepspeed_configs/zero1.json</code>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  </div>
352
+ <p>Full configurations are available in the repository for those interested in complete training details.</p>
353
  </div>
354
  </div>
355
  </div>