Update README.md
Browse files
README.md
CHANGED
@@ -98,6 +98,40 @@ body {font-family: sans-serif; background-color: #080c14; color: #e1e9f0; line-h
|
|
98 |
.data-arrow {color: #33ff99; width: 20px; display: inline-block;}
|
99 |
.data-label {color: #00c3ff; width: 80px; display: inline-block;}
|
100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
/* Subheading styling */
|
102 |
.subheading {color: #00c3ff; font-size: 1.1rem; margin-top: 20px; margin-bottom: 15px; font-weight: 400; border-bottom: 1px dashed rgba(0, 195, 255, 0.3); display: inline-block; text-transform: uppercase; letter-spacing: 1px; font-family: 'Orbitron', sans-serif;}
|
103 |
|
@@ -214,21 +248,16 @@ a:hover {text-decoration: underline;}
|
|
214 |
<div class="section-content">
|
215 |
<p>The model first went through SFT with a small synthetic dataset of 2.9 million tokens, approximately 750 conversations. The dataset is primarily RP data, with small amounts of random instruct / assistant data and creative writing.</p>
|
216 |
<p>The model then went through DPO training using approximately 1100 chosen examples from the SFT dataset that were of exceptional quality or showed verifiable instruction following. Rejected samples were generated using another Llama 3.3 finetune that is known for poor instruction following.</p>
|
217 |
-
<h3 class="subheading">
|
218 |
-
<
|
219 |
-
|
220 |
-
|
221 |
-
# ====================
|
222 |
-
base_model: zerofata/L3.3-GeneticLemonade-Unleashed-70B
|
223 |
model_type: AutoModelForCausalLM
|
224 |
tokenizer_type: AutoTokenizer
|
225 |
special_tokens:
|
226 |
pad_token: "<|finetune_right_pad_id|>"
|
227 |
chat_template: llama3
|
228 |
|
229 |
-
# ====================
|
230 |
-
# DATASET CONFIGURATION
|
231 |
-
# ====================
|
232 |
datasets:
|
233 |
- path: ./dataset.jsonl
|
234 |
type: chat_template
|
@@ -243,109 +272,44 @@ datasets:
|
|
243 |
assistant: ["assistant"]
|
244 |
system: ["system"]
|
245 |
|
246 |
-
test_datasets:
|
247 |
-
- path: ./validate_dataset.jsonl
|
248 |
-
type: chat_template
|
249 |
-
split: train
|
250 |
-
chat_template_strategy: tokenizer
|
251 |
-
field_messages: messages
|
252 |
-
message_property_mappings:
|
253 |
-
role: role
|
254 |
-
content: content
|
255 |
-
roles:
|
256 |
-
user: ["user"]
|
257 |
-
assistant: ["assistant"]
|
258 |
-
system: ["system"]
|
259 |
|
260 |
-
|
261 |
-
train_on_inputs: false
|
|
|
|
|
|
|
|
|
262 |
|
263 |
-
#
|
264 |
-
# QLORA CONFIGURATION
|
265 |
-
# ====================
|
266 |
adapter: qlora
|
267 |
load_in_4bit: true
|
268 |
lora_r: 64
|
269 |
lora_alpha: 128
|
270 |
lora_dropout: 0.1
|
271 |
lora_target_linear: true
|
272 |
-
# lora_modules_to_save: # Uncomment only if you added NEW tokens
|
273 |
|
274 |
-
#
|
275 |
-
# TRAINING PARAMETERS
|
276 |
-
# ====================
|
277 |
-
num_epochs: 2
|
278 |
-
micro_batch_size: 4
|
279 |
-
gradient_accumulation_steps: 2
|
280 |
-
learning_rate: 1.5e-5
|
281 |
-
optimizer: paged_adamw_8bit
|
282 |
-
lr_scheduler: rex
|
283 |
-
warmup_ratio: 0.05
|
284 |
-
weight_decay: 0.01
|
285 |
-
max_grad_norm: 1.0
|
286 |
-
|
287 |
-
# ====================
|
288 |
-
# SEQUENCE & PACKING
|
289 |
-
# ====================
|
290 |
sequence_len: 8192
|
291 |
sample_packing: true
|
292 |
-
eval_sample_packing: false
|
293 |
pad_to_sequence_len: true
|
294 |
|
295 |
-
#
|
296 |
-
# HARDWARE OPTIMIZATIONS
|
297 |
-
# ====================
|
298 |
bf16: auto
|
299 |
flash_attention: true
|
300 |
-
gradient_checkpointing: true
|
301 |
-
|
302 |
-
# ====================
|
303 |
-
# EVALUATION & CHECKPOINTING
|
304 |
-
# ====================
|
305 |
-
evaluation_strategy: steps
|
306 |
-
eval_steps: 5
|
307 |
-
save_strategy: steps
|
308 |
-
save_steps: 5
|
309 |
-
save_total_limit: 5 # Keep best + last few checkpoints
|
310 |
-
load_best_model_at_end: true
|
311 |
-
metric_for_best_model: eval_loss
|
312 |
-
greater_is_better: false
|
313 |
-
early_stopping_patience: 5
|
314 |
-
|
315 |
-
# ====================
|
316 |
-
# LOGGING & OUTPUT
|
317 |
-
# ====================
|
318 |
-
output_dir: ./output_model
|
319 |
-
logging_steps: 2
|
320 |
-
save_safetensors: true
|
321 |
-
|
322 |
-
# ====================
|
323 |
-
# WANDB TRACKING
|
324 |
-
# ====================
|
325 |
-
wandb_project: project_name
|
326 |
-
# wandb_entity: your_entity # Uncomment and set if needed
|
327 |
-
# wandb_name: your_run_name # Uncomment and set if needed</pre>
|
328 |
</div>
|
329 |
-
<
|
330 |
-
<div
|
331 |
-
<
|
332 |
-
# MODEL CONFIGURATION
|
333 |
-
# ====================
|
334 |
-
base_model: ApocalypseParty/unleashed-fulldata30
|
335 |
model_type: AutoModelForCausalLM
|
336 |
tokenizer_type: AutoTokenizer
|
337 |
-
special_tokens: {}
|
338 |
chat_template: tokenizer_default
|
339 |
|
340 |
-
#
|
341 |
-
# RL/DPO CONFIGURATION
|
342 |
-
# ====================
|
343 |
rl: dpo
|
344 |
rl_beta: 0.07
|
345 |
|
346 |
-
# ====================
|
347 |
-
# DATASET CONFIGURATION
|
348 |
-
# ====================
|
349 |
datasets:
|
350 |
- path: ./dpo_cleaned-v3_deduplicated.jsonl
|
351 |
type: chat_template.default
|
@@ -359,71 +323,33 @@ datasets:
|
|
359 |
system: ["system"]
|
360 |
user: ["user"]
|
361 |
assistant: ["assistant"]
|
362 |
-
dataset_prepared_path:
|
363 |
-
train_on_inputs: false # Only train on assistant responses
|
364 |
|
365 |
-
#
|
366 |
-
|
367 |
-
|
|
|
|
|
|
|
|
|
|
|
368 |
adapter: qlora
|
369 |
load_in_4bit: true
|
370 |
lora_r: 32
|
371 |
lora_alpha: 64
|
372 |
lora_dropout: 0.05
|
373 |
lora_target_linear: true
|
374 |
-
# lora_modules_to_save: # Uncomment only if you added NEW tokens
|
375 |
|
376 |
-
#
|
377 |
-
# TRAINING PARAMETERS
|
378 |
-
# ====================
|
379 |
-
num_epochs: 1
|
380 |
-
micro_batch_size: 4
|
381 |
-
gradient_accumulation_steps: 2
|
382 |
-
learning_rate: 2e-6
|
383 |
-
optimizer: adamw_8bit
|
384 |
-
lr_scheduler: cosine
|
385 |
-
warmup_steps: 5
|
386 |
-
weight_decay: 0.01
|
387 |
-
max_grad_norm: 1.0
|
388 |
-
|
389 |
-
# ====================
|
390 |
-
# SEQUENCE CONFIGURATION
|
391 |
-
# ====================
|
392 |
sequence_len: 4096
|
393 |
pad_to_sequence_len: true
|
394 |
|
395 |
-
#
|
396 |
-
# HARDWARE OPTIMIZATIONS
|
397 |
-
# ====================
|
398 |
bf16: auto
|
399 |
-
tf32: false
|
400 |
flash_attention: true
|
401 |
gradient_checkpointing: offload
|
402 |
-
deepspeed: deepspeed_configs/zero1.json
|
403 |
-
|
404 |
-
# ====================
|
405 |
-
# CHECKPOINTING
|
406 |
-
# ====================
|
407 |
-
save_steps: 10
|
408 |
-
save_total_limit: 10
|
409 |
-
load_best_model_at_end: true
|
410 |
-
metric_for_best_model: eval_loss
|
411 |
-
greater_is_better: false
|
412 |
-
|
413 |
-
# ====================
|
414 |
-
# LOGGING & OUTPUT
|
415 |
-
# ====================
|
416 |
-
output_dir: ./dpo_model
|
417 |
-
logging_steps: 2
|
418 |
-
save_safetensors: true
|
419 |
-
|
420 |
-
# ====================
|
421 |
-
# WANDB TRACKING
|
422 |
-
# ====================
|
423 |
-
wandb_project: project_name
|
424 |
-
# wandb_entity: your_entity # Uncomment and set if needed
|
425 |
-
# wandb_name: your_run_name # Uncomment and set if needed</pre>
|
426 |
</div>
|
|
|
427 |
</div>
|
428 |
</div>
|
429 |
</div>
|
|
|
98 |
.data-arrow {color: #33ff99; width: 20px; display: inline-block;}
|
99 |
.data-label {color: #00c3ff; width: 80px; display: inline-block;}
|
100 |
|
101 |
+
/* Code display styling */
|
102 |
+
.code-section {
|
103 |
+
margin: 15px 0;
|
104 |
+
border-left: 2px solid #33ff99;
|
105 |
+
background-color: rgba(0, 0, 0, 0.3);
|
106 |
+
overflow-x: auto;
|
107 |
+
}
|
108 |
+
.code-header {
|
109 |
+
background-color: rgba(51, 255, 153, 0.1);
|
110 |
+
padding: 8px 15px;
|
111 |
+
font-family: 'Orbitron', sans-serif;
|
112 |
+
color: #33ff99;
|
113 |
+
font-size: 0.9rem;
|
114 |
+
letter-spacing: 1px;
|
115 |
+
border-bottom: 1px solid rgba(51, 255, 153, 0.2);
|
116 |
+
}
|
117 |
+
.code-content {
|
118 |
+
padding: 15px;
|
119 |
+
font-family: 'JetBrains Mono', monospace;
|
120 |
+
font-size: 0.85rem;
|
121 |
+
line-height: 1.4;
|
122 |
+
color: #e1e9f0;
|
123 |
+
white-space: pre;
|
124 |
+
}
|
125 |
+
.code-comment {
|
126 |
+
color: #5f8baa;
|
127 |
+
}
|
128 |
+
.code-key {
|
129 |
+
color: #00c3ff;
|
130 |
+
}
|
131 |
+
.code-value {
|
132 |
+
color: #e1e9f0;
|
133 |
+
}
|
134 |
+
|
135 |
/* Subheading styling */
|
136 |
.subheading {color: #00c3ff; font-size: 1.1rem; margin-top: 20px; margin-bottom: 15px; font-weight: 400; border-bottom: 1px dashed rgba(0, 195, 255, 0.3); display: inline-block; text-transform: uppercase; letter-spacing: 1px; font-family: 'Orbitron', sans-serif;}
|
137 |
|
|
|
248 |
<div class="section-content">
|
249 |
<p>The model first went through SFT with a small synthetic dataset of 2.9 million tokens, approximately 750 conversations. The dataset is primarily RP data, with small amounts of random instruct / assistant data and creative writing.</p>
|
250 |
<p>The model then went through DPO training using approximately 1100 chosen examples from the SFT dataset that were of exceptional quality or showed verifiable instruction following. Rejected samples were generated using another Llama 3.3 finetune that is known for poor instruction following.</p>
|
251 |
+
<h3 class="subheading">Axolotl Configurations</h3>
|
252 |
+
<p>SFT Configuration:</p>
|
253 |
+
<div style="background-color: #0a0e16; padding: 15px; border-radius: 4px; border-left: 2px solid #33ff99; margin-bottom: 20px; overflow-x: auto;">
|
254 |
+
<code style="font-family: 'JetBrains Mono', monospace; font-size: 0.85rem; display: block; white-space: pre;">base_model: zerofata/L3.3-GeneticLemonade-Unleashed-70B
|
|
|
|
|
255 |
model_type: AutoModelForCausalLM
|
256 |
tokenizer_type: AutoTokenizer
|
257 |
special_tokens:
|
258 |
pad_token: "<|finetune_right_pad_id|>"
|
259 |
chat_template: llama3
|
260 |
|
|
|
|
|
|
|
261 |
datasets:
|
262 |
- path: ./dataset.jsonl
|
263 |
type: chat_template
|
|
|
272 |
assistant: ["assistant"]
|
273 |
system: ["system"]
|
274 |
|
275 |
+
test_datasets: [...]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
|
277 |
+
# Training configuration
|
278 |
+
train_on_inputs: false
|
279 |
+
num_epochs: 2
|
280 |
+
micro_batch_size: 4
|
281 |
+
gradient_accumulation_steps: 2
|
282 |
+
learning_rate: 1.5e-5
|
283 |
|
284 |
+
# LoRA parameters
|
|
|
|
|
285 |
adapter: qlora
|
286 |
load_in_4bit: true
|
287 |
lora_r: 64
|
288 |
lora_alpha: 128
|
289 |
lora_dropout: 0.1
|
290 |
lora_target_linear: true
|
|
|
291 |
|
292 |
+
# Sequence handling
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
293 |
sequence_len: 8192
|
294 |
sample_packing: true
|
|
|
295 |
pad_to_sequence_len: true
|
296 |
|
297 |
+
# Hardware optimizations
|
|
|
|
|
298 |
bf16: auto
|
299 |
flash_attention: true
|
300 |
+
gradient_checkpointing: true</code>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
301 |
</div>
|
302 |
+
<p>DPO Configuration:</p>
|
303 |
+
<div style="background-color: #0a0e16; padding: 15px; border-radius: 4px; border-left: 2px solid #33ff99; margin-bottom: 20px; overflow-x: auto;">
|
304 |
+
<code style="font-family: 'JetBrains Mono', monospace; font-size: 0.85rem; display: block; white-space: pre;">base_model: ApocalypseParty/unleashed-fulldata30
|
|
|
|
|
|
|
305 |
model_type: AutoModelForCausalLM
|
306 |
tokenizer_type: AutoTokenizer
|
|
|
307 |
chat_template: tokenizer_default
|
308 |
|
309 |
+
# DPO specific
|
|
|
|
|
310 |
rl: dpo
|
311 |
rl_beta: 0.07
|
312 |
|
|
|
|
|
|
|
313 |
datasets:
|
314 |
- path: ./dpo_cleaned-v3_deduplicated.jsonl
|
315 |
type: chat_template.default
|
|
|
323 |
system: ["system"]
|
324 |
user: ["user"]
|
325 |
assistant: ["assistant"]
|
|
|
|
|
326 |
|
327 |
+
# Training configuration
|
328 |
+
train_on_inputs: false
|
329 |
+
num_epochs: 1
|
330 |
+
micro_batch_size: 4
|
331 |
+
gradient_accumulation_steps: 2
|
332 |
+
learning_rate: 2e-6
|
333 |
+
|
334 |
+
# LoRA parameters
|
335 |
adapter: qlora
|
336 |
load_in_4bit: true
|
337 |
lora_r: 32
|
338 |
lora_alpha: 64
|
339 |
lora_dropout: 0.05
|
340 |
lora_target_linear: true
|
|
|
341 |
|
342 |
+
# Sequence handling
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
sequence_len: 4096
|
344 |
pad_to_sequence_len: true
|
345 |
|
346 |
+
# Hardware optimizations
|
|
|
|
|
347 |
bf16: auto
|
|
|
348 |
flash_attention: true
|
349 |
gradient_checkpointing: offload
|
350 |
+
deepspeed: deepspeed_configs/zero1.json</code>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
</div>
|
352 |
+
<p>Full configurations are available in the repository for those interested in complete training details.</p>
|
353 |
</div>
|
354 |
</div>
|
355 |
</div>
|