|
--- |
|
license: cc-by-4.0 |
|
datasets: |
|
- kyujinpy/Open-platypus-Commercial |
|
language: |
|
- en |
|
--- |
|
**Model Details** |
|
|
|
Model Developers: *SeungJin Lee (knlpscience)* |
|
|
|
Base Model: *upstage/SOLAR-10.7B-v1.0* |
|
|
|
**Notice** |
|
|
|
***Hyperparameters I*** |
|
|
|
- batch_size : 16 |
|
|
|
- num_epochs : 1 |
|
|
|
- micro_batch : 1 |
|
|
|
- gradient_accumulation_steps : batch_size // micro_batch (= 16) |
|
|
|
***Hyperparameters II*** |
|
|
|
- cutoff_len : 4096 |
|
|
|
- lr_scheduler : 'cosine' |
|
|
|
- warmup_ratio : 0.06 |
|
|
|
- learning_rate : 4e-4 |
|
|
|
- optimizer : 'adamw_torch' |
|
|
|
- weight_decay : 0.01 |
|
|
|
- max_grad_norm : 1.0 |
|
|
|
***LoRA config*** |
|
|
|
- lora_r : 64 |
|
|
|
- lora_alpha : 16 |
|
|
|
- lora_dropout : 0.05 |
|
|
|
- lora_target_modules : ["gate_proj", "down_proj", "up_proj"] |