Update README.md
Browse files
README.md
CHANGED
@@ -59,46 +59,39 @@ Peft Config :
|
|
59 |
|
60 |
{
|
61 |
'Technique' : 'QLORA',
|
62 |
-
|
63 |
'rank': 256,
|
64 |
-
|
65 |
'target_modules' : ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj", "up_proj", "down_proj",],
|
66 |
-
|
67 |
'lora_alpha' : 128,
|
68 |
-
|
69 |
'lora_dropout' : 0,
|
70 |
-
|
71 |
'bias': "none",
|
72 |
-
|
73 |
}
|
74 |
|
75 |
Hyperparameters:
|
76 |
|
77 |
{
|
78 |
"epochs": 3,
|
79 |
-
|
80 |
"evaluation_strategy": "epoch",
|
81 |
-
|
82 |
"gradient_checkpointing": True,
|
83 |
-
|
84 |
"max_grad_norm" : 0.3,
|
85 |
-
|
86 |
"optimizer" : "adamw_torch_fused",
|
87 |
-
|
88 |
"learning_rate" : 2e-4,
|
89 |
-
|
90 |
"lr_scheduler_type": "constant",
|
91 |
-
|
92 |
"warmup_ratio" : 0.03,
|
93 |
-
|
94 |
-
"
|
95 |
-
|
96 |
-
"per_device_eval_batch_size" : 8,
|
97 |
-
|
98 |
"gradient_accumulation_steps" : 4
|
99 |
-
|
100 |
}
|
101 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
## Evaluation
|
104 |
|
|
|
59 |
|
60 |
{
|
61 |
'Technique' : 'QLORA',
|
|
|
62 |
'rank': 256,
|
|
|
63 |
'target_modules' : ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj", "up_proj", "down_proj",],
|
|
|
64 |
'lora_alpha' : 128,
|
|
|
65 |
'lora_dropout' : 0,
|
|
|
66 |
'bias': "none",
|
|
|
67 |
}
|
68 |
|
69 |
Hyperparameters:
|
70 |
|
71 |
{
|
72 |
"epochs": 3,
|
|
|
73 |
"evaluation_strategy": "epoch",
|
|
|
74 |
"gradient_checkpointing": True,
|
|
|
75 |
"max_grad_norm" : 0.3,
|
|
|
76 |
"optimizer" : "adamw_torch_fused",
|
|
|
77 |
"learning_rate" : 2e-4,
|
|
|
78 |
"lr_scheduler_type": "constant",
|
|
|
79 |
"warmup_ratio" : 0.03,
|
80 |
+
"per_device_train_batch_size" : 4,
|
81 |
+
"per_device_eval_batch_size" : 4,
|
|
|
|
|
|
|
82 |
"gradient_accumulation_steps" : 4
|
|
|
83 |
}
|
84 |
```
|
85 |
+
Model was trained on 1xA100 80GB; below are the loss and memory consumption details:
|
86 |
+
{'eval_loss': 0.9614351987838745, 'eval_runtime': 244.0411, 'eval_samples_per_second': 2.663, 'eval_steps_per_second': 0.668, 'epoch': 3.0}
|
87 |
+
{'train_runtime': 19718.5285, 'train_samples_per_second': 0.781, 'train_steps_per_second': 0.049, 'train_loss': 0.8241131883172602, 'epoch': 3.0}
|
88 |
+
Total training time 19720.924563884735
|
89 |
+
328.64 minutes used for training.
|
90 |
+
Peak reserved memory = 35.789 GB.
|
91 |
+
Peak reserved memory for training = 27.848 GB.
|
92 |
+
Peak reserved memory % of max memory = 45.216 %.
|
93 |
+
Peak reserved memory for training % of max memory = 35.183 %.
|
94 |
+
|
95 |
|
96 |
## Evaluation
|
97 |
|