anamikac2708 committed
Commit bd3580d · verified · 1 Parent(s): 3e5999e

Update README.md

Files changed (1): README.md (+12 -19)
README.md CHANGED
@@ -59,46 +59,39 @@ Peft Config :
 
 {
 'Technique' : 'QLORA',
-
 'rank': 256,
-
 'target_modules' : ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
-
 'lora_alpha' : 128,
-
 'lora_dropout' : 0,
-
 'bias': "none",
-
 }
 
 Hyperparameters:
 
 {
 "epochs": 3,
-
 "evaluation_strategy": "epoch",
-
 "gradient_checkpointing": True,
-
 "max_grad_norm" : 0.3,
-
 "optimizer" : "adamw_torch_fused",
-
 "learning_rate" : 2e-4,
-
 "lr_scheduler_type": "constant",
-
 "warmup_ratio" : 0.03,
-
-"per_device_train_batch_size" : 8,
-
-"per_device_eval_batch_size" : 8,
-
+"per_device_train_batch_size" : 4,
+"per_device_eval_batch_size" : 4,
 "gradient_accumulation_steps" : 4
-
 }
 ```
+The model was trained on 1x A100 80GB; loss and memory consumption details below:
+{'eval_loss': 0.9614351987838745, 'eval_runtime': 244.0411, 'eval_samples_per_second': 2.663, 'eval_steps_per_second': 0.668, 'epoch': 3.0}
+{'train_runtime': 19718.5285, 'train_samples_per_second': 0.781, 'train_steps_per_second': 0.049, 'train_loss': 0.8241131883172602, 'epoch': 3.0}
+Total training time: 19720.92 seconds.
+328.64 minutes used for training.
+Peak reserved memory = 35.789 GB.
+Peak reserved memory for training = 27.848 GB.
+Peak reserved memory % of max memory = 45.216 %.
+Peak reserved memory for training % of max memory = 35.183 %.
+
 
 ## Evaluation
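For readers reproducing this setup: the Peft Config block in the diff maps onto `peft.LoraConfig` roughly as below. This is a minimal sketch, not the author's exact code; the `task_type` is an assumption (causal-LM fine-tuning), and the 4-bit base-model loading that QLoRA implies is not shown in the diff, so it is omitted here.

```python
# Sketch of the QLoRA adapter config above, using the peft API.
# Values are copied from the README diff; task_type is an assumption.
from peft import LoraConfig

lora_config = LoraConfig(
    r=256,                      # 'rank' in the README
    lora_alpha=128,
    lora_dropout=0.0,
    bias="none",
    task_type="CAUSAL_LM",      # assumption: causal-LM fine-tuning
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)
```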
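Likewise, the Hyperparameters block corresponds to a `transformers.TrainingArguments` of roughly this shape; `output_dir` is a placeholder, everything else is taken from the diff. Note that with the per-device batch size reduced from 8 to 4 in this commit and `gradient_accumulation_steps=4`, the effective batch size on the single GPU is 4 * 4 = 16.

```python
# Sketch of the training arguments implied by the README diff.
# output_dir is hypothetical; the remaining values come from the diff.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="outputs",               # placeholder
    num_train_epochs=3,
    evaluation_strategy="epoch",
    gradient_checkpointing=True,
    max_grad_norm=0.3,
    optim="adamw_torch_fused",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    per_device_train_batch_size=4,      # reduced from 8 in this commit
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,      # effective batch size 4 * 4 = 16
)
```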
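The diff does not show how the peak-memory figures were measured; the wording matches the pattern printed by common QLoRA fine-tuning notebooks, which read `torch.cuda.max_memory_reserved()` before and after training. A sketch under that assumption (variable names are illustrative):

```python
# Assumed measurement pattern for the "Peak reserved memory" lines above;
# a snapshot is taken before trainer.train() and again afterwards.
import torch

gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024**3, 3)                  # total GB on the card
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)  # before training

# ... trainer.train() runs here ...

used_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)       # peak reserved, GB
used_memory_for_training = round(used_memory - start_gpu_memory, 3)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_training} GB.")
print(f"Peak reserved memory % of max memory = {round(used_memory / max_memory * 100, 3)} %.")
print(f"Peak reserved memory for training % of max memory = {round(used_memory_for_training / max_memory * 100, 3)} %.")
```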