tarabukinivan commited on
Commit
d63c5ab
·
verified ·
1 Parent(s): 30e32f4

Training in progress, step 36, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35965e106d6c24f57f6add5fe6f1a22a20181eb32fb6d0a32f9233febf452608
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af45f5a20c7781427e7d13de0ca4866a1cf71bade19be14ca66dbe14b1506f58
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d0692876634f0af2937bee77494f409c7849c1e8c3f39ae0055cff65b167315
3
  size 168149074
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cecc7bb05c4bb579b040038b3f705ed45416975b87e35007c8e22c6569d7a45
3
  size 168149074
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:834583889f2c9cb654ef5c623b5eaf9441713a3db16fdde1d4afd1caee298f82
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7034eba1ae19e0d8d26b6a7beae9c6e8303193d228a3589cb78845a2ac46c6a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2cf2c4048d6740354979367cfe53d0c735909b56d447ba3e528d55c38895176
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24f70974ebe6e16031ce63527ce3fea95f8e56e83073513783f6d8a14f9aa0e8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.05508798775822494,
5
  "eval_steps": 5,
6
- "global_step": 18,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -81,6 +81,80 @@
81
  "learning_rate": 0.0002,
82
  "loss": 0.7708,
83
  "step": 18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  }
85
  ],
86
  "logging_steps": 3,
@@ -100,7 +174,7 @@
100
  "attributes": {}
101
  }
102
  },
103
- "total_flos": 6658431293325312.0,
104
  "train_batch_size": 2,
105
  "trial_name": null,
106
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.11017597551644988,
5
  "eval_steps": 5,
6
+ "global_step": 36,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
81
  "learning_rate": 0.0002,
82
  "loss": 0.7708,
83
  "step": 18
84
+ },
85
+ {
86
+ "epoch": 0.061208875286916604,
87
+ "eval_loss": 0.7175387144088745,
88
+ "eval_runtime": 26.0285,
89
+ "eval_samples_per_second": 5.302,
90
+ "eval_steps_per_second": 2.651,
91
+ "step": 20
92
+ },
93
+ {
94
+ "epoch": 0.06426931905126243,
95
+ "grad_norm": 0.667968213558197,
96
+ "learning_rate": 0.0001879473751206489,
97
+ "loss": 0.7045,
98
+ "step": 21
99
+ },
100
+ {
101
+ "epoch": 0.07345065034429993,
102
+ "grad_norm": 0.618311882019043,
103
+ "learning_rate": 0.00015469481581224272,
104
+ "loss": 0.6921,
105
+ "step": 24
106
+ },
107
+ {
108
+ "epoch": 0.07651109410864575,
109
+ "eval_loss": 0.6743873953819275,
110
+ "eval_runtime": 26.0301,
111
+ "eval_samples_per_second": 5.302,
112
+ "eval_steps_per_second": 2.651,
113
+ "step": 25
114
+ },
115
+ {
116
+ "epoch": 0.08263198163733741,
117
+ "grad_norm": 0.48745396733283997,
118
+ "learning_rate": 0.00010825793454723325,
119
+ "loss": 0.6098,
120
+ "step": 27
121
+ },
122
+ {
123
+ "epoch": 0.09181331293037491,
124
+ "grad_norm": 0.530785083770752,
125
+ "learning_rate": 5.983045753470308e-05,
126
+ "loss": 0.611,
127
+ "step": 30
128
+ },
129
+ {
130
+ "epoch": 0.09181331293037491,
131
+ "eval_loss": 0.6592618227005005,
132
+ "eval_runtime": 26.2376,
133
+ "eval_samples_per_second": 5.26,
134
+ "eval_steps_per_second": 2.63,
135
+ "step": 30
136
+ },
137
+ {
138
+ "epoch": 0.1009946442234124,
139
+ "grad_norm": 0.5798205733299255,
140
+ "learning_rate": 2.1085949060360654e-05,
141
+ "loss": 0.7136,
142
+ "step": 33
143
+ },
144
+ {
145
+ "epoch": 0.10711553175210406,
146
+ "eval_loss": 0.6530157923698425,
147
+ "eval_runtime": 26.1128,
148
+ "eval_samples_per_second": 5.285,
149
+ "eval_steps_per_second": 2.642,
150
+ "step": 35
151
+ },
152
+ {
153
+ "epoch": 0.11017597551644988,
154
+ "grad_norm": 0.5891212821006775,
155
+ "learning_rate": 1.3638696597277679e-06,
156
+ "loss": 0.6765,
157
+ "step": 36
158
  }
159
  ],
160
  "logging_steps": 3,
 
174
  "attributes": {}
175
  }
176
  },
177
+ "total_flos": 1.340934079905792e+16,
178
  "train_batch_size": 2,
179
  "trial_name": null,
180
  "trial_params": null