sandernotenbaert committed on
Commit
21cecc5
·
verified ·
1 Parent(s): e819eca

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5be01825ddc487915292db37d62ff62b8d6ed0f8c74279a313c72a200f7d91f7
3
  size 1783055976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acc87b56bfea0921f67b82fd6fb0ff13a0dce7525f5dcac74b700919b5b7da60
3
  size 1783055976
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa4e94e49b6113a881a4e33ab16b3c39d397235b7cffc6987b8fbcca73489c72
3
  size 3566169867
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe5a8e62d2a35845881303c9c91d1fd60669d4548386f6d6eb35cdd3962cfe5
3
  size 3566169867
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3a77d4a8b98ce027a4d6a3b9fb5d7c904e27ec1efd5c0468c24fa26bb738316
3
  size 14455
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dba4fde4ee04d2f472bb4dea96a48e8fdf7891d2b0694a8f012e8133a2e176ae
3
  size 14455
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c50f7ca4fd505471874984aa92be108b5378e85599e099ca2e0075af31141d8
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:579bb6a92f2b71fde64b38b0a9a0620dcd61523c3257dcf20e2dc548cb235123
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.04067603571355936,
6
  "eval_steps": 500,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -86,6 +86,84 @@
86
  "eval_samples_per_second": 3.094,
87
  "eval_steps_per_second": 0.774,
88
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  }
90
  ],
91
  "logging_steps": 50,
@@ -105,7 +183,7 @@
105
  "attributes": {}
106
  }
107
  },
108
- "total_flos": 2.1883244249088e+16,
109
  "train_batch_size": 4,
110
  "trial_name": null,
111
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.08135207142711871,
6
  "eval_steps": 500,
7
+ "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
86
  "eval_samples_per_second": 3.094,
87
  "eval_steps_per_second": 0.774,
88
  "step": 500
89
+ },
90
+ {
91
+ "epoch": 0.044743639284915294,
92
+ "grad_norm": 10.40243148803711,
93
+ "learning_rate": 2.4796747967479675e-05,
94
+ "loss": 2.9854,
95
+ "step": 550
96
+ },
97
+ {
98
+ "epoch": 0.048811242856271225,
99
+ "grad_norm": 9.785271644592285,
100
+ "learning_rate": 2.7055103884372178e-05,
101
+ "loss": 2.918,
102
+ "step": 600
103
+ },
104
+ {
105
+ "epoch": 0.05287884642762716,
106
+ "grad_norm": 9.53836727142334,
107
+ "learning_rate": 2.931345980126468e-05,
108
+ "loss": 2.9519,
109
+ "step": 650
110
+ },
111
+ {
112
+ "epoch": 0.0569464499989831,
113
+ "grad_norm": 9.973458290100098,
114
+ "learning_rate": 3.1571815718157185e-05,
115
+ "loss": 2.9703,
116
+ "step": 700
117
+ },
118
+ {
119
+ "epoch": 0.06101405357033903,
120
+ "grad_norm": 9.470365524291992,
121
+ "learning_rate": 3.3830171635049685e-05,
122
+ "loss": 2.9412,
123
+ "step": 750
124
+ },
125
+ {
126
+ "epoch": 0.06508165714169498,
127
+ "grad_norm": 9.310904502868652,
128
+ "learning_rate": 3.6088527551942185e-05,
129
+ "loss": 2.9269,
130
+ "step": 800
131
+ },
132
+ {
133
+ "epoch": 0.0691492607130509,
134
+ "grad_norm": 9.931621551513672,
135
+ "learning_rate": 3.8346883468834685e-05,
136
+ "loss": 2.8926,
137
+ "step": 850
138
+ },
139
+ {
140
+ "epoch": 0.07321686428440684,
141
+ "grad_norm": 10.827827453613281,
142
+ "learning_rate": 4.060523938572719e-05,
143
+ "loss": 2.8518,
144
+ "step": 900
145
+ },
146
+ {
147
+ "epoch": 0.07728446785576278,
148
+ "grad_norm": 10.306633949279785,
149
+ "learning_rate": 4.28635953026197e-05,
150
+ "loss": 2.8674,
151
+ "step": 950
152
+ },
153
+ {
154
+ "epoch": 0.08135207142711871,
155
+ "grad_norm": 9.62368106842041,
156
+ "learning_rate": 4.51219512195122e-05,
157
+ "loss": 2.8594,
158
+ "step": 1000
159
+ },
160
+ {
161
+ "epoch": 0.08135207142711871,
162
+ "eval_loss": 2.8507587909698486,
163
+ "eval_runtime": 642.7633,
164
+ "eval_samples_per_second": 3.091,
165
+ "eval_steps_per_second": 0.773,
166
+ "step": 1000
167
  }
168
  ],
169
  "logging_steps": 50,
 
183
  "attributes": {}
184
  }
185
  },
186
+ "total_flos": 4.3766488498176e+16,
187
  "train_batch_size": 4,
188
  "trial_name": null,
189
  "trial_params": null