kinleyrabgay committed on
Commit
4fccf30
·
verified ·
1 Parent(s): c299ce1

Initial release: Fine-tuned NLLB for Dzongkha-English

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +120 -50
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "total_flos": 2708880752640000.0,
4
- "train_loss": 1.667306510925293,
5
- "train_runtime": 2329.7908,
6
- "train_samples_per_second": 4.292,
7
- "train_steps_per_second": 0.537
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "total_flos": 1.35444037632e+16,
4
+ "train_loss": 0.05036833358764648,
5
+ "train_runtime": 6278.8779,
6
+ "train_samples_per_second": 7.963,
7
+ "train_steps_per_second": 0.995
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "total_flos": 2708880752640000.0,
4
- "train_loss": 1.667306510925293,
5
- "train_runtime": 2329.7908,
6
- "train_samples_per_second": 4.292,
7
- "train_steps_per_second": 0.537
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "total_flos": 1.35444037632e+16,
4
+ "train_loss": 0.05036833358764648,
5
+ "train_runtime": 6278.8779,
6
+ "train_samples_per_second": 7.963,
7
+ "train_steps_per_second": 0.995
8
  }
trainer_state.json CHANGED
@@ -1,85 +1,155 @@
1
  {
2
- "best_global_step": 1250,
3
- "best_metric": 63.12399055783562,
4
- "best_model_checkpoint": "nllb-600m-dz-en-checkpoints/checkpoint-1250",
5
  "epoch": 5.0,
6
  "eval_steps": 500,
7
- "global_step": 1250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  {
13
  "epoch": 1.0,
14
- "eval_bleu": 58.0755267273345,
15
- "eval_loss": 3.2226617336273193,
16
- "eval_runtime": 88.1796,
17
- "eval_samples_per_second": 5.67,
18
- "eval_steps_per_second": 1.418,
19
- "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  },
21
  {
22
  "epoch": 2.0,
23
- "grad_norm": 5.708385944366455,
24
- "learning_rate": 1.8096e-05,
25
- "loss": 4.0188,
26
- "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
- "eval_bleu": 59.852719093079365,
31
- "eval_loss": 0.36037033796310425,
32
- "eval_runtime": 100.2346,
33
- "eval_samples_per_second": 4.988,
34
- "eval_steps_per_second": 1.247,
35
- "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  },
37
  {
38
  "epoch": 3.0,
39
- "eval_bleu": 62.2628950594224,
40
- "eval_loss": 0.10864810645580292,
41
- "eval_runtime": 100.1968,
42
- "eval_samples_per_second": 4.99,
43
- "eval_steps_per_second": 1.248,
44
- "step": 750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  },
46
  {
47
  "epoch": 4.0,
48
- "grad_norm": 0.4872799217700958,
49
- "learning_rate": 6.096e-06,
50
- "loss": 0.1201,
51
- "step": 1000
52
  },
53
  {
54
  "epoch": 4.0,
55
- "eval_bleu": 63.06198866502594,
56
- "eval_loss": 0.10180553793907166,
57
- "eval_runtime": 100.7637,
58
- "eval_samples_per_second": 4.962,
59
- "eval_steps_per_second": 1.241,
60
- "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  },
62
  {
63
  "epoch": 5.0,
64
- "eval_bleu": 63.12399055783562,
65
- "eval_loss": 0.10181548446416855,
66
- "eval_runtime": 111.015,
67
- "eval_samples_per_second": 4.504,
68
- "eval_steps_per_second": 1.126,
69
- "step": 1250
70
  },
71
  {
72
  "epoch": 5.0,
73
- "step": 1250,
74
- "total_flos": 2708880752640000.0,
75
- "train_loss": 1.667306510925293,
76
- "train_runtime": 2329.7908,
77
- "train_samples_per_second": 4.292,
78
- "train_steps_per_second": 0.537
79
  }
80
  ],
81
  "logging_steps": 500,
82
- "max_steps": 1250,
83
  "num_input_tokens_seen": 0,
84
  "num_train_epochs": 5,
85
  "save_steps": 500,
@@ -95,7 +165,7 @@
95
  "attributes": {}
96
  }
97
  },
98
- "total_flos": 2708880752640000.0,
99
  "train_batch_size": 4,
100
  "trial_name": null,
101
  "trial_params": null
 
1
  {
2
+ "best_global_step": 6250,
3
+ "best_metric": 59.5126771383472,
4
+ "best_model_checkpoint": "nllb-200-600M-dzo-eng-checkpoints/checkpoint-6250",
5
  "epoch": 5.0,
6
  "eval_steps": 500,
7
+ "global_step": 6250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
+ {
13
+ "epoch": 0.4,
14
+ "grad_norm": 0.19286614656448364,
15
+ "learning_rate": 2.76048e-05,
16
+ "loss": 0.0794,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.8,
21
+ "grad_norm": 0.27002865076065063,
22
+ "learning_rate": 2.52048e-05,
23
+ "loss": 0.0765,
24
+ "step": 1000
25
+ },
26
  {
27
  "epoch": 1.0,
28
+ "eval_bleu": 58.03728575479501,
29
+ "eval_loss": 0.07460231333971024,
30
+ "eval_runtime": 167.4219,
31
+ "eval_samples_per_second": 5.973,
32
+ "eval_steps_per_second": 1.493,
33
+ "step": 1250
34
+ },
35
+ {
36
+ "epoch": 1.2,
37
+ "grad_norm": 0.3723973035812378,
38
+ "learning_rate": 2.28048e-05,
39
+ "loss": 0.0644,
40
+ "step": 1500
41
+ },
42
+ {
43
+ "epoch": 1.6,
44
+ "grad_norm": 0.20845282077789307,
45
+ "learning_rate": 2.04048e-05,
46
+ "loss": 0.0576,
47
+ "step": 2000
48
  },
49
  {
50
  "epoch": 2.0,
51
+ "grad_norm": 0.3116896450519562,
52
+ "learning_rate": 1.80048e-05,
53
+ "loss": 0.0576,
54
+ "step": 2500
55
  },
56
  {
57
  "epoch": 2.0,
58
+ "eval_bleu": 58.57463505915474,
59
+ "eval_loss": 0.07282423228025436,
60
+ "eval_runtime": 168.9336,
61
+ "eval_samples_per_second": 5.919,
62
+ "eval_steps_per_second": 1.48,
63
+ "step": 2500
64
+ },
65
+ {
66
+ "epoch": 2.4,
67
+ "grad_norm": 0.2609412372112274,
68
+ "learning_rate": 1.56048e-05,
69
+ "loss": 0.0452,
70
+ "step": 3000
71
+ },
72
+ {
73
+ "epoch": 2.8,
74
+ "grad_norm": 0.16577185690402985,
75
+ "learning_rate": 1.3204800000000001e-05,
76
+ "loss": 0.0465,
77
+ "step": 3500
78
  },
79
  {
80
  "epoch": 3.0,
81
+ "eval_bleu": 59.30988278861181,
82
+ "eval_loss": 0.07350268214941025,
83
+ "eval_runtime": 171.8963,
84
+ "eval_samples_per_second": 5.817,
85
+ "eval_steps_per_second": 1.454,
86
+ "step": 3750
87
+ },
88
+ {
89
+ "epoch": 3.2,
90
+ "grad_norm": 0.1930246204137802,
91
+ "learning_rate": 1.08048e-05,
92
+ "loss": 0.0426,
93
+ "step": 4000
94
+ },
95
+ {
96
+ "epoch": 3.6,
97
+ "grad_norm": 0.3314656913280487,
98
+ "learning_rate": 8.404800000000001e-06,
99
+ "loss": 0.0381,
100
+ "step": 4500
101
  },
102
  {
103
  "epoch": 4.0,
104
+ "grad_norm": 0.26522204279899597,
105
+ "learning_rate": 6.0048000000000005e-06,
106
+ "loss": 0.0381,
107
+ "step": 5000
108
  },
109
  {
110
  "epoch": 4.0,
111
+ "eval_bleu": 59.249316493031536,
112
+ "eval_loss": 0.07584889233112335,
113
+ "eval_runtime": 168.0339,
114
+ "eval_samples_per_second": 5.951,
115
+ "eval_steps_per_second": 1.488,
116
+ "step": 5000
117
+ },
118
+ {
119
+ "epoch": 4.4,
120
+ "grad_norm": 0.17317089438438416,
121
+ "learning_rate": 3.6048e-06,
122
+ "loss": 0.0337,
123
+ "step": 5500
124
+ },
125
+ {
126
+ "epoch": 4.8,
127
+ "grad_norm": 0.32834669947624207,
128
+ "learning_rate": 1.2048e-06,
129
+ "loss": 0.033,
130
+ "step": 6000
131
  },
132
  {
133
  "epoch": 5.0,
134
+ "eval_bleu": 59.5126771383472,
135
+ "eval_loss": 0.07744310051202774,
136
+ "eval_runtime": 168.6113,
137
+ "eval_samples_per_second": 5.931,
138
+ "eval_steps_per_second": 1.483,
139
+ "step": 6250
140
  },
141
  {
142
  "epoch": 5.0,
143
+ "step": 6250,
144
+ "total_flos": 1.35444037632e+16,
145
+ "train_loss": 0.05036833358764648,
146
+ "train_runtime": 6278.8779,
147
+ "train_samples_per_second": 7.963,
148
+ "train_steps_per_second": 0.995
149
  }
150
  ],
151
  "logging_steps": 500,
152
+ "max_steps": 6250,
153
  "num_input_tokens_seen": 0,
154
  "num_train_epochs": 5,
155
  "save_steps": 500,
 
165
  "attributes": {}
166
  }
167
  },
168
+ "total_flos": 1.35444037632e+16,
169
  "train_batch_size": 4,
170
  "trial_name": null,
171
  "trial_params": null