DewiBrynJones committed on
Commit
53aa0f9
·
verified ·
1 Parent(s): e7ffeb2

End of training

Browse files
Files changed (4) hide show
  1. README.md +2 -0
  2. all_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +17 -73
README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: facebook/wav2vec2-xls-r-1b
5
  tags:
 
 
6
  - generated_from_trainer
7
  model-index:
8
  - name: wav2vec2-xls-r-1b-ft-yt
 
3
  license: apache-2.0
4
  base_model: facebook/wav2vec2-xls-r-1b
5
  tags:
6
+ - automatic-speech-recognition
7
+ - ./data-configs/youtube.json
8
  - generated_from_trainer
9
  model-index:
10
  - name: wav2vec2-xls-r-1b-ft-yt
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.06424945923371812,
3
- "total_flos": 2.5256461458914173e+19,
4
- "train_loss": 0.4072261555989583,
5
- "train_runtime": 8835.4203,
6
  "train_samples": 373544,
7
- "train_samples_per_second": 2.716,
8
- "train_steps_per_second": 0.679
9
  }
 
1
  {
2
+ "epoch": 0.02141648641123937,
3
+ "total_flos": 8.380982415051637e+18,
4
+ "train_loss": 2.956299072265625,
5
+ "train_runtime": 3006.0711,
6
  "train_samples": 373544,
7
+ "train_samples_per_second": 2.661,
8
+ "train_steps_per_second": 0.665
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.06424945923371812,
3
- "total_flos": 2.5256461458914173e+19,
4
- "train_loss": 0.4072261555989583,
5
- "train_runtime": 8835.4203,
6
  "train_samples": 373544,
7
- "train_samples_per_second": 2.716,
8
- "train_steps_per_second": 0.679
9
  }
 
1
  {
2
+ "epoch": 0.02141648641123937,
3
+ "total_flos": 8.380982415051637e+18,
4
+ "train_loss": 2.956299072265625,
5
+ "train_runtime": 3006.0711,
6
  "train_samples": 373544,
7
+ "train_samples_per_second": 2.661,
8
+ "train_steps_per_second": 0.665
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06424945923371812,
5
  "eval_steps": 500,
6
- "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11,102 +11,46 @@
11
  {
12
  "epoch": 0.005354121602809843,
13
  "grad_norm": NaN,
14
- "learning_rate": 0.00025,
15
- "loss": 4.8867,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.010708243205619685,
20
  "grad_norm": NaN,
21
- "learning_rate": 0.0002777777777777778,
22
  "loss": 0.0,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.01606236480842953,
27
  "grad_norm": NaN,
28
- "learning_rate": 0.00025,
29
  "loss": 0.0,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 0.02141648641123937,
34
  "grad_norm": NaN,
35
- "learning_rate": 0.00022222222222222218,
36
- "loss": 0.0,
37
- "step": 2000
38
- },
39
- {
40
- "epoch": 0.026770608014049214,
41
- "grad_norm": NaN,
42
- "learning_rate": 0.00019444444444444443,
43
- "loss": 0.0,
44
- "step": 2500
45
- },
46
- {
47
- "epoch": 0.03212472961685906,
48
- "grad_norm": NaN,
49
- "learning_rate": 0.00016666666666666666,
50
- "loss": 0.0,
51
- "step": 3000
52
- },
53
- {
54
- "epoch": 0.0374788512196689,
55
- "grad_norm": NaN,
56
- "learning_rate": 0.0001388888888888889,
57
- "loss": 0.0,
58
- "step": 3500
59
- },
60
- {
61
- "epoch": 0.04283297282247874,
62
- "grad_norm": NaN,
63
- "learning_rate": 0.00011111111111111109,
64
- "loss": 0.0,
65
- "step": 4000
66
- },
67
- {
68
- "epoch": 0.04818709442528859,
69
- "grad_norm": NaN,
70
- "learning_rate": 8.333333333333333e-05,
71
- "loss": 0.0,
72
- "step": 4500
73
- },
74
- {
75
- "epoch": 0.05354121602809843,
76
- "grad_norm": NaN,
77
- "learning_rate": 5.5555555555555545e-05,
78
- "loss": 0.0,
79
- "step": 5000
80
- },
81
- {
82
- "epoch": 0.058895337630908276,
83
- "grad_norm": NaN,
84
- "learning_rate": 2.7777777777777772e-05,
85
- "loss": 0.0,
86
- "step": 5500
87
- },
88
- {
89
- "epoch": 0.06424945923371812,
90
- "grad_norm": NaN,
91
  "learning_rate": 0.0,
92
  "loss": 0.0,
93
- "step": 6000
94
  },
95
  {
96
- "epoch": 0.06424945923371812,
97
- "step": 6000,
98
- "total_flos": 2.5256461458914173e+19,
99
- "train_loss": 0.4072261555989583,
100
- "train_runtime": 8835.4203,
101
- "train_samples_per_second": 2.716,
102
- "train_steps_per_second": 0.679
103
  }
104
  ],
105
  "logging_steps": 500,
106
- "max_steps": 6000,
107
  "num_input_tokens_seen": 0,
108
  "num_train_epochs": 1,
109
- "save_steps": 1000,
110
  "stateful_callbacks": {
111
  "TrainerControl": {
112
  "args": {
@@ -119,7 +63,7 @@
119
  "attributes": {}
120
  }
121
  },
122
- "total_flos": 2.5256461458914173e+19,
123
  "train_batch_size": 4,
124
  "trial_name": null,
125
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.02141648641123937,
5
  "eval_steps": 500,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11
  {
12
  "epoch": 0.005354121602809843,
13
  "grad_norm": NaN,
14
+ "learning_rate": 2.5e-06,
15
+ "loss": 11.8252,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.010708243205619685,
20
  "grad_norm": NaN,
21
+ "learning_rate": 1.6666666666666669e-06,
22
  "loss": 0.0,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.01606236480842953,
27
  "grad_norm": NaN,
28
+ "learning_rate": 8.333333333333334e-07,
29
  "loss": 0.0,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 0.02141648641123937,
34
  "grad_norm": NaN,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  "learning_rate": 0.0,
36
  "loss": 0.0,
37
+ "step": 2000
38
  },
39
  {
40
+ "epoch": 0.02141648641123937,
41
+ "step": 2000,
42
+ "total_flos": 8.380982415051637e+18,
43
+ "train_loss": 2.956299072265625,
44
+ "train_runtime": 3006.0711,
45
+ "train_samples_per_second": 2.661,
46
+ "train_steps_per_second": 0.665
47
  }
48
  ],
49
  "logging_steps": 500,
50
+ "max_steps": 2000,
51
  "num_input_tokens_seen": 0,
52
  "num_train_epochs": 1,
53
+ "save_steps": 500,
54
  "stateful_callbacks": {
55
  "TrainerControl": {
56
  "args": {
 
63
  "attributes": {}
64
  }
65
  },
66
+ "total_flos": 8.380982415051637e+18,
67
  "train_batch_size": 4,
68
  "trial_name": null,
69
  "trial_params": null