stuser2023 committed on
Commit
1779fb2
·
verified ·
1 Parent(s): 804b71d

Training in progress, epoch 2

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26cd8608c74765cf0d7e63a3bd73516440951ea66a7798b1db8fabaf293cd73d
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:934a427b00be68da3ed8fde7b1bcdd15d4eee49375d3c550286d6c6b91d75482
3
  size 267832560
run-2/checkpoint-4276/config.json CHANGED
@@ -20,6 +20,6 @@
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
- "transformers_version": "4.35.2",
24
  "vocab_size": 30522
25
  }
 
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
  "vocab_size": 30522
25
  }
run-2/checkpoint-4276/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2641a0fb5e8098c1e4392dd7fb772522aab8196dfa550989e9b11678c3074660
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:934a427b00be68da3ed8fde7b1bcdd15d4eee49375d3c550286d6c6b91d75482
3
  size 267832560
run-2/checkpoint-4276/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73bb318ea7868b4d3b5448eee3af2735d8e81824613de6e44e76ff1f949a34cf
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f41bf4a3c1d763cdaef986d5e50ce022d06df3a30a5eab7fc97e283653bcb0f
3
  size 535727290
run-2/checkpoint-4276/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff56f6cf771ddd7d20265eca53b8a64af4c290f26007b82e13456dec8a4076f6
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00e672a9362c6f5f488b05417701d476fdfce4dbe9c5c1eff5573f6a228e6183
3
+ size 14244
run-2/checkpoint-4276/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c001a95dc22efb46374f1f965702e4e20dbf9fde40fb4980df8a756842d4e0ac
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d883f9bbfa01924e0f8592716e3b9514c8da859b9f26198cdd40cf5b4791cd9
3
  size 1064
run-2/checkpoint-4276/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.49329306040311344,
3
- "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-3207",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 4276,
7
  "is_hyper_param_search": true,
@@ -9,100 +9,92 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.47,
13
- "learning_rate": 5.944660653119031e-06,
14
- "loss": 0.5558,
 
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.94,
19
- "learning_rate": 5.157496901381872e-06,
20
- "loss": 0.5053,
 
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 1.0,
25
- "eval_loss": 0.4907635748386383,
26
- "eval_matthews_correlation": 0.42854888313294803,
27
- "eval_runtime": 0.8054,
28
- "eval_samples_per_second": 1295.012,
29
- "eval_steps_per_second": 81.947,
30
- "step": 1069
31
- },
32
- {
33
- "epoch": 1.4,
34
- "learning_rate": 4.370333149644712e-06,
35
- "loss": 0.4286,
36
  "step": 1500
37
  },
38
  {
39
- "epoch": 1.87,
40
- "learning_rate": 3.5831693979075514e-06,
41
- "loss": 0.3992,
 
42
  "step": 2000
43
  },
44
  {
45
- "epoch": 2.0,
46
- "eval_loss": 0.5015696883201599,
47
- "eval_matthews_correlation": 0.48409375114357234,
48
- "eval_runtime": 0.8234,
49
- "eval_samples_per_second": 1266.73,
50
- "eval_steps_per_second": 80.157,
51
  "step": 2138
52
  },
53
  {
54
- "epoch": 2.34,
55
- "learning_rate": 2.796005646170392e-06,
56
- "loss": 0.3434,
 
57
  "step": 2500
58
  },
59
  {
60
- "epoch": 2.81,
61
- "learning_rate": 2.008841894433232e-06,
62
- "loss": 0.3371,
 
63
  "step": 3000
64
  },
65
  {
66
- "epoch": 3.0,
67
- "eval_loss": 0.5880187749862671,
68
- "eval_matthews_correlation": 0.49329306040311344,
69
- "eval_runtime": 0.9037,
70
- "eval_samples_per_second": 1154.15,
71
- "eval_steps_per_second": 73.033,
72
- "step": 3207
73
- },
74
- {
75
- "epoch": 3.27,
76
- "learning_rate": 1.2216781426960722e-06,
77
- "loss": 0.2988,
78
  "step": 3500
79
  },
80
  {
81
- "epoch": 3.74,
82
- "learning_rate": 4.345143909589123e-07,
83
- "loss": 0.2797,
 
84
  "step": 4000
85
  },
86
  {
87
- "epoch": 4.0,
88
- "eval_loss": 0.6395880579948425,
89
- "eval_matthews_correlation": 0.49249265259737396,
90
- "eval_runtime": 1.057,
91
- "eval_samples_per_second": 986.747,
92
- "eval_steps_per_second": 62.44,
93
  "step": 4276
94
  }
95
  ],
96
  "logging_steps": 500,
97
- "max_steps": 4276,
98
- "num_train_epochs": 4,
 
99
  "save_steps": 500,
100
- "total_flos": 150793435338816.0,
 
101
  "trial_name": null,
102
  "trial_params": {
103
- "learning_rate": 6.7318244048561916e-06,
104
- "num_train_epochs": 4,
105
- "per_device_train_batch_size": 8,
106
- "seed": 14
107
  }
108
  }
 
1
  {
2
+ "best_metric": 0.4691032179514943,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-4276",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 4276,
7
  "is_hyper_param_search": true,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.23,
13
+ "grad_norm": 4.28505277633667,
14
+ "learning_rate": 3.0702893894484785e-06,
15
+ "loss": 0.6069,
16
  "step": 500
17
  },
18
  {
19
+ "epoch": 0.47,
20
+ "grad_norm": 9.482794761657715,
21
+ "learning_rate": 2.9196373094951675e-06,
22
+ "loss": 0.5628,
23
  "step": 1000
24
  },
25
  {
26
+ "epoch": 0.7,
27
+ "grad_norm": 22.521339416503906,
28
+ "learning_rate": 2.7689852295418565e-06,
29
+ "loss": 0.5565,
 
 
 
 
 
 
 
 
30
  "step": 1500
31
  },
32
  {
33
+ "epoch": 0.94,
34
+ "grad_norm": 26.7753849029541,
35
+ "learning_rate": 2.6183331495885454e-06,
36
+ "loss": 0.5184,
37
  "step": 2000
38
  },
39
  {
40
+ "epoch": 1.0,
41
+ "eval_loss": 0.5730993747711182,
42
+ "eval_matthews_correlation": 0.3853198145814999,
43
+ "eval_runtime": 0.7612,
44
+ "eval_samples_per_second": 1370.225,
45
+ "eval_steps_per_second": 86.706,
46
  "step": 2138
47
  },
48
  {
49
+ "epoch": 1.17,
50
+ "grad_norm": 17.77669334411621,
51
+ "learning_rate": 2.4676810696352344e-06,
52
+ "loss": 0.4619,
53
  "step": 2500
54
  },
55
  {
56
+ "epoch": 1.4,
57
+ "grad_norm": 37.4239387512207,
58
+ "learning_rate": 2.3170289896819234e-06,
59
+ "loss": 0.5014,
60
  "step": 3000
61
  },
62
  {
63
+ "epoch": 1.64,
64
+ "grad_norm": 46.75569534301758,
65
+ "learning_rate": 2.1663769097286124e-06,
66
+ "loss": 0.492,
 
 
 
 
 
 
 
 
67
  "step": 3500
68
  },
69
  {
70
+ "epoch": 1.87,
71
+ "grad_norm": 66.9134750366211,
72
+ "learning_rate": 2.0157248297753013e-06,
73
+ "loss": 0.4809,
74
  "step": 4000
75
  },
76
  {
77
+ "epoch": 2.0,
78
+ "eval_loss": 0.6646500825881958,
79
+ "eval_matthews_correlation": 0.4691032179514943,
80
+ "eval_runtime": 0.8224,
81
+ "eval_samples_per_second": 1268.193,
82
+ "eval_steps_per_second": 80.25,
83
  "step": 4276
84
  }
85
  ],
86
  "logging_steps": 500,
87
+ "max_steps": 10690,
88
+ "num_input_tokens_seen": 0,
89
+ "num_train_epochs": 5,
90
  "save_steps": 500,
91
+ "total_flos": 65200091402940.0,
92
+ "train_batch_size": 4,
93
  "trial_name": null,
94
  "trial_params": {
95
+ "learning_rate": 3.2209414694017896e-06,
96
+ "num_train_epochs": 5,
97
+ "per_device_train_batch_size": 4,
98
+ "seed": 16
99
  }
100
  }
run-2/checkpoint-4276/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:618b6f744377ad587ba25f1af0a1c4f702e5743116ab1fe52e9ecc52d00dc59b
3
- size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88d2c3d6804ca2d9d22cb74f328c5ae8ec320f8d12a0ef15ea5ae2037f02bd85
3
+ size 4984
runs/Mar16_01-46-29_6e5f088ca464/events.out.tfevents.1710554238.6e5f088ca464.226.4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2728a30252b8c7698d4b1cbbee9e9398c0d44f7fa3ab038a041c0d6da1827ad
3
- size 5979
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e275f4a807e39d73a49dad5d1ea7c25874258e28fe23616f020c0ad00492050
3
+ size 7158