SjardiWillems commited on
Commit
0a4ee9f
·
verified ·
1 Parent(s): 296eee4

Training in progress, epoch 1

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b70b25fb624ba97ba62cf6366699b1804f9ade1a5d4b0d8d7726632eb25e5f31
3
  size 267829484
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84bec32edce0c34ec348a06616c8c7aa85c9f1a33d5d27ae77ad3e090d412754
3
  size 267829484
run-3/checkpoint-288/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d61c763c886dbb531d62293ccddf79a5802b2bb562413c4d5db455ce0ca8a2fb
3
  size 267829484
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e70149ba23dd0d2f866d218b9336961ec98113bb1af5729569e5b571ffd29e9f
3
  size 267829484
run-3/checkpoint-288/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dbce5211cc54e0465ac8be2041b7c2acfcfc05c146d963020091964689ca99d
3
  size 535721146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89a83c71a35c4c8a47c9f9d461682720d58447a2d19c908bf8f73ad93239b467
3
  size 535721146
run-3/checkpoint-288/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b4567d37c30077318ed7ca2fd6ed8c79fd4e9422f26b3d853c9c457274f3e99
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa0756249ef89554273490d3f25c592cf35d7de3c714c99d3ee3ec2bbb73a5d4
3
  size 14244
run-3/checkpoint-288/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6873788c8492efd7d168b02739521070490d7c8a8f85aee5192671cdda219904
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e813b361e3bb66e575ae412545ecddb9e1a2907d17551c0c7b2959b9d9137cd8
3
  size 1064
run-3/checkpoint-288/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8252063306594624,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-288",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
@@ -10,22 +10,22 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.9698190689086914,
14
- "eval_pearson": 0.7678320454774757,
15
- "eval_runtime": 0.8035,
16
- "eval_samples_per_second": 1866.77,
17
- "eval_spearmanr": 0.712509933248186,
18
- "eval_steps_per_second": 116.984,
19
  "step": 144
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_loss": 0.7606285810470581,
24
- "eval_pearson": 0.8252063306594624,
25
- "eval_runtime": 0.7878,
26
- "eval_samples_per_second": 1904.014,
27
- "eval_spearmanr": 0.8218529358151725,
28
- "eval_steps_per_second": 119.318,
29
  "step": 288
30
  }
31
  ],
@@ -38,9 +38,9 @@
38
  "train_batch_size": 4,
39
  "trial_name": null,
40
  "trial_params": {
41
- "learning_rate": 5.4134010471541346e-05,
42
  "num_train_epochs": 5,
43
  "per_device_train_batch_size": 4,
44
- "seed": 4
45
  }
46
  }
 
1
  {
2
+ "best_metric": 0.8294526923897652,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-288",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 1.0159441232681274,
14
+ "eval_pearson": 0.7754149460856693,
15
+ "eval_runtime": 0.9276,
16
+ "eval_samples_per_second": 1617.073,
17
+ "eval_spearmanr": 0.7770625188634587,
18
+ "eval_steps_per_second": 101.337,
19
  "step": 144
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_loss": 0.7027656435966492,
24
+ "eval_pearson": 0.8294526923897652,
25
+ "eval_runtime": 1.1203,
26
+ "eval_samples_per_second": 1338.921,
27
+ "eval_spearmanr": 0.8239919782128128,
28
+ "eval_steps_per_second": 83.906,
29
  "step": 288
30
  }
31
  ],
 
38
  "train_batch_size": 4,
39
  "trial_name": null,
40
  "trial_params": {
41
+ "learning_rate": 3.444308458582572e-05,
42
  "num_train_epochs": 5,
43
  "per_device_train_batch_size": 4,
44
+ "seed": 38
45
  }
46
  }
run-3/checkpoint-288/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a89dbe21e169d083920168fe1c35f86c15d94a0c3b9ca2615dabe1e0930b053d
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4821c7433b1b1cc47986c64fa81c811ca3c1f9863a29a79f5e5c8b19c0b27c05
3
  size 4920
run-3/checkpoint-432/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54ee921761736374c58efdf205e400cb47f16270af5b9033facba3bb7383e53a
3
  size 267829484
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4868147f9f64d8d4e4ba0994d128b9aa79ef2693115460ecba2d47291a2e064f
3
  size 267829484
run-3/checkpoint-432/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ac1fa64753adc4283dbcfd47227812a681fdced7ace3e7ccdb93f6e6b9336a5
3
  size 535721146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:700b4b368b7b1f8c8ce908d043229c8e86e8657842ed24bba642b02baecb35c6
3
  size 535721146
run-3/checkpoint-432/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a35f605eedd2102adde6198f38cba143faaed4ee12b11d5ffe4f701e82b262f2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:808e511081723cb6acf19ed232057cc3dd2d7b62e1a05317219f9befc0215f61
3
  size 14244
run-3/checkpoint-432/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9422fdad9996632c1797fb4769bcfaa4d1bf52fc06a431fa4296855752c582f2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb739c138f564a5262bc1447b75225686d934caaecccce181d2b86227142035c
3
  size 1064
run-3/checkpoint-432/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8325197619129224,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-432",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
@@ -10,32 +10,32 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.9698190689086914,
14
- "eval_pearson": 0.7678320454774757,
15
- "eval_runtime": 0.8035,
16
- "eval_samples_per_second": 1866.77,
17
- "eval_spearmanr": 0.712509933248186,
18
- "eval_steps_per_second": 116.984,
19
  "step": 144
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_loss": 0.7606285810470581,
24
- "eval_pearson": 0.8252063306594624,
25
- "eval_runtime": 0.7878,
26
- "eval_samples_per_second": 1904.014,
27
- "eval_spearmanr": 0.8218529358151725,
28
- "eval_steps_per_second": 119.318,
29
  "step": 288
30
  },
31
  {
32
  "epoch": 3.0,
33
- "eval_loss": 0.7174907326698303,
34
- "eval_pearson": 0.8325197619129224,
35
- "eval_runtime": 0.7962,
36
- "eval_samples_per_second": 1883.934,
37
- "eval_spearmanr": 0.8279573830775626,
38
- "eval_steps_per_second": 118.06,
39
  "step": 432
40
  }
41
  ],
@@ -48,9 +48,9 @@
48
  "train_batch_size": 4,
49
  "trial_name": null,
50
  "trial_params": {
51
- "learning_rate": 5.4134010471541346e-05,
52
  "num_train_epochs": 5,
53
  "per_device_train_batch_size": 4,
54
- "seed": 4
55
  }
56
  }
 
1
  {
2
+ "best_metric": 0.8371525320180763,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-432",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 1.0159441232681274,
14
+ "eval_pearson": 0.7754149460856693,
15
+ "eval_runtime": 0.9276,
16
+ "eval_samples_per_second": 1617.073,
17
+ "eval_spearmanr": 0.7770625188634587,
18
+ "eval_steps_per_second": 101.337,
19
  "step": 144
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_loss": 0.7027656435966492,
24
+ "eval_pearson": 0.8294526923897652,
25
+ "eval_runtime": 1.1203,
26
+ "eval_samples_per_second": 1338.921,
27
+ "eval_spearmanr": 0.8239919782128128,
28
+ "eval_steps_per_second": 83.906,
29
  "step": 288
30
  },
31
  {
32
  "epoch": 3.0,
33
+ "eval_loss": 0.6792955994606018,
34
+ "eval_pearson": 0.8371525320180763,
35
+ "eval_runtime": 1.1241,
36
+ "eval_samples_per_second": 1334.415,
37
+ "eval_spearmanr": 0.8358915789303897,
38
+ "eval_steps_per_second": 83.623,
39
  "step": 432
40
  }
41
  ],
 
48
  "train_batch_size": 4,
49
  "trial_name": null,
50
  "trial_params": {
51
+ "learning_rate": 3.444308458582572e-05,
52
  "num_train_epochs": 5,
53
  "per_device_train_batch_size": 4,
54
+ "seed": 38
55
  }
56
  }
run-3/checkpoint-432/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a89dbe21e169d083920168fe1c35f86c15d94a0c3b9ca2615dabe1e0930b053d
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4821c7433b1b1cc47986c64fa81c811ca3c1f9863a29a79f5e5c8b19c0b27c05
3
  size 4920
run-3/checkpoint-576/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d3d6088776c555a453aba313194e46ee79702e29ce42f01cfa8a9c6961bc9c8
3
  size 267829484
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0252f09672f42eb2def3a2d92dafa09af532b35ad3edace25eb853de6dc06ba
3
  size 267829484
run-3/checkpoint-576/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:266bfd784fb8d7d7deff582f100b5c733e610ba4b2b088848056436bc04f81be
3
  size 535721146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64ecde12fc2a704b464816f349a6de99b4e9647b5ec943f520b69c97f3f472e6
3
  size 535721146
run-3/checkpoint-576/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:866764b15a22ef1ff0938a6647b07c0c9c90e6d2381339fde66f0f71097dcc37
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa277fec9a04a686204af70247a8b4e3568d8153e92f5bd3a9eb50c7d3e807a1
3
  size 14244
run-3/checkpoint-576/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb27dabe46c6c138fbd2a477aac978f51b59d763c234a73ac470f1f9c0286e4a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3bfa2eaedf155ed3818f2d7439057a71e6beb4a37ac73fd5d2e8e33136054d8
3
  size 1064
run-3/checkpoint-576/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8407800570738724,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-576",
4
  "epoch": 4.0,
5
  "eval_steps": 500,
@@ -10,49 +10,49 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.9698190689086914,
14
- "eval_pearson": 0.7678320454774757,
15
- "eval_runtime": 0.8035,
16
- "eval_samples_per_second": 1866.77,
17
- "eval_spearmanr": 0.712509933248186,
18
- "eval_steps_per_second": 116.984,
19
  "step": 144
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_loss": 0.7606285810470581,
24
- "eval_pearson": 0.8252063306594624,
25
- "eval_runtime": 0.7878,
26
- "eval_samples_per_second": 1904.014,
27
- "eval_spearmanr": 0.8218529358151725,
28
- "eval_steps_per_second": 119.318,
29
  "step": 288
30
  },
31
  {
32
  "epoch": 3.0,
33
- "eval_loss": 0.7174907326698303,
34
- "eval_pearson": 0.8325197619129224,
35
- "eval_runtime": 0.7962,
36
- "eval_samples_per_second": 1883.934,
37
- "eval_spearmanr": 0.8279573830775626,
38
- "eval_steps_per_second": 118.06,
39
  "step": 432
40
  },
41
  {
42
  "epoch": 3.47,
43
- "grad_norm": 10.061558723449707,
44
- "learning_rate": 1.654094764408208e-05,
45
- "loss": 0.9316,
46
  "step": 500
47
  },
48
  {
49
  "epoch": 4.0,
50
- "eval_loss": 0.7175036072731018,
51
- "eval_pearson": 0.8407800570738724,
52
- "eval_runtime": 0.7992,
53
- "eval_samples_per_second": 1876.812,
54
- "eval_spearmanr": 0.8358993750524432,
55
- "eval_steps_per_second": 117.614,
56
  "step": 576
57
  }
58
  ],
@@ -61,13 +61,13 @@
61
  "num_input_tokens_seen": 0,
62
  "num_train_epochs": 5,
63
  "save_steps": 500,
64
- "total_flos": 22512846870384.0,
65
  "train_batch_size": 4,
66
  "trial_name": null,
67
  "trial_params": {
68
- "learning_rate": 5.4134010471541346e-05,
69
  "num_train_epochs": 5,
70
  "per_device_train_batch_size": 4,
71
- "seed": 4
72
  }
73
  }
 
1
  {
2
+ "best_metric": 0.8416256812127808,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-576",
4
  "epoch": 4.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 1.0159441232681274,
14
+ "eval_pearson": 0.7754149460856693,
15
+ "eval_runtime": 0.9276,
16
+ "eval_samples_per_second": 1617.073,
17
+ "eval_spearmanr": 0.7770625188634587,
18
+ "eval_steps_per_second": 101.337,
19
  "step": 144
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_loss": 0.7027656435966492,
24
+ "eval_pearson": 0.8294526923897652,
25
+ "eval_runtime": 1.1203,
26
+ "eval_samples_per_second": 1338.921,
27
+ "eval_spearmanr": 0.8239919782128128,
28
+ "eval_steps_per_second": 83.906,
29
  "step": 288
30
  },
31
  {
32
  "epoch": 3.0,
33
+ "eval_loss": 0.6792955994606018,
34
+ "eval_pearson": 0.8371525320180763,
35
+ "eval_runtime": 1.1241,
36
+ "eval_samples_per_second": 1334.415,
37
+ "eval_spearmanr": 0.8358915789303897,
38
+ "eval_steps_per_second": 83.623,
39
  "step": 432
40
  },
41
  {
42
  "epoch": 3.47,
43
+ "grad_norm": 24.244155883789062,
44
+ "learning_rate": 1.052427584566897e-05,
45
+ "loss": 0.9809,
46
  "step": 500
47
  },
48
  {
49
  "epoch": 4.0,
50
+ "eval_loss": 0.6611331105232239,
51
+ "eval_pearson": 0.8416256812127808,
52
+ "eval_runtime": 1.1415,
53
+ "eval_samples_per_second": 1314.012,
54
+ "eval_spearmanr": 0.8371352595017586,
55
+ "eval_steps_per_second": 82.345,
56
  "step": 576
57
  }
58
  ],
 
61
  "num_input_tokens_seen": 0,
62
  "num_train_epochs": 5,
63
  "save_steps": 500,
64
+ "total_flos": 22488785838402.0,
65
  "train_batch_size": 4,
66
  "trial_name": null,
67
  "trial_params": {
68
+ "learning_rate": 3.444308458582572e-05,
69
  "num_train_epochs": 5,
70
  "per_device_train_batch_size": 4,
71
+ "seed": 38
72
  }
73
  }
run-3/checkpoint-576/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a89dbe21e169d083920168fe1c35f86c15d94a0c3b9ca2615dabe1e0930b053d
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4821c7433b1b1cc47986c64fa81c811ca3c1f9863a29a79f5e5c8b19c0b27c05
3
  size 4920
run-3/checkpoint-720/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa0d60c076141cf7c557f9e6e5ee5c4f86e889eb8bdf0a92abb93ba722721f86
3
  size 267829484
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5bc1b531032dd209d8612697b6d66430437909170e4f5e9db1464a69cb7170b
3
  size 267829484
run-3/checkpoint-720/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a301924ca24aae6f1fb2c1c6ee9db250640a15a1a2d472c44a832785935b9318
3
  size 535721146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85bd33f89e970d5eed5b1700ce0879d341256f399b5e49a28441dea48ec6ef7d
3
  size 535721146
run-3/checkpoint-720/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f67419749962139f0cc43ca20b49e9704d0edbf7d5f97a6d0cab6b36fcd9155
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c107ae0ea3f1f1b34f3b300cf1b0955ef489d70d5f2d9e4aa62d79e0bebdc5cf
3
  size 14244
run-3/checkpoint-720/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:423272412d97a885928e6da28e6e2e5c0c8b0cd8ad24d0ecdfd83d3aefd966c8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e936e0aaf767fe7e9ef1cc507adb5e75f602c8f4ddb24d590baff19f29fe3662
3
  size 1064
run-3/checkpoint-720/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8407800570738724,
3
- "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-576",
4
  "epoch": 5.0,
5
  "eval_steps": 500,
6
  "global_step": 720,
@@ -10,59 +10,59 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.9698190689086914,
14
- "eval_pearson": 0.7678320454774757,
15
- "eval_runtime": 0.8035,
16
- "eval_samples_per_second": 1866.77,
17
- "eval_spearmanr": 0.712509933248186,
18
- "eval_steps_per_second": 116.984,
19
  "step": 144
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_loss": 0.7606285810470581,
24
- "eval_pearson": 0.8252063306594624,
25
- "eval_runtime": 0.7878,
26
- "eval_samples_per_second": 1904.014,
27
- "eval_spearmanr": 0.8218529358151725,
28
- "eval_steps_per_second": 119.318,
29
  "step": 288
30
  },
31
  {
32
  "epoch": 3.0,
33
- "eval_loss": 0.7174907326698303,
34
- "eval_pearson": 0.8325197619129224,
35
- "eval_runtime": 0.7962,
36
- "eval_samples_per_second": 1883.934,
37
- "eval_spearmanr": 0.8279573830775626,
38
- "eval_steps_per_second": 118.06,
39
  "step": 432
40
  },
41
  {
42
  "epoch": 3.47,
43
- "grad_norm": 10.061558723449707,
44
- "learning_rate": 1.654094764408208e-05,
45
- "loss": 0.9316,
46
  "step": 500
47
  },
48
  {
49
  "epoch": 4.0,
50
- "eval_loss": 0.7175036072731018,
51
- "eval_pearson": 0.8407800570738724,
52
- "eval_runtime": 0.7992,
53
- "eval_samples_per_second": 1876.812,
54
- "eval_spearmanr": 0.8358993750524432,
55
- "eval_steps_per_second": 117.614,
56
  "step": 576
57
  },
58
  {
59
  "epoch": 5.0,
60
- "eval_loss": 0.67606520652771,
61
- "eval_pearson": 0.8389162758555168,
62
- "eval_runtime": 0.8221,
63
- "eval_samples_per_second": 1824.653,
64
- "eval_spearmanr": 0.8352930144614386,
65
- "eval_steps_per_second": 114.345,
66
  "step": 720
67
  }
68
  ],
@@ -71,13 +71,13 @@
71
  "num_input_tokens_seen": 0,
72
  "num_train_epochs": 5,
73
  "save_steps": 500,
74
- "total_flos": 22512846870384.0,
75
  "train_batch_size": 4,
76
  "trial_name": null,
77
  "trial_params": {
78
- "learning_rate": 5.4134010471541346e-05,
79
  "num_train_epochs": 5,
80
  "per_device_train_batch_size": 4,
81
- "seed": 4
82
  }
83
  }
 
1
  {
2
+ "best_metric": 0.8422757387186379,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-stsb/run-3/checkpoint-720",
4
  "epoch": 5.0,
5
  "eval_steps": 500,
6
  "global_step": 720,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 1.0159441232681274,
14
+ "eval_pearson": 0.7754149460856693,
15
+ "eval_runtime": 0.9276,
16
+ "eval_samples_per_second": 1617.073,
17
+ "eval_spearmanr": 0.7770625188634587,
18
+ "eval_steps_per_second": 101.337,
19
  "step": 144
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_loss": 0.7027656435966492,
24
+ "eval_pearson": 0.8294526923897652,
25
+ "eval_runtime": 1.1203,
26
+ "eval_samples_per_second": 1338.921,
27
+ "eval_spearmanr": 0.8239919782128128,
28
+ "eval_steps_per_second": 83.906,
29
  "step": 288
30
  },
31
  {
32
  "epoch": 3.0,
33
+ "eval_loss": 0.6792955994606018,
34
+ "eval_pearson": 0.8371525320180763,
35
+ "eval_runtime": 1.1241,
36
+ "eval_samples_per_second": 1334.415,
37
+ "eval_spearmanr": 0.8358915789303897,
38
+ "eval_steps_per_second": 83.623,
39
  "step": 432
40
  },
41
  {
42
  "epoch": 3.47,
43
+ "grad_norm": 24.244155883789062,
44
+ "learning_rate": 1.052427584566897e-05,
45
+ "loss": 0.9809,
46
  "step": 500
47
  },
48
  {
49
  "epoch": 4.0,
50
+ "eval_loss": 0.6611331105232239,
51
+ "eval_pearson": 0.8416256812127808,
52
+ "eval_runtime": 1.1415,
53
+ "eval_samples_per_second": 1314.012,
54
+ "eval_spearmanr": 0.8371352595017586,
55
+ "eval_steps_per_second": 82.345,
56
  "step": 576
57
  },
58
  {
59
  "epoch": 5.0,
60
+ "eval_loss": 0.6691386699676514,
61
+ "eval_pearson": 0.8422757387186379,
62
+ "eval_runtime": 1.0993,
63
+ "eval_samples_per_second": 1364.543,
64
+ "eval_spearmanr": 0.8374301458257349,
65
+ "eval_steps_per_second": 85.511,
66
  "step": 720
67
  }
68
  ],
 
71
  "num_input_tokens_seen": 0,
72
  "num_train_epochs": 5,
73
  "save_steps": 500,
74
+ "total_flos": 22488785838402.0,
75
  "train_batch_size": 4,
76
  "trial_name": null,
77
  "trial_params": {
78
+ "learning_rate": 3.444308458582572e-05,
79
  "num_train_epochs": 5,
80
  "per_device_train_batch_size": 4,
81
+ "seed": 38
82
  }
83
  }
run-3/checkpoint-720/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a89dbe21e169d083920168fe1c35f86c15d94a0c3b9ca2615dabe1e0930b053d
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4821c7433b1b1cc47986c64fa81c811ca3c1f9863a29a79f5e5c8b19c0b27c05
3
  size 4920
runs/Mar07_00-50-37_758c92e55f7e/events.out.tfevents.1709773116.758c92e55f7e.2638.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17e4d54666ff128c9e9ff7873136a55131dae24f207cacad191725739f7f7b43
3
+ size 6426
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4821c7433b1b1cc47986c64fa81c811ca3c1f9863a29a79f5e5c8b19c0b27c05
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf3f62aadffc261f72a51d4b7c87acc76e6bc2705585514f61113a84b0e34fd3
3
  size 4920