robertou2 committed on
Commit
a28f9d9
·
verified ·
1 Parent(s): a187ab1

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -23,10 +23,10 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "qkv_proj",
27
- "down_proj",
28
  "o_proj",
29
- "gate_up_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
26
  "o_proj",
27
+ "qkv_proj",
28
+ "gate_up_proj",
29
+ "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cdb74f4aea7eaaeb69447d8a3740d24dafa4689d00dc0bdb0a1f656b6b3a4e8
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1fa574614bd2c4f4b60446fc1e1704a6309cd03a1bee5f601b80009c317b85b
3
  size 100697728
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.1866666666666668,
3
- "total_flos": 4226848963651584.0,
4
- "train_loss": 0.6811937597062853,
5
- "train_runtime": 94.215,
6
- "train_samples_per_second": 1.911,
7
- "train_steps_per_second": 0.478
8
  }
 
1
  {
2
+ "epoch": 2.48,
3
+ "total_flos": 1.2752346185220096e+16,
4
+ "train_loss": 0.643518532647027,
5
+ "train_runtime": 231.7259,
6
+ "train_samples_per_second": 1.554,
7
+ "train_steps_per_second": 0.194
8
  }
checkpoint-45/adapter_config.json CHANGED
@@ -23,10 +23,10 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "qkv_proj",
27
- "down_proj",
28
  "o_proj",
29
- "gate_up_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
26
  "o_proj",
27
+ "qkv_proj",
28
+ "gate_up_proj",
29
+ "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
checkpoint-45/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cdb74f4aea7eaaeb69447d8a3740d24dafa4689d00dc0bdb0a1f656b6b3a4e8
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1fa574614bd2c4f4b60446fc1e1704a6309cd03a1bee5f601b80009c317b85b
3
  size 100697728
checkpoint-45/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:022371f022aa42ec80b0298d42a53fbc14d67f7f603b1e6f6fdf8e98506745a0
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97fdf0ab912d2be2e65c97549eee40b206332fd7cfd950d3671d97877d75a0f5
3
  size 201541754
checkpoint-45/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6df1d6528255f497048d6c169fcb02b3e86a7eb126c4a9571080ca3a7b3e07b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f77392d6a4313495d69e48b960a2ff23dda053345de283eaed32a84e6e1f6e2d
3
  size 14244
checkpoint-45/trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.1866666666666668,
6
  "eval_steps": 500,
7
  "global_step": 45,
8
  "is_hyper_param_search": false,
@@ -10,73 +10,73 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.13333333333333333,
14
- "grad_norm": 0.9045277237892151,
15
  "learning_rate": 4.347826086956522e-05,
16
- "loss": 0.7604,
17
  "step": 5
18
  },
19
  {
20
- "epoch": 0.26666666666666666,
21
- "grad_norm": 0.3948459327220917,
22
  "learning_rate": 8.695652173913044e-05,
23
- "loss": 0.796,
24
  "step": 10
25
  },
26
  {
27
- "epoch": 0.4,
28
- "grad_norm": 0.3346000909805298,
29
  "learning_rate": 0.00013043478260869567,
30
- "loss": 0.8321,
31
  "step": 15
32
  },
33
  {
34
- "epoch": 0.5333333333333333,
35
- "grad_norm": 0.2612442672252655,
36
  "learning_rate": 0.00017391304347826088,
37
- "loss": 0.6935,
38
  "step": 20
39
  },
40
  {
41
- "epoch": 0.6666666666666666,
42
- "grad_norm": 0.6190615892410278,
43
  "learning_rate": 0.00019594929736144976,
44
- "loss": 0.6976,
45
  "step": 25
46
  },
47
  {
48
- "epoch": 0.8,
49
- "grad_norm": 0.2649993896484375,
50
  "learning_rate": 0.00015406408174555976,
51
- "loss": 0.575,
52
  "step": 30
53
  },
54
  {
55
- "epoch": 0.9333333333333333,
56
- "grad_norm": 0.3675606846809387,
57
  "learning_rate": 8.57685161726715e-05,
58
- "loss": 0.6436,
59
  "step": 35
60
  },
61
  {
62
- "epoch": 1.0533333333333332,
63
- "grad_norm": 0.25675633549690247,
64
  "learning_rate": 2.4425042564574184e-05,
65
- "loss": 0.5229,
66
  "step": 40
67
  },
68
  {
69
- "epoch": 1.1866666666666668,
70
- "grad_norm": 0.2978689670562744,
71
  "learning_rate": 0.0,
72
- "loss": 0.6096,
73
  "step": 45
74
  }
75
  ],
76
  "logging_steps": 5,
77
  "max_steps": 45,
78
  "num_input_tokens_seen": 0,
79
- "num_train_epochs": 2,
80
  "save_steps": 500,
81
  "stateful_callbacks": {
82
  "TrainerControl": {
@@ -90,8 +90,8 @@
90
  "attributes": {}
91
  }
92
  },
93
- "total_flos": 4226848963651584.0,
94
- "train_batch_size": 1,
95
  "trial_name": null,
96
  "trial_params": null
97
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.48,
6
  "eval_steps": 500,
7
  "global_step": 45,
8
  "is_hyper_param_search": false,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.26666666666666666,
14
+ "grad_norm": 0.44340959191322327,
15
  "learning_rate": 4.347826086956522e-05,
16
+ "loss": 0.7885,
17
  "step": 5
18
  },
19
  {
20
+ "epoch": 0.5333333333333333,
21
+ "grad_norm": 0.25733163952827454,
22
  "learning_rate": 8.695652173913044e-05,
23
+ "loss": 0.7907,
24
  "step": 10
25
  },
26
  {
27
+ "epoch": 0.8,
28
+ "grad_norm": 0.22555556893348694,
29
  "learning_rate": 0.00013043478260869567,
30
+ "loss": 0.6445,
31
  "step": 15
32
  },
33
  {
34
+ "epoch": 1.1066666666666667,
35
+ "grad_norm": 0.22658060491085052,
36
  "learning_rate": 0.00017391304347826088,
37
+ "loss": 0.7548,
38
  "step": 20
39
  },
40
  {
41
+ "epoch": 1.3733333333333333,
42
+ "grad_norm": 0.30281075835227966,
43
  "learning_rate": 0.00019594929736144976,
44
+ "loss": 0.5684,
45
  "step": 25
46
  },
47
  {
48
+ "epoch": 1.6400000000000001,
49
+ "grad_norm": 0.22937139868736267,
50
  "learning_rate": 0.00015406408174555976,
51
+ "loss": 0.609,
52
  "step": 30
53
  },
54
  {
55
+ "epoch": 1.9066666666666667,
56
+ "grad_norm": 0.24968542158603668,
57
  "learning_rate": 8.57685161726715e-05,
58
+ "loss": 0.5708,
59
  "step": 35
60
  },
61
  {
62
+ "epoch": 2.2133333333333334,
63
+ "grad_norm": 0.2418316900730133,
64
  "learning_rate": 2.4425042564574184e-05,
65
+ "loss": 0.5956,
66
  "step": 40
67
  },
68
  {
69
+ "epoch": 2.48,
70
+ "grad_norm": 0.21928495168685913,
71
  "learning_rate": 0.0,
72
+ "loss": 0.4693,
73
  "step": 45
74
  }
75
  ],
76
  "logging_steps": 5,
77
  "max_steps": 45,
78
  "num_input_tokens_seen": 0,
79
+ "num_train_epochs": 3,
80
  "save_steps": 500,
81
  "stateful_callbacks": {
82
  "TrainerControl": {
 
90
  "attributes": {}
91
  }
92
  },
93
+ "total_flos": 1.2752346185220096e+16,
94
+ "train_batch_size": 2,
95
  "trial_name": null,
96
  "trial_params": null
97
  }
checkpoint-45/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11673240e67bcb0b48a964ced360b47161e72557a90972cad4239f7c7af6c5d9
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4ebfb1e9e4e099c0a01f2b27c892b7a8026c4e89d97a919ba06de02ef08b08c
3
  size 5624
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.1866666666666668,
3
- "total_flos": 4226848963651584.0,
4
- "train_loss": 0.6811937597062853,
5
- "train_runtime": 94.215,
6
- "train_samples_per_second": 1.911,
7
- "train_steps_per_second": 0.478
8
  }
 
1
  {
2
+ "epoch": 2.48,
3
+ "total_flos": 1.2752346185220096e+16,
4
+ "train_loss": 0.643518532647027,
5
+ "train_runtime": 231.7259,
6
+ "train_samples_per_second": 1.554,
7
+ "train_steps_per_second": 0.194
8
  }
trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.1866666666666668,
6
  "eval_steps": 500,
7
  "global_step": 45,
8
  "is_hyper_param_search": false,
@@ -10,82 +10,82 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.13333333333333333,
14
- "grad_norm": 0.9045277237892151,
15
  "learning_rate": 4.347826086956522e-05,
16
- "loss": 0.7604,
17
  "step": 5
18
  },
19
  {
20
- "epoch": 0.26666666666666666,
21
- "grad_norm": 0.3948459327220917,
22
  "learning_rate": 8.695652173913044e-05,
23
- "loss": 0.796,
24
  "step": 10
25
  },
26
  {
27
- "epoch": 0.4,
28
- "grad_norm": 0.3346000909805298,
29
  "learning_rate": 0.00013043478260869567,
30
- "loss": 0.8321,
31
  "step": 15
32
  },
33
  {
34
- "epoch": 0.5333333333333333,
35
- "grad_norm": 0.2612442672252655,
36
  "learning_rate": 0.00017391304347826088,
37
- "loss": 0.6935,
38
  "step": 20
39
  },
40
  {
41
- "epoch": 0.6666666666666666,
42
- "grad_norm": 0.6190615892410278,
43
  "learning_rate": 0.00019594929736144976,
44
- "loss": 0.6976,
45
  "step": 25
46
  },
47
  {
48
- "epoch": 0.8,
49
- "grad_norm": 0.2649993896484375,
50
  "learning_rate": 0.00015406408174555976,
51
- "loss": 0.575,
52
  "step": 30
53
  },
54
  {
55
- "epoch": 0.9333333333333333,
56
- "grad_norm": 0.3675606846809387,
57
  "learning_rate": 8.57685161726715e-05,
58
- "loss": 0.6436,
59
  "step": 35
60
  },
61
  {
62
- "epoch": 1.0533333333333332,
63
- "grad_norm": 0.25675633549690247,
64
  "learning_rate": 2.4425042564574184e-05,
65
- "loss": 0.5229,
66
  "step": 40
67
  },
68
  {
69
- "epoch": 1.1866666666666668,
70
- "grad_norm": 0.2978689670562744,
71
  "learning_rate": 0.0,
72
- "loss": 0.6096,
73
  "step": 45
74
  },
75
  {
76
- "epoch": 1.1866666666666668,
77
  "step": 45,
78
- "total_flos": 4226848963651584.0,
79
- "train_loss": 0.6811937597062853,
80
- "train_runtime": 94.215,
81
- "train_samples_per_second": 1.911,
82
- "train_steps_per_second": 0.478
83
  }
84
  ],
85
  "logging_steps": 5,
86
  "max_steps": 45,
87
  "num_input_tokens_seen": 0,
88
- "num_train_epochs": 2,
89
  "save_steps": 500,
90
  "stateful_callbacks": {
91
  "TrainerControl": {
@@ -99,8 +99,8 @@
99
  "attributes": {}
100
  }
101
  },
102
- "total_flos": 4226848963651584.0,
103
- "train_batch_size": 1,
104
  "trial_name": null,
105
  "trial_params": null
106
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.48,
6
  "eval_steps": 500,
7
  "global_step": 45,
8
  "is_hyper_param_search": false,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.26666666666666666,
14
+ "grad_norm": 0.44340959191322327,
15
  "learning_rate": 4.347826086956522e-05,
16
+ "loss": 0.7885,
17
  "step": 5
18
  },
19
  {
20
+ "epoch": 0.5333333333333333,
21
+ "grad_norm": 0.25733163952827454,
22
  "learning_rate": 8.695652173913044e-05,
23
+ "loss": 0.7907,
24
  "step": 10
25
  },
26
  {
27
+ "epoch": 0.8,
28
+ "grad_norm": 0.22555556893348694,
29
  "learning_rate": 0.00013043478260869567,
30
+ "loss": 0.6445,
31
  "step": 15
32
  },
33
  {
34
+ "epoch": 1.1066666666666667,
35
+ "grad_norm": 0.22658060491085052,
36
  "learning_rate": 0.00017391304347826088,
37
+ "loss": 0.7548,
38
  "step": 20
39
  },
40
  {
41
+ "epoch": 1.3733333333333333,
42
+ "grad_norm": 0.30281075835227966,
43
  "learning_rate": 0.00019594929736144976,
44
+ "loss": 0.5684,
45
  "step": 25
46
  },
47
  {
48
+ "epoch": 1.6400000000000001,
49
+ "grad_norm": 0.22937139868736267,
50
  "learning_rate": 0.00015406408174555976,
51
+ "loss": 0.609,
52
  "step": 30
53
  },
54
  {
55
+ "epoch": 1.9066666666666667,
56
+ "grad_norm": 0.24968542158603668,
57
  "learning_rate": 8.57685161726715e-05,
58
+ "loss": 0.5708,
59
  "step": 35
60
  },
61
  {
62
+ "epoch": 2.2133333333333334,
63
+ "grad_norm": 0.2418316900730133,
64
  "learning_rate": 2.4425042564574184e-05,
65
+ "loss": 0.5956,
66
  "step": 40
67
  },
68
  {
69
+ "epoch": 2.48,
70
+ "grad_norm": 0.21928495168685913,
71
  "learning_rate": 0.0,
72
+ "loss": 0.4693,
73
  "step": 45
74
  },
75
  {
76
+ "epoch": 2.48,
77
  "step": 45,
78
+ "total_flos": 1.2752346185220096e+16,
79
+ "train_loss": 0.643518532647027,
80
+ "train_runtime": 231.7259,
81
+ "train_samples_per_second": 1.554,
82
+ "train_steps_per_second": 0.194
83
  }
84
  ],
85
  "logging_steps": 5,
86
  "max_steps": 45,
87
  "num_input_tokens_seen": 0,
88
+ "num_train_epochs": 3,
89
  "save_steps": 500,
90
  "stateful_callbacks": {
91
  "TrainerControl": {
 
99
  "attributes": {}
100
  }
101
  },
102
+ "total_flos": 1.2752346185220096e+16,
103
+ "train_batch_size": 2,
104
  "trial_name": null,
105
  "trial_params": null
106
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11673240e67bcb0b48a964ced360b47161e72557a90972cad4239f7c7af6c5d9
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4ebfb1e9e4e099c0a01f2b27c892b7a8026c4e89d97a919ba06de02ef08b08c
3
  size 5624