diagonalge committed e334eae (verified) · 1 parent: 5975792

Upload task output test1334test1234test1234test12334

README.md CHANGED
@@ -89,7 +89,7 @@ xformers_attention: null
 
  This model was trained from scratch on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.9542
+ - Loss: 0.9023
 
  ## Model description
 
@@ -123,10 +123,10 @@ The following hyperparameters were used during training:
 
  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:------:|:----:|:---------------:|
- | No log | 0 | 0 | 0.9532 |
- | 1.0261 | 0.0372 | 3 | 0.9536 |
- | 1.1582 | 0.0743 | 6 | 0.9554 |
- | 0.8051 | 0.1115 | 9 | 0.9542 |
+ | No log | 0 | 0 | 0.9052 |
+ | 1.0164 | 0.0372 | 3 | 0.9057 |
+ | 1.0848 | 0.0743 | 6 | 0.9046 |
+ | 1.0387 | 0.1115 | 9 | 0.9023 |
 
 
  ### Framework versions
adapter_config.json CHANGED
@@ -24,12 +24,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "q_proj",
  "v_proj",
- "up_proj",
  "o_proj",
  "k_proj",
+ "up_proj",
  "gate_proj",
- "q_proj",
  "down_proj"
  ],
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a41ff268bfabcd272e7b2b09489b30d7731c43c8d4c889e62992e997f1ebc46a
+ oid sha256:fc9aeac2acbc2455a5f0737412114cef01add69564a125108760ac8956c75f9f
  size 22573704
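adapter_model.safetensors is stored as a Git LFS pointer, so the diff shows only the new object hash. The oid is the SHA-256 of the actual file contents, which makes a downloaded copy easy to verify; a minimal sketch (assuming the weights have been downloaded to the working directory):

```python
# Minimal sketch (assumed local path): check that a downloaded LFS object
# matches the sha256 oid recorded in its pointer file.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "fc9aeac2acbc2455a5f0737412114cef01add69564a125108760ac8956c75f9f"
actual = sha256_of("adapter_model.safetensors")  # assumed download location
print("match" if actual == expected else f"mismatch: {actual}")
```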
checkpoint-10/adapter_config.json CHANGED
@@ -24,12 +24,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "q_proj",
  "v_proj",
- "up_proj",
  "o_proj",
  "k_proj",
+ "up_proj",
  "gate_proj",
- "q_proj",
  "down_proj"
  ],
  "task_type": "CAUSAL_LM",
checkpoint-10/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a41ff268bfabcd272e7b2b09489b30d7731c43c8d4c889e62992e997f1ebc46a
+ oid sha256:fc9aeac2acbc2455a5f0737412114cef01add69564a125108760ac8956c75f9f
  size 22573704
checkpoint-10/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2387619257e6c44171f5d79e5e1fb94be749b1c3cc40ab21cff0286713db3366
+ oid sha256:29d04618f87bc05af7cacfdc3c30d2e55e5447a6fb93da26fd5fbe9ba4cbe024
  size 11710970
checkpoint-10/trainer_state.json CHANGED
@@ -11,104 +11,104 @@
  "log_history": [
  {
  "epoch": 0,
- "eval_loss": 0.953231930732727,
- "eval_runtime": 8.7002,
- "eval_samples_per_second": 3.908,
- "eval_steps_per_second": 1.954,
+ "eval_loss": 0.9052417278289795,
+ "eval_runtime": 8.4448,
+ "eval_samples_per_second": 4.026,
+ "eval_steps_per_second": 2.013,
  "step": 0
  },
  {
  "epoch": 0.01238390092879257,
- "grad_norm": 0.6875126957893372,
+ "grad_norm": 0.8082026243209839,
  "learning_rate": 0.0,
- "loss": 1.0347,
+ "loss": 1.1985,
  "step": 1
  },
  {
  "epoch": 0.02476780185758514,
- "grad_norm": 0.5213010907173157,
+ "grad_norm": 0.50789475440979,
  "learning_rate": 2e-05,
- "loss": 0.9209,
+ "loss": 0.6888,
  "step": 2
  },
  {
  "epoch": 0.03715170278637771,
- "grad_norm": 0.5898625254631042,
+ "grad_norm": 0.8321412205696106,
  "learning_rate": 4e-05,
- "loss": 1.0261,
+ "loss": 1.0164,
  "step": 3
  },
  {
  "epoch": 0.03715170278637771,
- "eval_loss": 0.9535599946975708,
- "eval_runtime": 8.4038,
- "eval_samples_per_second": 4.046,
- "eval_steps_per_second": 2.023,
+ "eval_loss": 0.9056991338729858,
+ "eval_runtime": 7.8859,
+ "eval_samples_per_second": 4.312,
+ "eval_steps_per_second": 2.156,
  "step": 3
  },
  {
  "epoch": 0.04953560371517028,
- "grad_norm": 0.768417239189148,
+ "grad_norm": 0.4088127613067627,
  "learning_rate": 6e-05,
- "loss": 1.04,
+ "loss": 0.7558,
  "step": 4
  },
  {
  "epoch": 0.06191950464396285,
- "grad_norm": 0.6189444661140442,
+ "grad_norm": 0.7107352614402771,
  "learning_rate": 8e-05,
- "loss": 1.0581,
+ "loss": 1.2576,
  "step": 5
  },
  {
  "epoch": 0.07430340557275542,
- "grad_norm": 0.7873916029930115,
+ "grad_norm": 0.5187698006629944,
  "learning_rate": 0.0001,
- "loss": 1.1582,
+ "loss": 1.0848,
  "step": 6
  },
  {
  "epoch": 0.07430340557275542,
- "eval_loss": 0.9553690552711487,
- "eval_runtime": 8.2665,
- "eval_samples_per_second": 4.113,
- "eval_steps_per_second": 2.057,
+ "eval_loss": 0.9046434164047241,
+ "eval_runtime": 7.844,
+ "eval_samples_per_second": 4.335,
+ "eval_steps_per_second": 2.167,
  "step": 6
  },
  {
  "epoch": 0.08668730650154799,
- "grad_norm": 0.4747847020626068,
+ "grad_norm": 0.5227215886116028,
  "learning_rate": 0.00012,
- "loss": 1.1452,
+ "loss": 0.7471,
  "step": 7
  },
  {
  "epoch": 0.09907120743034056,
- "grad_norm": 0.9517867565155029,
+ "grad_norm": 0.49186187982559204,
  "learning_rate": 0.00014,
- "loss": 0.9401,
+ "loss": 0.9016,
  "step": 8
  },
  {
  "epoch": 0.11145510835913312,
- "grad_norm": 0.5377346277236938,
+ "grad_norm": 0.49750179052352905,
  "learning_rate": 0.00016,
- "loss": 0.8051,
+ "loss": 1.0387,
  "step": 9
  },
  {
  "epoch": 0.11145510835913312,
- "eval_loss": 0.9541666507720947,
- "eval_runtime": 8.196,
- "eval_samples_per_second": 4.148,
- "eval_steps_per_second": 2.074,
+ "eval_loss": 0.9022544622421265,
+ "eval_runtime": 7.9872,
+ "eval_samples_per_second": 4.257,
+ "eval_steps_per_second": 2.128,
  "step": 9
  },
  {
  "epoch": 0.1238390092879257,
- "grad_norm": 0.5587700009346008,
+ "grad_norm": 0.5828521847724915,
  "learning_rate": 0.00018,
- "loss": 0.836,
+ "loss": 0.7156,
  "step": 10
  }
  ],
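trainer_state.json is the Hugging Face Trainer state; the log_history entries above interleave training logs with evaluation logs. A minimal sketch (assuming a local copy of the checkpoint) that reads the file back and lists the recorded eval losses:

```python
# Minimal sketch (assumed checkpoint path): read the Trainer's log_history
# and list the evaluation loss recorded at each logged step.
import json

with open("checkpoint-10/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"step {entry['step']}: eval_loss={entry['eval_loss']}")
```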
checkpoint-10/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:67fb4271d258100396c327a4063ee257b8654ab40cdb395b6c5cdb2b70ee6064
+ oid sha256:06cd807872f02309d108f298220e5b415c77e8a2df23b68dd2963de4c9f9fc75
  size 7096
checkpoint-3/adapter_config.json CHANGED
@@ -24,12 +24,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "q_proj",
  "v_proj",
- "up_proj",
  "o_proj",
  "k_proj",
+ "up_proj",
  "gate_proj",
- "q_proj",
  "down_proj"
  ],
  "task_type": "CAUSAL_LM",
checkpoint-3/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:67b269d4ff4ec8bb5052887d3c92c549ca53cf39696e3f71c7d89eff81f935d4
+ oid sha256:35d8272665362212d4b28fc489b5cf0ed5f32f9fc76c4b8267c6adf537a997bc
  size 22573704
checkpoint-3/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:48d5b596c485dd963a3666957e0050bcd6fc3658a552aaf30e3e93fe6e21726f
+ oid sha256:cd4f4752f73d772ee28292cbe0567ebb4e38aea7bb804565abca91938b84fa9b
  size 11710970
checkpoint-3/trainer_state.json CHANGED
@@ -11,39 +11,39 @@
  "log_history": [
  {
  "epoch": 0,
- "eval_loss": 0.953231930732727,
- "eval_runtime": 8.7002,
- "eval_samples_per_second": 3.908,
- "eval_steps_per_second": 1.954,
+ "eval_loss": 0.9052417278289795,
+ "eval_runtime": 8.4448,
+ "eval_samples_per_second": 4.026,
+ "eval_steps_per_second": 2.013,
  "step": 0
  },
  {
  "epoch": 0.01238390092879257,
- "grad_norm": 0.6875126957893372,
+ "grad_norm": 0.8082026243209839,
  "learning_rate": 0.0,
- "loss": 1.0347,
+ "loss": 1.1985,
  "step": 1
  },
  {
  "epoch": 0.02476780185758514,
- "grad_norm": 0.5213010907173157,
+ "grad_norm": 0.50789475440979,
  "learning_rate": 2e-05,
- "loss": 0.9209,
+ "loss": 0.6888,
  "step": 2
  },
  {
  "epoch": 0.03715170278637771,
- "grad_norm": 0.5898625254631042,
+ "grad_norm": 0.8321412205696106,
  "learning_rate": 4e-05,
- "loss": 1.0261,
+ "loss": 1.0164,
  "step": 3
  },
  {
  "epoch": 0.03715170278637771,
- "eval_loss": 0.9535599946975708,
- "eval_runtime": 8.4038,
- "eval_samples_per_second": 4.046,
- "eval_steps_per_second": 2.023,
+ "eval_loss": 0.9056991338729858,
+ "eval_runtime": 7.8859,
+ "eval_samples_per_second": 4.312,
+ "eval_steps_per_second": 2.156,
  "step": 3
  }
  ],
checkpoint-3/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:67fb4271d258100396c327a4063ee257b8654ab40cdb395b6c5cdb2b70ee6064
+ oid sha256:06cd807872f02309d108f298220e5b415c77e8a2df23b68dd2963de4c9f9fc75
  size 7096
checkpoint-6/adapter_config.json CHANGED
@@ -24,12 +24,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "q_proj",
  "v_proj",
- "up_proj",
  "o_proj",
  "k_proj",
+ "up_proj",
  "gate_proj",
- "q_proj",
  "down_proj"
  ],
  "task_type": "CAUSAL_LM",
checkpoint-6/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:28acb07656c3961e8bc71f1c1add37bab283f13d0a88820e8f688fb2a434fd99
+ oid sha256:d8faa56675bb544b05ac158400b770d86bf54b6315f310aabd03d9db89324bf6
  size 22573704
checkpoint-6/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6cfc66875fe7c2869668adeb3374d402ee937deefd6a2c0f94b0e50a3269dcf7
+ oid sha256:774eb7a9d6b31c579dd88dff4182225133d63d1784d5204ed1ec4fbb55442693
  size 11710970
checkpoint-6/trainer_state.json CHANGED
@@ -11,68 +11,68 @@
  "log_history": [
  {
  "epoch": 0,
- "eval_loss": 0.953231930732727,
- "eval_runtime": 8.7002,
- "eval_samples_per_second": 3.908,
- "eval_steps_per_second": 1.954,
+ "eval_loss": 0.9052417278289795,
+ "eval_runtime": 8.4448,
+ "eval_samples_per_second": 4.026,
+ "eval_steps_per_second": 2.013,
  "step": 0
  },
  {
  "epoch": 0.01238390092879257,
- "grad_norm": 0.6875126957893372,
+ "grad_norm": 0.8082026243209839,
  "learning_rate": 0.0,
- "loss": 1.0347,
+ "loss": 1.1985,
  "step": 1
  },
  {
  "epoch": 0.02476780185758514,
- "grad_norm": 0.5213010907173157,
+ "grad_norm": 0.50789475440979,
  "learning_rate": 2e-05,
- "loss": 0.9209,
+ "loss": 0.6888,
  "step": 2
  },
  {
  "epoch": 0.03715170278637771,
- "grad_norm": 0.5898625254631042,
+ "grad_norm": 0.8321412205696106,
  "learning_rate": 4e-05,
- "loss": 1.0261,
+ "loss": 1.0164,
  "step": 3
  },
  {
  "epoch": 0.03715170278637771,
- "eval_loss": 0.9535599946975708,
- "eval_runtime": 8.4038,
- "eval_samples_per_second": 4.046,
- "eval_steps_per_second": 2.023,
+ "eval_loss": 0.9056991338729858,
+ "eval_runtime": 7.8859,
+ "eval_samples_per_second": 4.312,
+ "eval_steps_per_second": 2.156,
  "step": 3
  },
  {
  "epoch": 0.04953560371517028,
- "grad_norm": 0.768417239189148,
+ "grad_norm": 0.4088127613067627,
  "learning_rate": 6e-05,
- "loss": 1.04,
+ "loss": 0.7558,
  "step": 4
  },
  {
  "epoch": 0.06191950464396285,
- "grad_norm": 0.6189444661140442,
+ "grad_norm": 0.7107352614402771,
  "learning_rate": 8e-05,
- "loss": 1.0581,
+ "loss": 1.2576,
  "step": 5
  },
  {
  "epoch": 0.07430340557275542,
- "grad_norm": 0.7873916029930115,
+ "grad_norm": 0.5187698006629944,
  "learning_rate": 0.0001,
- "loss": 1.1582,
+ "loss": 1.0848,
  "step": 6
  },
  {
  "epoch": 0.07430340557275542,
- "eval_loss": 0.9553690552711487,
- "eval_runtime": 8.2665,
- "eval_samples_per_second": 4.113,
- "eval_steps_per_second": 2.057,
+ "eval_loss": 0.9046434164047241,
+ "eval_runtime": 7.844,
+ "eval_samples_per_second": 4.335,
+ "eval_steps_per_second": 2.167,
  "step": 6
  }
  ],
checkpoint-6/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:67fb4271d258100396c327a4063ee257b8654ab40cdb395b6c5cdb2b70ee6064
+ oid sha256:06cd807872f02309d108f298220e5b415c77e8a2df23b68dd2963de4c9f9fc75
  size 7096
checkpoint-9/adapter_config.json CHANGED
@@ -24,12 +24,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "q_proj",
  "v_proj",
- "up_proj",
  "o_proj",
  "k_proj",
+ "up_proj",
  "gate_proj",
- "q_proj",
  "down_proj"
  ],
  "task_type": "CAUSAL_LM",
checkpoint-9/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b727383908207749f5903ec133cd3114df21260c27cab96d06c11a964e16f164
+ oid sha256:ebe307591989cc56631abe1b4d7b7886913e144a0b680c007c393d686ba70aa3
  size 22573704
checkpoint-9/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9d76e76ee2b72377a9705db59536407edc7e027fbefaf895d3acfb14a50d5091
+ oid sha256:f7bc8bf2c0ec7339f2108836b5fcfeaa22463b73211d52242fed9238d402427c
  size 11710970
checkpoint-9/trainer_state.json CHANGED
@@ -11,97 +11,97 @@
  "log_history": [
  {
  "epoch": 0,
- "eval_loss": 0.953231930732727,
- "eval_runtime": 8.7002,
- "eval_samples_per_second": 3.908,
- "eval_steps_per_second": 1.954,
+ "eval_loss": 0.9052417278289795,
+ "eval_runtime": 8.4448,
+ "eval_samples_per_second": 4.026,
+ "eval_steps_per_second": 2.013,
  "step": 0
  },
  {
  "epoch": 0.01238390092879257,
- "grad_norm": 0.6875126957893372,
+ "grad_norm": 0.8082026243209839,
  "learning_rate": 0.0,
- "loss": 1.0347,
+ "loss": 1.1985,
  "step": 1
  },
  {
  "epoch": 0.02476780185758514,
- "grad_norm": 0.5213010907173157,
+ "grad_norm": 0.50789475440979,
  "learning_rate": 2e-05,
- "loss": 0.9209,
+ "loss": 0.6888,
  "step": 2
  },
  {
  "epoch": 0.03715170278637771,
- "grad_norm": 0.5898625254631042,
+ "grad_norm": 0.8321412205696106,
  "learning_rate": 4e-05,
- "loss": 1.0261,
+ "loss": 1.0164,
  "step": 3
  },
  {
  "epoch": 0.03715170278637771,
- "eval_loss": 0.9535599946975708,
- "eval_runtime": 8.4038,
- "eval_samples_per_second": 4.046,
- "eval_steps_per_second": 2.023,
+ "eval_loss": 0.9056991338729858,
+ "eval_runtime": 7.8859,
+ "eval_samples_per_second": 4.312,
+ "eval_steps_per_second": 2.156,
  "step": 3
  },
  {
  "epoch": 0.04953560371517028,
- "grad_norm": 0.768417239189148,
+ "grad_norm": 0.4088127613067627,
  "learning_rate": 6e-05,
- "loss": 1.04,
+ "loss": 0.7558,
  "step": 4
  },
  {
  "epoch": 0.06191950464396285,
- "grad_norm": 0.6189444661140442,
+ "grad_norm": 0.7107352614402771,
  "learning_rate": 8e-05,
- "loss": 1.0581,
+ "loss": 1.2576,
  "step": 5
  },
  {
  "epoch": 0.07430340557275542,
- "grad_norm": 0.7873916029930115,
+ "grad_norm": 0.5187698006629944,
  "learning_rate": 0.0001,
- "loss": 1.1582,
+ "loss": 1.0848,
  "step": 6
  },
  {
  "epoch": 0.07430340557275542,
- "eval_loss": 0.9553690552711487,
- "eval_runtime": 8.2665,
- "eval_samples_per_second": 4.113,
- "eval_steps_per_second": 2.057,
+ "eval_loss": 0.9046434164047241,
+ "eval_runtime": 7.844,
+ "eval_samples_per_second": 4.335,
+ "eval_steps_per_second": 2.167,
  "step": 6
  },
  {
  "epoch": 0.08668730650154799,
- "grad_norm": 0.4747847020626068,
+ "grad_norm": 0.5227215886116028,
  "learning_rate": 0.00012,
- "loss": 1.1452,
+ "loss": 0.7471,
  "step": 7
  },
  {
  "epoch": 0.09907120743034056,
- "grad_norm": 0.9517867565155029,
+ "grad_norm": 0.49186187982559204,
  "learning_rate": 0.00014,
- "loss": 0.9401,
+ "loss": 0.9016,
  "step": 8
  },
  {
  "epoch": 0.11145510835913312,
- "grad_norm": 0.5377346277236938,
+ "grad_norm": 0.49750179052352905,
  "learning_rate": 0.00016,
- "loss": 0.8051,
+ "loss": 1.0387,
  "step": 9
  },
  {
  "epoch": 0.11145510835913312,
- "eval_loss": 0.9541666507720947,
- "eval_runtime": 8.196,
- "eval_samples_per_second": 4.148,
- "eval_steps_per_second": 2.074,
+ "eval_loss": 0.9022544622421265,
+ "eval_runtime": 7.9872,
+ "eval_samples_per_second": 4.257,
+ "eval_steps_per_second": 2.128,
  "step": 9
  }
  ],
checkpoint-9/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:67fb4271d258100396c327a4063ee257b8654ab40cdb395b6c5cdb2b70ee6064
+ oid sha256:06cd807872f02309d108f298220e5b415c77e8a2df23b68dd2963de4c9f9fc75
  size 7096