DeepDream2045 committed on
Commit
c371781
·
verified ·
1 Parent(s): 1ed616d

Training in progress, step 19, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
23
  "k_proj",
24
  "q_proj",
25
- "v_proj",
26
- "o_proj",
27
  "up_proj",
28
- "gate_proj",
29
- "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "v_proj",
24
+ "down_proj",
25
  "k_proj",
26
  "q_proj",
 
 
27
  "up_proj",
28
+ "o_proj",
29
+ "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2f3401b0402b266f3e3b5bfbb73b84afdb7a0a0d262944290a8e3db31c64872
3
  size 97728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e8fba98e1768d606b3e34fa33b7faf71e645f336bbf4b349a6f38ca19a99d12
3
  size 97728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a3c5028462d8a1682bdda3dec5634efa432ebcf38a15cf6f783475de0738269
3
  size 212298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:407550fe93e4144c0c7b535703dc0af4260cd8b7849192c0187960e061c4fd7e
3
  size 212298
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4d26cc27bf3a8f71d0610bcf382effc83d34b654f5d07f90ab43930ebbd8325
3
- size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dc99727afc70938d9edd3168cd8e82139628f65521bfd34a77f52d1e4010a17
3
+ size 14960
last-checkpoint/trainer_state.json CHANGED
@@ -10,143 +10,143 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.16326530612244897,
13
- "grad_norm": 0.08094418048858643,
14
  "learning_rate": 9.931806517013612e-05,
15
  "loss": 10.3801,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.16326530612244897,
20
- "eval_loss": 10.379819869995117,
21
- "eval_runtime": 0.0487,
22
- "eval_samples_per_second": 862.113,
23
- "eval_steps_per_second": 123.159,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.32653061224489793,
28
- "grad_norm": 0.086942657828331,
29
  "learning_rate": 9.729086208503174e-05,
30
  "loss": 10.3811,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 0.4897959183673469,
35
- "grad_norm": 0.09496694803237915,
36
  "learning_rate": 9.397368756032445e-05,
37
- "loss": 10.3806,
38
  "step": 3
39
  },
40
  {
41
  "epoch": 0.6530612244897959,
42
- "grad_norm": 0.07616405189037323,
43
  "learning_rate": 8.945702546981969e-05,
44
- "loss": 10.3807,
45
  "step": 4
46
  },
47
  {
48
  "epoch": 0.8163265306122449,
49
- "grad_norm": 0.0874004065990448,
50
  "learning_rate": 8.386407858128706e-05,
51
- "loss": 10.38,
52
  "step": 5
53
  },
54
  {
55
  "epoch": 0.9795918367346939,
56
- "grad_norm": 0.09588516503572464,
57
  "learning_rate": 7.734740790612136e-05,
58
- "loss": 10.3792,
59
  "step": 6
60
  },
61
  {
62
  "epoch": 1.1428571428571428,
63
- "grad_norm": 0.15574996173381805,
64
  "learning_rate": 7.008477123264848e-05,
65
- "loss": 19.6951,
66
  "step": 7
67
  },
68
  {
69
  "epoch": 1.306122448979592,
70
- "grad_norm": 0.08454212546348572,
71
  "learning_rate": 6.227427435703997e-05,
72
- "loss": 10.0344,
73
  "step": 8
74
  },
75
  {
76
  "epoch": 1.469387755102041,
77
- "grad_norm": 0.10501722246408463,
78
  "learning_rate": 5.4128967273616625e-05,
79
- "loss": 10.7884,
80
  "step": 9
81
  },
82
  {
83
  "epoch": 1.6326530612244898,
84
- "grad_norm": 0.08670323342084885,
85
  "learning_rate": 4.5871032726383386e-05,
86
- "loss": 10.2937,
87
  "step": 10
88
  },
89
  {
90
  "epoch": 1.7959183673469388,
91
- "grad_norm": 0.08873719722032547,
92
  "learning_rate": 3.772572564296005e-05,
93
- "loss": 10.0212,
94
  "step": 11
95
  },
96
  {
97
  "epoch": 1.9591836734693877,
98
- "grad_norm": 0.09139610081911087,
99
  "learning_rate": 2.991522876735154e-05,
100
- "loss": 10.8236,
101
  "step": 12
102
  },
103
  {
104
  "epoch": 2.122448979591837,
105
- "grad_norm": 0.16937825083732605,
106
  "learning_rate": 2.2652592093878666e-05,
107
- "loss": 19.5605,
108
  "step": 13
109
  },
110
  {
111
  "epoch": 2.2857142857142856,
112
- "grad_norm": 0.08272731304168701,
113
  "learning_rate": 1.6135921418712956e-05,
114
- "loss": 9.6462,
115
  "step": 14
116
  },
117
  {
118
  "epoch": 2.4489795918367347,
119
- "grad_norm": 0.09716588258743286,
120
  "learning_rate": 1.0542974530180327e-05,
121
- "loss": 11.1995,
122
  "step": 15
123
  },
124
  {
125
  "epoch": 2.612244897959184,
126
- "grad_norm": 0.09108325839042664,
127
  "learning_rate": 6.026312439675552e-06,
128
- "loss": 10.3281,
129
  "step": 16
130
  },
131
  {
132
  "epoch": 2.7755102040816326,
133
- "grad_norm": 0.08579767495393753,
134
  "learning_rate": 2.7091379149682685e-06,
135
- "loss": 9.6129,
136
  "step": 17
137
  },
138
  {
139
  "epoch": 2.938775510204082,
140
- "grad_norm": 0.10283508896827698,
141
  "learning_rate": 6.819348298638839e-07,
142
- "loss": 11.1786,
143
  "step": 18
144
  },
145
  {
146
  "epoch": 3.1020408163265305,
147
- "grad_norm": 0.17679283022880554,
148
  "learning_rate": 0.0,
149
- "loss": 19.5204,
150
  "step": 19
151
  }
152
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.16326530612244897,
13
+ "grad_norm": 0.08699872344732285,
14
  "learning_rate": 9.931806517013612e-05,
15
  "loss": 10.3801,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.16326530612244897,
20
+ "eval_loss": 10.379783630371094,
21
+ "eval_runtime": 0.0468,
22
+ "eval_samples_per_second": 897.854,
23
+ "eval_steps_per_second": 128.265,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.32653061224489793,
28
+ "grad_norm": 0.09416474401950836,
29
  "learning_rate": 9.729086208503174e-05,
30
  "loss": 10.3811,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 0.4897959183673469,
35
+ "grad_norm": 0.10440898686647415,
36
  "learning_rate": 9.397368756032445e-05,
37
+ "loss": 10.3805,
38
  "step": 3
39
  },
40
  {
41
  "epoch": 0.6530612244897959,
42
+ "grad_norm": 0.08457836508750916,
43
  "learning_rate": 8.945702546981969e-05,
44
+ "loss": 10.3806,
45
  "step": 4
46
  },
47
  {
48
  "epoch": 0.8163265306122449,
49
+ "grad_norm": 0.09569015353918076,
50
  "learning_rate": 8.386407858128706e-05,
51
+ "loss": 10.3799,
52
  "step": 5
53
  },
54
  {
55
  "epoch": 0.9795918367346939,
56
+ "grad_norm": 0.1072765588760376,
57
  "learning_rate": 7.734740790612136e-05,
58
+ "loss": 10.379,
59
  "step": 6
60
  },
61
  {
62
  "epoch": 1.1428571428571428,
63
+ "grad_norm": 0.17628832161426544,
64
  "learning_rate": 7.008477123264848e-05,
65
+ "loss": 19.6947,
66
  "step": 7
67
  },
68
  {
69
  "epoch": 1.306122448979592,
70
+ "grad_norm": 0.09385761618614197,
71
  "learning_rate": 6.227427435703997e-05,
72
+ "loss": 10.0342,
73
  "step": 8
74
  },
75
  {
76
  "epoch": 1.469387755102041,
77
+ "grad_norm": 0.1199827715754509,
78
  "learning_rate": 5.4128967273616625e-05,
79
+ "loss": 10.7881,
80
  "step": 9
81
  },
82
  {
83
  "epoch": 1.6326530612244898,
84
+ "grad_norm": 0.09820538759231567,
85
  "learning_rate": 4.5871032726383386e-05,
86
+ "loss": 10.2934,
87
  "step": 10
88
  },
89
  {
90
  "epoch": 1.7959183673469388,
91
+ "grad_norm": 0.10190445929765701,
92
  "learning_rate": 3.772572564296005e-05,
93
+ "loss": 10.0209,
94
  "step": 11
95
  },
96
  {
97
  "epoch": 1.9591836734693877,
98
+ "grad_norm": 0.10631943494081497,
99
  "learning_rate": 2.991522876735154e-05,
100
+ "loss": 10.8232,
101
  "step": 12
102
  },
103
  {
104
  "epoch": 2.122448979591837,
105
+ "grad_norm": 0.1966407150030136,
106
  "learning_rate": 2.2652592093878666e-05,
107
+ "loss": 19.5598,
108
  "step": 13
109
  },
110
  {
111
  "epoch": 2.2857142857142856,
112
+ "grad_norm": 0.09489902853965759,
113
  "learning_rate": 1.6135921418712956e-05,
114
+ "loss": 9.6459,
115
  "step": 14
116
  },
117
  {
118
  "epoch": 2.4489795918367347,
119
+ "grad_norm": 0.11150838434696198,
120
  "learning_rate": 1.0542974530180327e-05,
121
+ "loss": 11.1992,
122
  "step": 15
123
  },
124
  {
125
  "epoch": 2.612244897959184,
126
+ "grad_norm": 0.10458274930715561,
127
  "learning_rate": 6.026312439675552e-06,
128
+ "loss": 10.3278,
129
  "step": 16
130
  },
131
  {
132
  "epoch": 2.7755102040816326,
133
+ "grad_norm": 0.09886434674263,
134
  "learning_rate": 2.7091379149682685e-06,
135
+ "loss": 9.6125,
136
  "step": 17
137
  },
138
  {
139
  "epoch": 2.938775510204082,
140
+ "grad_norm": 0.11752457171678543,
141
  "learning_rate": 6.819348298638839e-07,
142
+ "loss": 11.1782,
143
  "step": 18
144
  },
145
  {
146
  "epoch": 3.1020408163265305,
147
+ "grad_norm": 0.20353963971138,
148
  "learning_rate": 0.0,
149
+ "loss": 19.5197,
150
  "step": 19
151
  }
152
  ],
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adfd9a452340c7844714460d264a482b1e9ccd6cbf3924d826a08676c6660015
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4ef1efafef513483c59b4bbfa46a3c48edfc43187f127f58ad06bf22c4d8c07
3
  size 6776