ShenaoZhang commited on
Commit
c35c1c3
·
verified ·
1 Parent(s): 614befd

Model save

Browse files
README.md CHANGED
@@ -2,14 +2,9 @@
2
  license: mit
3
  base_model: ShenaoZhang/0.001_ablation_4iters_bs128_nodpo_iter_1
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - dpo
9
  - generated_from_trainer
10
- datasets:
11
- - updated
12
- - original
13
  model-index:
14
  - name: 0.001_ablation_4iters_bs128_nodpo_iter_2
15
  results: []
@@ -20,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # 0.001_ablation_4iters_bs128_nodpo_iter_2
22
 
23
- This model is a fine-tuned version of [ShenaoZhang/0.001_ablation_4iters_bs128_nodpo_iter_1](https://huggingface.co/ShenaoZhang/0.001_ablation_4iters_bs128_nodpo_iter_1) on the updated and the original datasets.
24
 
25
  ## Model description
26
 
 
2
  license: mit
3
  base_model: ShenaoZhang/0.001_ablation_4iters_bs128_nodpo_iter_1
4
  tags:
 
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
 
 
 
8
  model-index:
9
  - name: 0.001_ablation_4iters_bs128_nodpo_iter_2
10
  results: []
 
15
 
16
  # 0.001_ablation_4iters_bs128_nodpo_iter_2
17
 
18
+ This model is a fine-tuned version of [ShenaoZhang/0.001_ablation_4iters_bs128_nodpo_iter_1](https://huggingface.co/ShenaoZhang/0.001_ablation_4iters_bs128_nodpo_iter_1) on the None dataset.
19
 
20
  ## Model description
21
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.5155397543386251,
4
- "train_runtime": 3544.4653,
5
  "train_samples": 15283,
6
- "train_samples_per_second": 4.312,
7
- "train_steps_per_second": 0.034
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.5123876844133649,
4
+ "train_runtime": 3580.7399,
5
  "train_samples": 15283,
6
+ "train_samples_per_second": 4.268,
7
+ "train_steps_per_second": 0.033
8
  }
config.json CHANGED
@@ -21,6 +21,6 @@
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.36.2",
24
- "use_cache": true,
25
  "vocab_size": 32000
26
  }
 
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.36.2",
24
+ "use_cache": false,
25
  "vocab_size": 32000
26
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6a4a8f33f7cd22f68beb713ab961fdc674f47c187dc0c31e9c447b0263a166a
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eeb1d6bc4c9c71b9901440afbcadde9074dc1b609e805eff3e1ba5eb919a293
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:998f3455ad4b695e0e9cee3c94a2d450a0652ff7016d7fea834dc5da7dbbcd28
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daae2a827686870c5740dfb45d7085e40a24afae1c4c43d802e6052d40b84d6d
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ee50a9e5c82f0c9f712195a29b1bbbfc40af87273098c456b696f22ca7d5f3d
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b633f2fe5bb5bbd7b194dd4a0e53debb776a136b8bca8d35a6d1d3820cdd0740
3
  size 4540516344
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.5155397543386251,
4
- "train_runtime": 3544.4653,
5
  "train_samples": 15283,
6
- "train_samples_per_second": 4.312,
7
- "train_steps_per_second": 0.034
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.5123876844133649,
4
+ "train_runtime": 3580.7399,
5
  "train_samples": 15283,
6
+ "train_samples_per_second": 4.268,
7
+ "train_steps_per_second": 0.033
8
  }
trainer_state.json CHANGED
@@ -11,12 +11,12 @@
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 4.166666666666666e-08,
14
- "logits/chosen": -2.6911520957946777,
15
- "logits/rejected": -2.6465554237365723,
16
- "logps/chosen": -184.49661254882812,
17
- "logps/pi_response": -139.5087890625,
18
- "logps/ref_response": -139.5087890625,
19
- "logps/rejected": -296.51641845703125,
20
  "loss": 0.6931,
21
  "rewards/accuracies": 0.0,
22
  "rewards/chosen": 0.0,
@@ -27,187 +27,187 @@
27
  {
28
  "epoch": 0.08,
29
  "learning_rate": 4.1666666666666667e-07,
30
- "logits/chosen": -2.6858456134796143,
31
- "logits/rejected": -2.6641671657562256,
32
- "logps/chosen": -208.271240234375,
33
- "logps/pi_response": -148.94398498535156,
34
- "logps/ref_response": -148.13111877441406,
35
- "logps/rejected": -352.410888671875,
36
- "loss": 0.6845,
37
- "rewards/accuracies": 0.6041666865348816,
38
- "rewards/chosen": -0.011623169295489788,
39
- "rewards/margins": 0.028608590364456177,
40
- "rewards/rejected": -0.04023175686597824,
41
  "step": 10
42
  },
43
  {
44
  "epoch": 0.17,
45
  "learning_rate": 4.931352528237397e-07,
46
- "logits/chosen": -2.626678943634033,
47
- "logits/rejected": -2.5818490982055664,
48
- "logps/chosen": -290.82843017578125,
49
- "logps/pi_response": -157.5281524658203,
50
- "logps/ref_response": -153.23585510253906,
51
- "logps/rejected": -433.90478515625,
52
- "loss": 0.6211,
53
- "rewards/accuracies": 0.6812499761581421,
54
- "rewards/chosen": -0.40517061948776245,
55
- "rewards/margins": 0.4539198875427246,
56
- "rewards/rejected": -0.8590905070304871,
57
  "step": 20
58
  },
59
  {
60
  "epoch": 0.25,
61
  "learning_rate": 4.658920803689553e-07,
62
- "logits/chosen": -2.6315205097198486,
63
- "logits/rejected": -2.612787961959839,
64
- "logps/chosen": -279.93316650390625,
65
- "logps/pi_response": -139.732177734375,
66
- "logps/ref_response": -133.53305053710938,
67
- "logps/rejected": -475.787109375,
68
- "loss": 0.5927,
69
- "rewards/accuracies": 0.699999988079071,
70
- "rewards/chosen": -0.6063723564147949,
71
- "rewards/margins": 0.8305813074111938,
72
- "rewards/rejected": -1.4369536638259888,
73
  "step": 30
74
  },
75
  {
76
  "epoch": 0.33,
77
  "learning_rate": 4.201712553872657e-07,
78
- "logits/chosen": -2.5569541454315186,
79
- "logits/rejected": -2.500791072845459,
80
- "logps/chosen": -332.53753662109375,
81
- "logps/pi_response": -179.93458557128906,
82
- "logps/ref_response": -155.60166931152344,
83
- "logps/rejected": -513.1066284179688,
84
- "loss": 0.5165,
85
- "rewards/accuracies": 0.6812499761581421,
86
- "rewards/chosen": -0.6680192351341248,
87
- "rewards/margins": 0.8153729438781738,
88
- "rewards/rejected": -1.4833922386169434,
89
  "step": 40
90
  },
91
  {
92
  "epoch": 0.42,
93
  "learning_rate": 3.598859066780754e-07,
94
- "logits/chosen": -2.47841739654541,
95
- "logits/rejected": -2.409036636352539,
96
- "logps/chosen": -309.3136291503906,
97
- "logps/pi_response": -171.49142456054688,
98
- "logps/ref_response": -147.92498779296875,
99
- "logps/rejected": -520.16552734375,
100
- "loss": 0.499,
101
- "rewards/accuracies": 0.793749988079071,
102
- "rewards/chosen": -0.7337684035301208,
103
- "rewards/margins": 1.0852042436599731,
104
- "rewards/rejected": -1.8189725875854492,
105
  "step": 50
106
  },
107
  {
108
  "epoch": 0.5,
109
  "learning_rate": 2.9019570347986706e-07,
110
- "logits/chosen": -2.2959446907043457,
111
- "logits/rejected": -2.2667393684387207,
112
- "logps/chosen": -350.14996337890625,
113
- "logps/pi_response": -193.71585083007812,
114
- "logps/ref_response": -136.9359588623047,
115
- "logps/rejected": -525.8280029296875,
116
- "loss": 0.5013,
117
- "rewards/accuracies": 0.731249988079071,
118
- "rewards/chosen": -1.0568283796310425,
119
- "rewards/margins": 0.8585710525512695,
120
- "rewards/rejected": -1.9153995513916016,
121
  "step": 60
122
  },
123
  {
124
  "epoch": 0.59,
125
  "learning_rate": 2.1706525253979534e-07,
126
- "logits/chosen": -2.272998332977295,
127
- "logits/rejected": -2.209773302078247,
128
- "logps/chosen": -322.48974609375,
129
- "logps/pi_response": -191.36929321289062,
130
- "logps/ref_response": -146.25982666015625,
131
- "logps/rejected": -540.8594970703125,
132
- "loss": 0.4806,
133
- "rewards/accuracies": 0.7562500238418579,
134
- "rewards/chosen": -0.8404802083969116,
135
- "rewards/margins": 1.0443588495254517,
136
- "rewards/rejected": -1.8848390579223633,
137
  "step": 70
138
  },
139
  {
140
  "epoch": 0.67,
141
  "learning_rate": 1.4675360263490295e-07,
142
- "logits/chosen": -2.2215452194213867,
143
- "logits/rejected": -2.1440436840057373,
144
- "logps/chosen": -327.8442687988281,
145
- "logps/pi_response": -181.72433471679688,
146
- "logps/ref_response": -136.20291137695312,
147
- "logps/rejected": -519.2799072265625,
148
- "loss": 0.4574,
149
- "rewards/accuracies": 0.7124999761581421,
150
- "rewards/chosen": -0.9074360728263855,
151
- "rewards/margins": 0.9569914937019348,
152
- "rewards/rejected": -1.8644273281097412,
153
  "step": 80
154
  },
155
  {
156
  "epoch": 0.75,
157
  "learning_rate": 8.527854855097224e-08,
158
- "logits/chosen": -2.221020460128784,
159
- "logits/rejected": -2.087261199951172,
160
- "logps/chosen": -340.190673828125,
161
- "logps/pi_response": -209.3700714111328,
162
- "logps/ref_response": -147.84341430664062,
163
- "logps/rejected": -536.7362060546875,
164
- "loss": 0.4452,
165
- "rewards/accuracies": 0.7875000238418579,
166
- "rewards/chosen": -1.0263036489486694,
167
- "rewards/margins": 1.1081960201263428,
168
- "rewards/rejected": -2.134500026702881,
169
  "step": 90
170
  },
171
  {
172
  "epoch": 0.84,
173
  "learning_rate": 3.790158337517127e-08,
174
- "logits/chosen": -2.1238274574279785,
175
- "logits/rejected": -2.120473623275757,
176
- "logps/chosen": -347.0937194824219,
177
- "logps/pi_response": -210.205078125,
178
- "logps/ref_response": -146.23873901367188,
179
- "logps/rejected": -536.5059814453125,
180
- "loss": 0.4757,
181
- "rewards/accuracies": 0.699999988079071,
182
- "rewards/chosen": -1.1245771646499634,
183
- "rewards/margins": 0.8947040438652039,
184
- "rewards/rejected": -2.0192811489105225,
185
  "step": 100
186
  },
187
  {
188
  "epoch": 0.92,
189
  "learning_rate": 8.677580722139671e-09,
190
- "logits/chosen": -2.1690449714660645,
191
- "logits/rejected": -2.079481363296509,
192
- "logps/chosen": -313.77606201171875,
193
- "logps/pi_response": -206.690185546875,
194
- "logps/ref_response": -138.90621948242188,
195
- "logps/rejected": -530.0359497070312,
196
- "loss": 0.4302,
197
- "rewards/accuracies": 0.8187500238418579,
198
- "rewards/chosen": -0.9440746307373047,
199
- "rewards/margins": 1.1629929542541504,
200
- "rewards/rejected": -2.107067584991455,
201
  "step": 110
202
  },
203
  {
204
  "epoch": 1.0,
205
  "step": 119,
206
  "total_flos": 0.0,
207
- "train_loss": 0.5155397543386251,
208
- "train_runtime": 3544.4653,
209
- "train_samples_per_second": 4.312,
210
- "train_steps_per_second": 0.034
211
  }
212
  ],
213
  "logging_steps": 10,
 
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 4.166666666666666e-08,
14
+ "logits/chosen": -2.601804256439209,
15
+ "logits/rejected": -2.4488413333892822,
16
+ "logps/chosen": -309.41717529296875,
17
+ "logps/pi_response": -218.36558532714844,
18
+ "logps/ref_response": -218.36558532714844,
19
+ "logps/rejected": -352.54058837890625,
20
  "loss": 0.6931,
21
  "rewards/accuracies": 0.0,
22
  "rewards/chosen": 0.0,
 
27
  {
28
  "epoch": 0.08,
29
  "learning_rate": 4.1666666666666667e-07,
30
+ "logits/chosen": -2.587738513946533,
31
+ "logits/rejected": -2.5415542125701904,
32
+ "logps/chosen": -282.608154296875,
33
+ "logps/pi_response": -210.7708282470703,
34
+ "logps/ref_response": -211.15887451171875,
35
+ "logps/rejected": -350.5467224121094,
36
+ "loss": 0.6896,
37
+ "rewards/accuracies": 0.4166666567325592,
38
+ "rewards/chosen": 0.006864532828330994,
39
+ "rewards/margins": 0.002634861972182989,
40
+ "rewards/rejected": 0.004229670390486717,
41
  "step": 10
42
  },
43
  {
44
  "epoch": 0.17,
45
  "learning_rate": 4.931352528237397e-07,
46
+ "logits/chosen": -2.5801992416381836,
47
+ "logits/rejected": -2.526615858078003,
48
+ "logps/chosen": -321.7696228027344,
49
+ "logps/pi_response": -246.4761199951172,
50
+ "logps/ref_response": -208.217041015625,
51
+ "logps/rejected": -366.0833435058594,
52
+ "loss": 0.6334,
53
+ "rewards/accuracies": 0.6625000238418579,
54
+ "rewards/chosen": -0.2522856295108795,
55
+ "rewards/margins": 0.16487669944763184,
56
+ "rewards/rejected": -0.41716232895851135,
57
  "step": 20
58
  },
59
  {
60
  "epoch": 0.25,
61
  "learning_rate": 4.658920803689553e-07,
62
+ "logits/chosen": -2.4065756797790527,
63
+ "logits/rejected": -2.348301410675049,
64
+ "logps/chosen": -375.37969970703125,
65
+ "logps/pi_response": -379.5442810058594,
66
+ "logps/ref_response": -218.188232421875,
67
+ "logps/rejected": -469.886962890625,
68
+ "loss": 0.5773,
69
+ "rewards/accuracies": 0.7250000238418579,
70
+ "rewards/chosen": -0.9461199045181274,
71
+ "rewards/margins": 0.6475059986114502,
72
+ "rewards/rejected": -1.5936260223388672,
73
  "step": 30
74
  },
75
  {
76
  "epoch": 0.33,
77
  "learning_rate": 4.201712553872657e-07,
78
+ "logits/chosen": -2.316896438598633,
79
+ "logits/rejected": -2.2457833290100098,
80
+ "logps/chosen": -419.0770568847656,
81
+ "logps/pi_response": -449.25067138671875,
82
+ "logps/ref_response": -225.2439422607422,
83
+ "logps/rejected": -575.4486083984375,
84
+ "loss": 0.4922,
85
+ "rewards/accuracies": 0.7875000238418579,
86
+ "rewards/chosen": -1.1946828365325928,
87
+ "rewards/margins": 1.144521951675415,
88
+ "rewards/rejected": -2.3392045497894287,
89
  "step": 40
90
  },
91
  {
92
  "epoch": 0.42,
93
  "learning_rate": 3.598859066780754e-07,
94
+ "logits/chosen": -2.2486929893493652,
95
+ "logits/rejected": -2.1755928993225098,
96
+ "logps/chosen": -445.09942626953125,
97
+ "logps/pi_response": -452.2618103027344,
98
+ "logps/ref_response": -229.0274658203125,
99
+ "logps/rejected": -522.7374877929688,
100
+ "loss": 0.4686,
101
+ "rewards/accuracies": 0.7250000238418579,
102
+ "rewards/chosen": -1.2075799703598022,
103
+ "rewards/margins": 0.8736856579780579,
104
+ "rewards/rejected": -2.081265687942505,
105
  "step": 50
106
  },
107
  {
108
  "epoch": 0.5,
109
  "learning_rate": 2.9019570347986706e-07,
110
+ "logits/chosen": -2.1504597663879395,
111
+ "logits/rejected": -2.0797207355499268,
112
+ "logps/chosen": -436.8817443847656,
113
+ "logps/pi_response": -447.95123291015625,
114
+ "logps/ref_response": -223.42208862304688,
115
+ "logps/rejected": -558.6624145507812,
116
+ "loss": 0.4562,
117
+ "rewards/accuracies": 0.762499988079071,
118
+ "rewards/chosen": -1.3431683778762817,
119
+ "rewards/margins": 1.0750805139541626,
120
+ "rewards/rejected": -2.4182491302490234,
121
  "step": 60
122
  },
123
  {
124
  "epoch": 0.59,
125
  "learning_rate": 2.1706525253979534e-07,
126
+ "logits/chosen": -2.138681411743164,
127
+ "logits/rejected": -2.0570006370544434,
128
+ "logps/chosen": -440.0376892089844,
129
+ "logps/pi_response": -416.15435791015625,
130
+ "logps/ref_response": -193.9953155517578,
131
+ "logps/rejected": -525.2066040039062,
132
+ "loss": 0.4531,
133
+ "rewards/accuracies": 0.8125,
134
+ "rewards/chosen": -1.4727153778076172,
135
+ "rewards/margins": 0.8203312158584595,
136
+ "rewards/rejected": -2.293046474456787,
137
  "step": 70
138
  },
139
  {
140
  "epoch": 0.67,
141
  "learning_rate": 1.4675360263490295e-07,
142
+ "logits/chosen": -2.073247194290161,
143
+ "logits/rejected": -2.0226058959960938,
144
+ "logps/chosen": -397.6239929199219,
145
+ "logps/pi_response": -399.37701416015625,
146
+ "logps/ref_response": -194.95201110839844,
147
+ "logps/rejected": -524.5493774414062,
148
+ "loss": 0.4631,
149
+ "rewards/accuracies": 0.768750011920929,
150
+ "rewards/chosen": -1.335193157196045,
151
+ "rewards/margins": 0.893665611743927,
152
+ "rewards/rejected": -2.228858470916748,
153
  "step": 80
154
  },
155
  {
156
  "epoch": 0.75,
157
  "learning_rate": 8.527854855097224e-08,
158
+ "logits/chosen": -2.1284587383270264,
159
+ "logits/rejected": -2.057450771331787,
160
+ "logps/chosen": -467.1397399902344,
161
+ "logps/pi_response": -451.71929931640625,
162
+ "logps/ref_response": -203.74961853027344,
163
+ "logps/rejected": -569.0457153320312,
164
+ "loss": 0.4792,
165
+ "rewards/accuracies": 0.762499988079071,
166
+ "rewards/chosen": -1.6770477294921875,
167
+ "rewards/margins": 0.9977802038192749,
168
+ "rewards/rejected": -2.674827814102173,
169
  "step": 90
170
  },
171
  {
172
  "epoch": 0.84,
173
  "learning_rate": 3.790158337517127e-08,
174
+ "logits/chosen": -2.0567708015441895,
175
+ "logits/rejected": -1.9975345134735107,
176
+ "logps/chosen": -508.8993225097656,
177
+ "logps/pi_response": -477.3226013183594,
178
+ "logps/ref_response": -213.10018920898438,
179
+ "logps/rejected": -581.6873779296875,
180
+ "loss": 0.4706,
181
+ "rewards/accuracies": 0.75,
182
+ "rewards/chosen": -1.7259727716445923,
183
+ "rewards/margins": 0.9824594259262085,
184
+ "rewards/rejected": -2.708432197570801,
185
  "step": 100
186
  },
187
  {
188
  "epoch": 0.92,
189
  "learning_rate": 8.677580722139671e-09,
190
+ "logits/chosen": -2.1619856357574463,
191
+ "logits/rejected": -2.041363000869751,
192
+ "logps/chosen": -498.51531982421875,
193
+ "logps/pi_response": -450.4139709472656,
194
+ "logps/ref_response": -207.01895141601562,
195
+ "logps/rejected": -583.1207885742188,
196
+ "loss": 0.4992,
197
+ "rewards/accuracies": 0.762499988079071,
198
+ "rewards/chosen": -1.6402676105499268,
199
+ "rewards/margins": 1.0984296798706055,
200
+ "rewards/rejected": -2.7386972904205322,
201
  "step": 110
202
  },
203
  {
204
  "epoch": 1.0,
205
  "step": 119,
206
  "total_flos": 0.0,
207
+ "train_loss": 0.5123876844133649,
208
+ "train_runtime": 3580.7399,
209
+ "train_samples_per_second": 4.268,
210
+ "train_steps_per_second": 0.033
211
  }
212
  ],
213
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dd70008d573b944cc0bff847fa7dc92dffb5fb3348275bcd1b9b42bdcb71bcb
3
  size 6008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:085e5b9bd3b9b0c06c387ebb25f499f0d882a1c6618cd968b550404cc82f0678
3
  size 6008