jeromeramos committed on
Commit
63bb3e0
·
verified ·
1 Parent(s): f59c651

Model save

Browse files
README.md CHANGED
@@ -1,4 +1,5 @@
1
  ---
 
2
  library_name: transformers
3
  model_name: inter-play-sim-assistant-dpo
4
  tags:
@@ -10,7 +11,7 @@ licence: license
10
 
11
  # Model Card for inter-play-sim-assistant-dpo
12
 
13
- This model is a fine-tuned version of [None](https://huggingface.co/None).
14
  It has been trained using [TRL](https://github.com/huggingface/trl).
15
 
16
  ## Quick start
@@ -26,7 +27,7 @@ print(output["generated_text"])
26
 
27
  ## Training procedure
28
 
29
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/jerome-ramos-20/huggingface/runs/s1gd3cwj)
30
 
31
 
32
  This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
 
1
  ---
2
+ base_model: Sim4Rec/inter-play-sim-assistant-sft
3
  library_name: transformers
4
  model_name: inter-play-sim-assistant-dpo
5
  tags:
 
11
 
12
  # Model Card for inter-play-sim-assistant-dpo
13
 
14
+ This model is a fine-tuned version of [Sim4Rec/inter-play-sim-assistant-sft](https://huggingface.co/Sim4Rec/inter-play-sim-assistant-sft).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/jerome-ramos-20/huggingface/runs/bw9lk46t)
31
 
32
 
33
  This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.975609756097561,
3
  "total_flos": 0.0,
4
- "train_loss": 0.6916259765625,
5
- "train_runtime": 59.3905,
6
- "train_samples": 978,
7
- "train_samples_per_second": 16.467,
8
- "train_steps_per_second": 0.253
9
  }
 
1
  {
2
+ "epoch": 1.9938900203665988,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.5645458033827485,
5
+ "train_runtime": 476.0531,
6
+ "train_samples": 3921,
7
+ "train_samples_per_second": 16.473,
8
+ "train_steps_per_second": 0.256
9
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "model/inter-play-sim-assistant-sft",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "Sim4Rec/inter-play-sim-assistant-sft",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab96079ad2c261f426a8ef4826ae3df2c6cabacb010b4b0f9404993557cc3144
3
  size 4977222960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c662bff069e16a55c0ed3957b7c1a7e2e7b6efe4c7dc1b38a09a42e95da5419d
3
  size 4977222960
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:628482e036af56a4090a1fe75fa4d2b786dccbacc4008e03d55dce71a8846ab7
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e41f89c0b9e9c5a5e8dd10e5c19c527e4726dc9b373296f7a897cbbaa737621
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3049fd4de38e0e182de4e72e71ac3f272a10e2b48a2c8cd2104cb0c9c1ac7b0
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6e57a5558dd5fcdae6c05b45dd4d4a6294dd49f4b1dd9e5a5bba7cf612cffe6
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5804772560bb628ce840523c1ad80d04fc1b7d33619be27b2cfa187e10be3158
3
  size 1168663096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c466502280c55a6dd02d9a75fc8f3c83841694b9133c35db128ed464184cfa86
3
  size 1168663096
runs/Feb03_23-52-00_w-jerom-inter-play-sim-94c6890b9ccf44ea86f033a3db8a5dbd-6cxbw6h/events.out.tfevents.1738626757.w-jerom-inter-play-sim-94c6890b9ccf44ea86f033a3db8a5dbd-6cxbw6h.70942.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0e785113a7cbee4be5dd010882b1e48ca3ac55dee5113a7234d478548d905ee
3
+ size 7112
runs/Feb03_23-53-52_w-jerom-inter-play-sim-94c6890b9ccf44ea86f033a3db8a5dbd-6cxbw6h/events.out.tfevents.1738626866.w-jerom-inter-play-sim-94c6890b9ccf44ea86f033a3db8a5dbd-6cxbw6h.72213.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0c8de269223d4244779011935509c67ca73cfe641937dba5d0b5076eeda9643
3
+ size 8138
runs/Feb04_00-52-50_w-jerom-inter-play-sim-94c6890b9ccf44ea86f033a3db8a5dbd-6cxbw6h/events.out.tfevents.1738630407.w-jerom-inter-play-sim-94c6890b9ccf44ea86f033a3db8a5dbd-6cxbw6h.95662.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5279dfc90df5691531e2bd5993166771f36ac7e767c3a1175d0196a5931a47a7
3
+ size 6438
runs/Feb04_00-54-18_w-jerom-inter-play-sim-94c6890b9ccf44ea86f033a3db8a5dbd-6cxbw6h/events.out.tfevents.1738630495.w-jerom-inter-play-sim-94c6890b9ccf44ea86f033a3db8a5dbd-6cxbw6h.97190.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d289b6961caeffd17d5a8fdac14574ecf8f1acfb235b85fe661753379b43ab29
3
+ size 15574
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.975609756097561,
3
  "total_flos": 0.0,
4
- "train_loss": 0.6916259765625,
5
- "train_runtime": 59.3905,
6
- "train_samples": 978,
7
- "train_samples_per_second": 16.467,
8
- "train_steps_per_second": 0.253
9
  }
 
1
  {
2
+ "epoch": 1.9938900203665988,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.5645458033827485,
5
+ "train_runtime": 476.0531,
6
+ "train_samples": 3921,
7
+ "train_samples_per_second": 16.473,
8
+ "train_steps_per_second": 0.256
9
  }
trainer_state.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.975609756097561,
5
- "eval_steps": 100,
6
- "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.06504065040650407,
13
- "grad_norm": 1.2033172845840454,
14
- "learning_rate": 2.5e-07,
15
- "logits/chosen": -3.296875,
16
- "logits/rejected": -3.234375,
17
- "logps/chosen": -40.65625,
18
- "logps/rejected": -39.75,
19
  "loss": 0.6914,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
@@ -24,34 +24,199 @@
24
  "step": 1
25
  },
26
  {
27
- "epoch": 0.6504065040650406,
28
- "grad_norm": 0.5706184506416321,
29
- "learning_rate": 1.6134877823936607e-07,
30
- "logits/chosen": -3.2595486640930176,
31
- "logits/rejected": -3.248914957046509,
32
- "logps/chosen": -39.62673568725586,
33
- "logps/rejected": -38.33854293823242,
34
- "loss": 0.6917,
35
- "rewards/accuracies": 0.2586805522441864,
36
- "rewards/chosen": 0.0023028056602925062,
37
- "rewards/margins": 0.0009113947744481266,
38
- "rewards/rejected": 0.0013909869594499469,
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.975609756097561,
43
- "step": 15,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  "total_flos": 0.0,
45
- "train_loss": 0.6916259765625,
46
- "train_runtime": 59.3905,
47
- "train_samples_per_second": 16.467,
48
- "train_steps_per_second": 0.253
49
  }
50
  ],
51
  "logging_steps": 10,
52
- "max_steps": 15,
53
  "num_input_tokens_seen": 0,
54
- "num_train_epochs": 1,
55
  "save_steps": 500,
56
  "stateful_callbacks": {
57
  "TrainerControl": {
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9938900203665988,
5
+ "eval_steps": 500,
6
+ "global_step": 122,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.016293279022403257,
13
+ "grad_norm": 1.4111297130584717,
14
+ "learning_rate": 3.846153846153846e-08,
15
+ "logits/chosen": -3.2578125,
16
+ "logits/rejected": -3.19140625,
17
+ "logps/chosen": -46.375,
18
+ "logps/rejected": -45.75,
19
  "loss": 0.6914,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
 
24
  "step": 1
25
  },
26
  {
27
+ "epoch": 0.1629327902240326,
28
+ "grad_norm": 0.7594385147094727,
29
+ "learning_rate": 3.8461538461538463e-07,
30
+ "logits/chosen": -3.2265625,
31
+ "logits/rejected": -3.2200520038604736,
32
+ "logps/chosen": -42.77604293823242,
33
+ "logps/rejected": -41.88541793823242,
34
+ "loss": 0.6924,
35
+ "rewards/accuracies": 0.2222222238779068,
36
+ "rewards/chosen": 0.0023810069542378187,
37
+ "rewards/margins": -0.0003809928894042969,
38
+ "rewards/rejected": 0.0027594566345214844,
39
  "step": 10
40
  },
41
  {
42
+ "epoch": 0.3258655804480652,
43
+ "grad_norm": 0.7135093808174133,
44
+ "learning_rate": 4.949291683053768e-07,
45
+ "logits/chosen": -3.2763671875,
46
+ "logits/rejected": -3.2466797828674316,
47
+ "logps/chosen": -40.720314025878906,
48
+ "logps/rejected": -39.4296875,
49
+ "loss": 0.6872,
50
+ "rewards/accuracies": 0.421875,
51
+ "rewards/chosen": 0.033158015459775925,
52
+ "rewards/margins": 0.012033844366669655,
53
+ "rewards/rejected": 0.02114267274737358,
54
+ "step": 20
55
+ },
56
+ {
57
+ "epoch": 0.48879837067209775,
58
+ "grad_norm": 0.699894368648529,
59
+ "learning_rate": 4.70586371748506e-07,
60
+ "logits/chosen": -3.2841796875,
61
+ "logits/rejected": -3.2529296875,
62
+ "logps/chosen": -40.8515625,
63
+ "logps/rejected": -40.571876525878906,
64
+ "loss": 0.6765,
65
+ "rewards/accuracies": 0.528124988079071,
66
+ "rewards/chosen": 0.0396418571472168,
67
+ "rewards/margins": 0.034914396703243256,
68
+ "rewards/rejected": 0.004716300871223211,
69
+ "step": 30
70
+ },
71
+ {
72
+ "epoch": 0.6517311608961304,
73
+ "grad_norm": 1.1233805418014526,
74
+ "learning_rate": 4.280458575653296e-07,
75
+ "logits/chosen": -3.195117235183716,
76
+ "logits/rejected": -3.162890672683716,
77
+ "logps/chosen": -43.59375,
78
+ "logps/rejected": -44.532814025878906,
79
+ "loss": 0.6415,
80
+ "rewards/accuracies": 0.628125011920929,
81
+ "rewards/chosen": -0.06923361122608185,
82
+ "rewards/margins": 0.11445312201976776,
83
+ "rewards/rejected": -0.1837112456560135,
84
+ "step": 40
85
+ },
86
+ {
87
+ "epoch": 0.814663951120163,
88
+ "grad_norm": 1.2118208408355713,
89
+ "learning_rate": 3.7081709127108767e-07,
90
+ "logits/chosen": -3.0589842796325684,
91
+ "logits/rejected": NaN,
92
+ "logps/chosen": -50.13593673706055,
93
+ "logps/rejected": -52.04999923706055,
94
+ "loss": 0.6138,
95
+ "rewards/accuracies": 0.612500011920929,
96
+ "rewards/chosen": -0.3750244081020355,
97
+ "rewards/margins": 0.18903808295726776,
98
+ "rewards/rejected": -0.5643554925918579,
99
+ "step": 50
100
+ },
101
+ {
102
+ "epoch": 0.9775967413441955,
103
+ "grad_norm": 1.2914516925811768,
104
+ "learning_rate": 3.0362127536287636e-07,
105
+ "logits/chosen": -3.135546922683716,
106
+ "logits/rejected": -3.056835889816284,
107
+ "logps/chosen": -50.14531326293945,
108
+ "logps/rejected": -56.23749923706055,
109
+ "loss": 0.566,
110
+ "rewards/accuracies": 0.612500011920929,
111
+ "rewards/chosen": -0.4345153868198395,
112
+ "rewards/margins": 0.36333543062210083,
113
+ "rewards/rejected": -0.7979736328125,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 1.1466395112016294,
118
+ "grad_norm": 1.4273715019226074,
119
+ "learning_rate": 2.3200186419770823e-07,
120
+ "logits/chosen": -3.161651134490967,
121
+ "logits/rejected": -3.0788965225219727,
122
+ "logps/chosen": -53.49691390991211,
123
+ "logps/rejected": -62.85802459716797,
124
+ "loss": 0.5371,
125
+ "rewards/accuracies": 0.6095678806304932,
126
+ "rewards/chosen": -0.5622889995574951,
127
+ "rewards/margins": 0.5249747037887573,
128
+ "rewards/rejected": -1.0871431827545166,
129
+ "step": 70
130
+ },
131
+ {
132
+ "epoch": 1.309572301425662,
133
+ "grad_norm": 1.3244953155517578,
134
+ "learning_rate": 1.6186724554503237e-07,
135
+ "logits/chosen": -3.133984327316284,
136
+ "logits/rejected": -3.0191407203674316,
137
+ "logps/chosen": -55.234375,
138
+ "logps/rejected": -68.234375,
139
+ "loss": 0.4905,
140
+ "rewards/accuracies": 0.660937488079071,
141
+ "rewards/chosen": -0.6110439300537109,
142
+ "rewards/margins": 0.7057861089706421,
143
+ "rewards/rejected": -1.3170897960662842,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 1.4725050916496945,
148
+ "grad_norm": 1.8561619520187378,
149
+ "learning_rate": 9.900331622138063e-08,
150
+ "logits/chosen": -3.107617139816284,
151
+ "logits/rejected": -2.9839844703674316,
152
+ "logps/chosen": -55.421875,
153
+ "logps/rejected": -68.80937194824219,
154
+ "loss": 0.4936,
155
+ "rewards/accuracies": 0.640625,
156
+ "rewards/chosen": -0.6593307256698608,
157
+ "rewards/margins": 0.737597644329071,
158
+ "rewards/rejected": -1.3976562023162842,
159
+ "step": 90
160
+ },
161
+ {
162
+ "epoch": 1.635437881873727,
163
+ "grad_norm": 0.9447304606437683,
164
+ "learning_rate": 4.859616286322094e-08,
165
+ "logits/chosen": -3.1148438453674316,
166
+ "logits/rejected": -2.9876952171325684,
167
+ "logps/chosen": -53.092185974121094,
168
+ "logps/rejected": -68.9312515258789,
169
+ "loss": 0.468,
170
+ "rewards/accuracies": 0.6484375,
171
+ "rewards/chosen": -0.5302764773368835,
172
+ "rewards/margins": 0.887438952922821,
173
+ "rewards/rejected": -1.417944312095642,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 1.7983706720977597,
178
+ "grad_norm": 1.593487024307251,
179
+ "learning_rate": 1.4804225250339281e-08,
180
+ "logits/chosen": -3.1207032203674316,
181
+ "logits/rejected": NaN,
182
+ "logps/chosen": -54.428123474121094,
183
+ "logps/rejected": -68.76249694824219,
184
+ "loss": 0.4758,
185
+ "rewards/accuracies": 0.640625,
186
+ "rewards/chosen": -0.6090973019599915,
187
+ "rewards/margins": 0.817614734172821,
188
+ "rewards/rejected": -1.4268066883087158,
189
+ "step": 110
190
+ },
191
+ {
192
+ "epoch": 1.9613034623217924,
193
+ "grad_norm": 2.265634059906006,
194
+ "learning_rate": 4.152374292708538e-10,
195
+ "logits/chosen": -3.109179735183716,
196
+ "logits/rejected": -2.985156297683716,
197
+ "logps/chosen": -54.99687576293945,
198
+ "logps/rejected": -71.015625,
199
+ "loss": 0.4654,
200
+ "rewards/accuracies": 0.667187511920929,
201
+ "rewards/chosen": -0.6471847295761108,
202
+ "rewards/margins": 0.863818347454071,
203
+ "rewards/rejected": -1.510644555091858,
204
+ "step": 120
205
+ },
206
+ {
207
+ "epoch": 1.9938900203665988,
208
+ "step": 122,
209
  "total_flos": 0.0,
210
+ "train_loss": 0.5645458033827485,
211
+ "train_runtime": 476.0531,
212
+ "train_samples_per_second": 16.473,
213
+ "train_steps_per_second": 0.256
214
  }
215
  ],
216
  "logging_steps": 10,
217
+ "max_steps": 122,
218
  "num_input_tokens_seen": 0,
219
+ "num_train_epochs": 2,
220
  "save_steps": 500,
221
  "stateful_callbacks": {
222
  "TrainerControl": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dac2d07c37aa847df2089590c9af2a8adc87c2157da26b3cacbb11a6c1c4ae8
3
  size 7800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6be99c816b5db8c5a3acc17c9a53f9de39b2335b62b1dba0929d841f39fa676d
3
  size 7800