ShenaoZ commited on
Commit
33e1fe1
·
verified ·
1 Parent(s): 3fd58d1

Model save

Browse files
README.md CHANGED
@@ -2,16 +2,9 @@
2
  license: mit
3
  base_model: HuggingFaceH4/mistral-7b-sft-beta
4
  tags:
5
- - alignment-handbook
6
  - trl
7
  - dpo
8
  - generated_from_trainer
9
- - trl
10
- - dpo
11
- - generated_from_trainer
12
- datasets:
13
- - updated
14
- - original
15
  model-index:
16
  - name: 0.001_3iters_bs256_declr_nodpo_userresponse_iter_1
17
  results: []
@@ -22,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
22
 
23
  # 0.001_3iters_bs256_declr_nodpo_userresponse_iter_1
24
 
25
- This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the updated and the original datasets.
26
 
27
  ## Model description
28
 
 
2
  license: mit
3
  base_model: HuggingFaceH4/mistral-7b-sft-beta
4
  tags:
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
 
 
 
 
 
 
8
  model-index:
9
  - name: 0.001_3iters_bs256_declr_nodpo_userresponse_iter_1
10
  results: []
 
15
 
16
  # 0.001_3iters_bs256_declr_nodpo_userresponse_iter_1
17
 
18
+ This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
19
 
20
  ## Model description
21
 
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9905956112852664,
3
  "total_flos": 0.0,
4
- "train_loss": 0.5763288389278364,
5
- "train_runtime": 3299.3537,
6
  "train_samples": 20378,
7
- "train_samples_per_second": 6.176,
8
  "train_steps_per_second": 0.024
9
  }
 
1
  {
2
  "epoch": 0.9905956112852664,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.5773088418984715,
5
+ "train_runtime": 3291.7593,
6
  "train_samples": 20378,
7
+ "train_samples_per_second": 6.191,
8
  "train_steps_per_second": 0.024
9
  }
config.json CHANGED
@@ -21,6 +21,6 @@
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.40.0",
24
- "use_cache": true,
25
  "vocab_size": 32000
26
  }
 
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.40.0",
24
+ "use_cache": false,
25
  "vocab_size": 32000
26
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef77c6322332c4c430e9b1ab632749609e42663241e959f8d8662acf03c1b98b
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f22fca732a9f6b3d6c58ab42eeb476902a58cc9d22a8ae8d11e017da95eda8b
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:219edbf21d323e47afe96ca740d03c559dd91de22d1a308d18b1d386dfd4122c
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dcc828c3cd23fbfbae0251650a30083fb43da293f1ceec5c3ead5c00a02d75b
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47c29053b2b9f7324afc708af288407578509665b26aa9b1ef43ca60f52986a8
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ebcc55be4fad0d9123d62539bc046be1eb9d3d43b4fcd5c7aadd6e2847defb2
3
  size 4540516344
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9905956112852664,
3
  "total_flos": 0.0,
4
- "train_loss": 0.5763288389278364,
5
- "train_runtime": 3299.3537,
6
  "train_samples": 20378,
7
- "train_samples_per_second": 6.176,
8
  "train_steps_per_second": 0.024
9
  }
 
1
  {
2
  "epoch": 0.9905956112852664,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.5773088418984715,
5
+ "train_runtime": 3291.7593,
6
  "train_samples": 20378,
7
+ "train_samples_per_second": 6.191,
8
  "train_steps_per_second": 0.024
9
  }
trainer_state.json CHANGED
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.012539184952978056,
13
- "grad_norm": 12.995569272950581,
14
  "learning_rate": 6.25e-08,
15
  "logits/chosen": -2.895261764526367,
16
  "logits/rejected": -2.870915412902832,
@@ -27,130 +27,130 @@
27
  },
28
  {
29
  "epoch": 0.12539184952978055,
30
- "grad_norm": 10.597824924824353,
31
  "learning_rate": 4.990217055187362e-07,
32
- "logits/chosen": -2.7800254821777344,
33
- "logits/rejected": -2.764801502227783,
34
- "logps/chosen": -215.308349609375,
35
- "logps/pi_response": -70.83273315429688,
36
  "logps/ref_response": -69.94066619873047,
37
- "logps/rejected": -113.73969268798828,
38
  "loss": 0.6874,
39
- "rewards/accuracies": 0.5625,
40
- "rewards/chosen": 0.0037591855507344007,
41
- "rewards/margins": 0.008248809725046158,
42
- "rewards/rejected": -0.004489624407142401,
43
  "step": 10
44
  },
45
  {
46
  "epoch": 0.2507836990595611,
47
- "grad_norm": 5.532320834011963,
48
  "learning_rate": 4.655786431300069e-07,
49
- "logits/chosen": -2.677351713180542,
50
- "logits/rejected": -2.6526131629943848,
51
- "logps/chosen": -242.95101928710938,
52
- "logps/pi_response": -88.7496109008789,
53
  "logps/ref_response": -67.22555541992188,
54
- "logps/rejected": -121.909423828125,
55
- "loss": 0.6346,
56
- "rewards/accuracies": 0.7281249761581421,
57
- "rewards/chosen": -0.015267712064087391,
58
- "rewards/margins": 0.16114801168441772,
59
- "rewards/rejected": -0.17641572654247284,
60
  "step": 20
61
  },
62
  {
63
  "epoch": 0.3761755485893417,
64
- "grad_norm": 5.696693225944807,
65
  "learning_rate": 3.9061232191019517e-07,
66
- "logits/chosen": -2.559924602508545,
67
- "logits/rejected": -2.529345989227295,
68
- "logps/chosen": -235.51962280273438,
69
- "logps/pi_response": -122.11323547363281,
70
  "logps/ref_response": -66.2011489868164,
71
- "logps/rejected": -152.02407836914062,
72
- "loss": 0.5998,
73
  "rewards/accuracies": 0.7562500238418579,
74
- "rewards/chosen": -0.19383877515792847,
75
- "rewards/margins": 0.27510958909988403,
76
- "rewards/rejected": -0.4689483046531677,
77
  "step": 30
78
  },
79
  {
80
  "epoch": 0.5015673981191222,
81
- "grad_norm": 7.1826029149350745,
82
  "learning_rate": 2.8856223324132555e-07,
83
- "logits/chosen": -2.5519003868103027,
84
- "logits/rejected": -2.5247206687927246,
85
- "logps/chosen": -251.58963012695312,
86
- "logps/pi_response": -164.69174194335938,
87
  "logps/ref_response": -70.73230743408203,
88
- "logps/rejected": -184.05503845214844,
89
- "loss": 0.5645,
90
- "rewards/accuracies": 0.7437499761581421,
91
- "rewards/chosen": -0.3932897448539734,
92
- "rewards/margins": 0.4179116189479828,
93
- "rewards/rejected": -0.8112013936042786,
94
  "step": 40
95
  },
96
  {
97
  "epoch": 0.6269592476489029,
98
- "grad_norm": 8.887185971495962,
99
  "learning_rate": 1.7908455541642582e-07,
100
- "logits/chosen": -2.558701753616333,
101
- "logits/rejected": -2.52125883102417,
102
- "logps/chosen": -286.60028076171875,
103
- "logps/pi_response": -173.02108764648438,
104
  "logps/ref_response": -68.65879821777344,
105
- "logps/rejected": -191.91122436523438,
106
- "loss": 0.5437,
107
- "rewards/accuracies": 0.7875000238418579,
108
- "rewards/chosen": -0.38747724890708923,
109
- "rewards/margins": 0.5338504314422607,
110
- "rewards/rejected": -0.9213277101516724,
111
  "step": 50
112
  },
113
  {
114
  "epoch": 0.7523510971786834,
115
- "grad_norm": 16.631565802707648,
116
  "learning_rate": 8.32661172908373e-08,
117
- "logits/chosen": -2.553363800048828,
118
- "logits/rejected": -2.5198798179626465,
119
- "logps/chosen": -244.05642700195312,
120
- "logps/pi_response": -171.74761962890625,
121
  "logps/ref_response": -62.150115966796875,
122
- "logps/rejected": -194.5513458251953,
123
- "loss": 0.536,
124
  "rewards/accuracies": 0.7593749761581421,
125
- "rewards/chosen": -0.41183900833129883,
126
- "rewards/margins": 0.5683741569519043,
127
- "rewards/rejected": -0.9802130460739136,
128
  "step": 60
129
  },
130
  {
131
  "epoch": 0.877742946708464,
132
- "grad_norm": 10.81361845883075,
133
  "learning_rate": 1.956279997278043e-08,
134
- "logits/chosen": -2.5614013671875,
135
- "logits/rejected": -2.529111385345459,
136
- "logps/chosen": -282.7896423339844,
137
- "logps/pi_response": -187.90109252929688,
138
  "logps/ref_response": -70.37203216552734,
139
- "logps/rejected": -210.452392578125,
140
- "loss": 0.5313,
141
- "rewards/accuracies": 0.753125011920929,
142
- "rewards/chosen": -0.42874473333358765,
143
- "rewards/margins": 0.621096134185791,
144
- "rewards/rejected": -1.0498409271240234,
145
  "step": 70
146
  },
147
  {
148
  "epoch": 0.9905956112852664,
149
  "step": 79,
150
  "total_flos": 0.0,
151
- "train_loss": 0.5763288389278364,
152
- "train_runtime": 3299.3537,
153
- "train_samples_per_second": 6.176,
154
  "train_steps_per_second": 0.024
155
  }
156
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.012539184952978056,
13
+ "grad_norm": 12.994366765815737,
14
  "learning_rate": 6.25e-08,
15
  "logits/chosen": -2.895261764526367,
16
  "logits/rejected": -2.870915412902832,
 
27
  },
28
  {
29
  "epoch": 0.12539184952978055,
30
+ "grad_norm": 10.590546723806328,
31
  "learning_rate": 4.990217055187362e-07,
32
+ "logits/chosen": -2.779981851577759,
33
+ "logits/rejected": -2.7648234367370605,
34
+ "logps/chosen": -215.31658935546875,
35
+ "logps/pi_response": -70.84928894042969,
36
  "logps/ref_response": -69.94066619873047,
37
+ "logps/rejected": -113.74195861816406,
38
  "loss": 0.6874,
39
+ "rewards/accuracies": 0.5659722089767456,
40
+ "rewards/chosen": 0.003676379332318902,
41
+ "rewards/margins": 0.008188692852854729,
42
+ "rewards/rejected": -0.00451231375336647,
43
  "step": 10
44
  },
45
  {
46
  "epoch": 0.2507836990595611,
47
+ "grad_norm": 5.515447952912991,
48
  "learning_rate": 4.655786431300069e-07,
49
+ "logits/chosen": -2.677419900894165,
50
+ "logits/rejected": -2.6526081562042236,
51
+ "logps/chosen": -242.970947265625,
52
+ "logps/pi_response": -88.79570007324219,
53
  "logps/ref_response": -67.22555541992188,
54
+ "logps/rejected": -121.95565032958984,
55
+ "loss": 0.6345,
56
+ "rewards/accuracies": 0.7250000238418579,
57
+ "rewards/chosen": -0.015466970391571522,
58
+ "rewards/margins": 0.16141095757484436,
59
+ "rewards/rejected": -0.17687790095806122,
60
  "step": 20
61
  },
62
  {
63
  "epoch": 0.3761755485893417,
64
+ "grad_norm": 5.718252994097399,
65
  "learning_rate": 3.9061232191019517e-07,
66
+ "logits/chosen": -2.560062885284424,
67
+ "logits/rejected": -2.529508113861084,
68
+ "logps/chosen": -235.5645751953125,
69
+ "logps/pi_response": -122.2088394165039,
70
  "logps/ref_response": -66.2011489868164,
71
+ "logps/rejected": -152.08804321289062,
72
+ "loss": 0.5997,
73
  "rewards/accuracies": 0.7562500238418579,
74
+ "rewards/chosen": -0.1942879855632782,
75
+ "rewards/margins": 0.27530020475387573,
76
+ "rewards/rejected": -0.4695882201194763,
77
  "step": 30
78
  },
79
  {
80
  "epoch": 0.5015673981191222,
81
+ "grad_norm": 6.9513019055279806,
82
  "learning_rate": 2.8856223324132555e-07,
83
+ "logits/chosen": -2.5515570640563965,
84
+ "logits/rejected": -2.5246214866638184,
85
+ "logps/chosen": -250.9296112060547,
86
+ "logps/pi_response": -163.28016662597656,
87
  "logps/ref_response": -70.73230743408203,
88
+ "logps/rejected": -182.60157775878906,
89
+ "loss": 0.5653,
90
+ "rewards/accuracies": 0.753125011920929,
91
+ "rewards/chosen": -0.38668954372406006,
92
+ "rewards/margins": 0.40997713804244995,
93
+ "rewards/rejected": -0.7966667413711548,
94
  "step": 40
95
  },
96
  {
97
  "epoch": 0.6269592476489029,
98
+ "grad_norm": 9.300006842094811,
99
  "learning_rate": 1.7908455541642582e-07,
100
+ "logits/chosen": -2.5594820976257324,
101
+ "logits/rejected": -2.5220818519592285,
102
+ "logps/chosen": -286.2401428222656,
103
+ "logps/pi_response": -171.46835327148438,
104
  "logps/ref_response": -68.65879821777344,
105
+ "logps/rejected": -190.53627014160156,
106
+ "loss": 0.5449,
107
+ "rewards/accuracies": 0.784375011920929,
108
+ "rewards/chosen": -0.3838757574558258,
109
+ "rewards/margins": 0.5237023234367371,
110
+ "rewards/rejected": -0.90757817029953,
111
  "step": 50
112
  },
113
  {
114
  "epoch": 0.7523510971786834,
115
+ "grad_norm": 16.372680174050775,
116
  "learning_rate": 8.32661172908373e-08,
117
+ "logits/chosen": -2.552124261856079,
118
+ "logits/rejected": -2.518066167831421,
119
+ "logps/chosen": -246.05886840820312,
120
+ "logps/pi_response": -173.25637817382812,
121
  "logps/ref_response": -62.150115966796875,
122
+ "logps/rejected": -196.02783203125,
123
+ "loss": 0.5395,
124
  "rewards/accuracies": 0.7593749761581421,
125
+ "rewards/chosen": -0.4318634569644928,
126
+ "rewards/margins": 0.563114583492279,
127
+ "rewards/rejected": -0.9949780702590942,
128
  "step": 60
129
  },
130
  {
131
  "epoch": 0.877742946708464,
132
+ "grad_norm": 8.403502856566952,
133
  "learning_rate": 1.956279997278043e-08,
134
+ "logits/chosen": -2.559234619140625,
135
+ "logits/rejected": -2.527210235595703,
136
+ "logps/chosen": -281.3694763183594,
137
+ "logps/pi_response": -184.9126739501953,
138
  "logps/ref_response": -70.37203216552734,
139
+ "logps/rejected": -207.85086059570312,
140
+ "loss": 0.5331,
141
+ "rewards/accuracies": 0.7406250238418579,
142
+ "rewards/chosen": -0.4145432412624359,
143
+ "rewards/margins": 0.6092821359634399,
144
+ "rewards/rejected": -1.0238254070281982,
145
  "step": 70
146
  },
147
  {
148
  "epoch": 0.9905956112852664,
149
  "step": 79,
150
  "total_flos": 0.0,
151
+ "train_loss": 0.5773088418984715,
152
+ "train_runtime": 3291.7593,
153
+ "train_samples_per_second": 6.191,
154
  "train_steps_per_second": 0.024
155
  }
156
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d0c3e76061c137e90866e02aa6bbc07931f10e4c35de081b18dec8af745d317
3
  size 6328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c739fd0ead78f45a1f03fbec91a53ef8de7c93f38163455a579aedbbfb77d02b
3
  size 6328