Jessiecs committed on
Commit
0991a66
·
verified ·
1 Parent(s): 2ead679

Jessiecs/llama-2-7b-a3-1

Browse files
README.md CHANGED
@@ -50,8 +50,8 @@ The following hyperparameters were used during training:
50
 
51
  ### Framework versions
52
 
53
- - PEFT 0.8.2
54
  - Transformers 4.39.0.dev0
55
  - Pytorch 2.1.0+cu121
56
- - Datasets 2.17.1
57
  - Tokenizers 0.15.2
 
50
 
51
  ### Framework versions
52
 
53
+ - PEFT 0.9.1.dev0
54
  - Transformers 4.39.0.dev0
55
  - Pytorch 2.1.0+cu121
56
+ - Datasets 2.18.0
57
  - Tokenizers 0.15.2
adapter_config.json CHANGED
@@ -19,14 +19,9 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "o_proj",
23
- "gate_proj",
24
- "v_proj",
25
- "down_proj",
26
- "q_proj",
27
- "k_proj",
28
- "up_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
 
31
  "use_rslora": false
32
  }
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "base_layer"
 
 
 
 
 
 
23
  ],
24
  "task_type": "CAUSAL_LM",
25
+ "use_dora": false,
26
  "use_rslora": false
27
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d96377ef3b44b73b41c9e397b84240de3c2182ce8439c2003588edfe11babc6
3
- size 159967880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7000b23c914cd224db2ce8978cd9850ea53b3ea53b1ae1810a8e5b5aafd9799
3
+ size 319941280
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.01,
3
- "total_flos": 1332162927353856.0,
4
- "train_loss": 1.5293153256177903,
5
- "train_runtime": 150.2786,
6
- "train_samples_per_second": 0.532,
7
- "train_steps_per_second": 0.133
8
  }
 
1
  {
2
  "epoch": 0.01,
3
+ "total_flos": 1201627119648768.0,
4
+ "train_loss": 1.4025826781988144,
5
+ "train_runtime": 63.619,
6
+ "train_samples_per_second": 1.257,
7
+ "train_steps_per_second": 0.314
8
  }
runs/Mar03_23-45-18_806464d2401f/events.out.tfevents.1709509519.806464d2401f.1315.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd103f01c228160c61232247d56d86c92efd9997dd973462b089275b98aff4b7
3
+ size 5094
runs/Mar03_23-51-27_806464d2401f/events.out.tfevents.1709509887.806464d2401f.1315.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc50255424634284e358127d57733d452f9d7bf6fdb96685f85a981cdbf8a9d5
3
+ size 9582
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.01,
3
- "total_flos": 1332162927353856.0,
4
- "train_loss": 1.5293153256177903,
5
- "train_runtime": 150.2786,
6
- "train_samples_per_second": 0.532,
7
- "train_steps_per_second": 0.133
8
  }
 
1
  {
2
  "epoch": 0.01,
3
+ "total_flos": 1201627119648768.0,
4
+ "train_loss": 1.4025826781988144,
5
+ "train_runtime": 63.619,
6
+ "train_samples_per_second": 1.257,
7
+ "train_steps_per_second": 0.314
8
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.00812842918106076,
5
  "eval_steps": 500,
6
  "global_step": 20,
7
  "is_hyper_param_search": false,
@@ -10,152 +10,152 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "grad_norm": 1.7647329568862915,
14
  "learning_rate": 0.0001,
15
- "loss": 1.7092,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.0,
20
- "grad_norm": 1.3001192808151245,
21
  "learning_rate": 0.0002,
22
- "loss": 1.2963,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 0.0,
27
- "grad_norm": 1.5390220880508423,
28
  "learning_rate": 0.00018888888888888888,
29
- "loss": 1.5465,
30
  "step": 3
31
  },
32
  {
33
  "epoch": 0.0,
34
- "grad_norm": 2.012969493865967,
35
  "learning_rate": 0.00017777777777777779,
36
- "loss": 1.9059,
37
  "step": 4
38
  },
39
  {
40
  "epoch": 0.0,
41
- "grad_norm": 1.1633769273757935,
42
  "learning_rate": 0.0001666666666666667,
43
- "loss": 1.7834,
44
  "step": 5
45
  },
46
  {
47
  "epoch": 0.0,
48
- "grad_norm": 0.9096754789352417,
49
  "learning_rate": 0.00015555555555555556,
50
- "loss": 1.0758,
51
  "step": 6
52
  },
53
  {
54
  "epoch": 0.0,
55
- "grad_norm": 0.9768149852752686,
56
  "learning_rate": 0.00014444444444444444,
57
- "loss": 0.931,
58
  "step": 7
59
  },
60
  {
61
  "epoch": 0.0,
62
- "grad_norm": 1.853431224822998,
63
  "learning_rate": 0.00013333333333333334,
64
- "loss": 1.6621,
65
  "step": 8
66
  },
67
  {
68
  "epoch": 0.0,
69
- "grad_norm": 1.3012102842330933,
70
  "learning_rate": 0.00012222222222222224,
71
- "loss": 1.8367,
72
  "step": 9
73
  },
74
  {
75
  "epoch": 0.0,
76
- "grad_norm": 1.1715010404586792,
77
  "learning_rate": 0.00011111111111111112,
78
- "loss": 1.4482,
79
  "step": 10
80
  },
81
  {
82
  "epoch": 0.0,
83
- "grad_norm": 0.9172013401985168,
84
  "learning_rate": 0.0001,
85
- "loss": 1.3422,
86
  "step": 11
87
  },
88
  {
89
  "epoch": 0.0,
90
- "grad_norm": 1.617510437965393,
91
  "learning_rate": 8.888888888888889e-05,
92
- "loss": 1.4845,
93
  "step": 12
94
  },
95
  {
96
  "epoch": 0.01,
97
- "grad_norm": 1.1238973140716553,
98
  "learning_rate": 7.777777777777778e-05,
99
- "loss": 1.4094,
100
  "step": 13
101
  },
102
  {
103
  "epoch": 0.01,
104
- "grad_norm": 1.2244164943695068,
105
  "learning_rate": 6.666666666666667e-05,
106
- "loss": 1.8395,
107
  "step": 14
108
  },
109
  {
110
  "epoch": 0.01,
111
- "grad_norm": 3.9478447437286377,
112
  "learning_rate": 5.555555555555556e-05,
113
- "loss": 1.7928,
114
  "step": 15
115
  },
116
  {
117
  "epoch": 0.01,
118
- "grad_norm": 1.8160783052444458,
119
  "learning_rate": 4.4444444444444447e-05,
120
- "loss": 1.226,
121
  "step": 16
122
  },
123
  {
124
  "epoch": 0.01,
125
- "grad_norm": 1.9035887718200684,
126
  "learning_rate": 3.3333333333333335e-05,
127
- "loss": 1.599,
128
  "step": 17
129
  },
130
  {
131
  "epoch": 0.01,
132
- "grad_norm": 1.3164148330688477,
133
  "learning_rate": 2.2222222222222223e-05,
134
- "loss": 1.2311,
135
  "step": 18
136
  },
137
  {
138
  "epoch": 0.01,
139
- "grad_norm": 1.200212836265564,
140
  "learning_rate": 1.1111111111111112e-05,
141
- "loss": 1.926,
142
  "step": 19
143
  },
144
  {
145
  "epoch": 0.01,
146
- "grad_norm": 1.5367623567581177,
147
  "learning_rate": 0.0,
148
- "loss": 1.5407,
149
  "step": 20
150
  },
151
  {
152
  "epoch": 0.01,
153
  "step": 20,
154
- "total_flos": 1332162927353856.0,
155
- "train_loss": 1.5293153256177903,
156
- "train_runtime": 150.2786,
157
- "train_samples_per_second": 0.532,
158
- "train_steps_per_second": 0.133
159
  }
160
  ],
161
  "logging_steps": 1,
@@ -163,7 +163,7 @@
163
  "num_input_tokens_seen": 0,
164
  "num_train_epochs": 1,
165
  "save_steps": 500,
166
- "total_flos": 1332162927353856.0,
167
  "train_batch_size": 1,
168
  "trial_name": null,
169
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0081276033729554,
5
  "eval_steps": 500,
6
  "global_step": 20,
7
  "is_hyper_param_search": false,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "grad_norm": 1.8456366062164307,
14
  "learning_rate": 0.0001,
15
+ "loss": 1.7067,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.0,
20
+ "grad_norm": 2.7521228790283203,
21
  "learning_rate": 0.0002,
22
+ "loss": 2.4493,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 0.0,
27
+ "grad_norm": 1.7731740474700928,
28
  "learning_rate": 0.00018888888888888888,
29
+ "loss": 1.7336,
30
  "step": 3
31
  },
32
  {
33
  "epoch": 0.0,
34
+ "grad_norm": 2.415656089782715,
35
  "learning_rate": 0.00017777777777777779,
36
+ "loss": 2.0648,
37
  "step": 4
38
  },
39
  {
40
  "epoch": 0.0,
41
+ "grad_norm": 0.8457527756690979,
42
  "learning_rate": 0.0001666666666666667,
43
+ "loss": 1.1381,
44
  "step": 5
45
  },
46
  {
47
  "epoch": 0.0,
48
+ "grad_norm": 1.1292682886123657,
49
  "learning_rate": 0.00015555555555555556,
50
+ "loss": 1.7235,
51
  "step": 6
52
  },
53
  {
54
  "epoch": 0.0,
55
+ "grad_norm": 0.8633381128311157,
56
  "learning_rate": 0.00014444444444444444,
57
+ "loss": 1.0572,
58
  "step": 7
59
  },
60
  {
61
  "epoch": 0.0,
62
+ "grad_norm": 0.8930997848510742,
63
  "learning_rate": 0.00013333333333333334,
64
+ "loss": 1.1119,
65
  "step": 8
66
  },
67
  {
68
  "epoch": 0.0,
69
+ "grad_norm": 1.5441715717315674,
70
  "learning_rate": 0.00012222222222222224,
71
+ "loss": 1.6177,
72
  "step": 9
73
  },
74
  {
75
  "epoch": 0.0,
76
+ "grad_norm": 0.9879918694496155,
77
  "learning_rate": 0.00011111111111111112,
78
+ "loss": 1.2242,
79
  "step": 10
80
  },
81
  {
82
  "epoch": 0.0,
83
+ "grad_norm": 0.7914152145385742,
84
  "learning_rate": 0.0001,
85
+ "loss": 1.2375,
86
  "step": 11
87
  },
88
  {
89
  "epoch": 0.0,
90
+ "grad_norm": 0.8946468234062195,
91
  "learning_rate": 8.888888888888889e-05,
92
+ "loss": 0.9588,
93
  "step": 12
94
  },
95
  {
96
  "epoch": 0.01,
97
+ "grad_norm": 0.7223942279815674,
98
  "learning_rate": 7.777777777777778e-05,
99
+ "loss": 0.9085,
100
  "step": 13
101
  },
102
  {
103
  "epoch": 0.01,
104
+ "grad_norm": 1.2389971017837524,
105
  "learning_rate": 6.666666666666667e-05,
106
+ "loss": 1.0339,
107
  "step": 14
108
  },
109
  {
110
  "epoch": 0.01,
111
+ "grad_norm": 1.125956654548645,
112
  "learning_rate": 5.555555555555556e-05,
113
+ "loss": 1.4494,
114
  "step": 15
115
  },
116
  {
117
  "epoch": 0.01,
118
+ "grad_norm": 1.5024690628051758,
119
  "learning_rate": 4.4444444444444447e-05,
120
+ "loss": 1.3538,
121
  "step": 16
122
  },
123
  {
124
  "epoch": 0.01,
125
+ "grad_norm": 2.010714054107666,
126
  "learning_rate": 3.3333333333333335e-05,
127
+ "loss": 1.2944,
128
  "step": 17
129
  },
130
  {
131
  "epoch": 0.01,
132
+ "grad_norm": 2.901399850845337,
133
  "learning_rate": 2.2222222222222223e-05,
134
+ "loss": 1.3021,
135
  "step": 18
136
  },
137
  {
138
  "epoch": 0.01,
139
+ "grad_norm": 3.5073766708374023,
140
  "learning_rate": 1.1111111111111112e-05,
141
+ "loss": 1.2913,
142
  "step": 19
143
  },
144
  {
145
  "epoch": 0.01,
146
+ "grad_norm": 1.580161690711975,
147
  "learning_rate": 0.0,
148
+ "loss": 1.3949,
149
  "step": 20
150
  },
151
  {
152
  "epoch": 0.01,
153
  "step": 20,
154
+ "total_flos": 1201627119648768.0,
155
+ "train_loss": 1.4025826781988144,
156
+ "train_runtime": 63.619,
157
+ "train_samples_per_second": 1.257,
158
+ "train_steps_per_second": 0.314
159
  }
160
  ],
161
  "logging_steps": 1,
 
163
  "num_input_tokens_seen": 0,
164
  "num_train_epochs": 1,
165
  "save_steps": 500,
166
+ "total_flos": 1201627119648768.0,
167
  "train_batch_size": 1,
168
  "trial_name": null,
169
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:399b61aeb844ac4532af315b56fb1fc66c1e5f62d42132308aac5f34a33b4848
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f796c4f9a98cf6e61c7158bd8fa4e75db4e5279ef8cec5f1004e53fd513cb9d
3
  size 4920