xammi committed on
Commit c192b56
1 Parent(s): c54ccb1

End of training

README.md CHANGED
@@ -21,7 +21,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [unsloth/tinyllama-bnb-4bit](https://huggingface.co/unsloth/tinyllama-bnb-4bit) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.2394
+- Loss: 1.0067
 
 ## Model description
 
@@ -48,64 +48,59 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 3
+- num_epochs: 2
 - mixed_precision_training: Native AMP
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 1.788 | 0.0578 | 10 | 1.4309 |
-| 1.3849 | 0.1156 | 20 | 1.3661 |
-| 1.3425 | 0.1734 | 30 | 1.3366 |
-| 1.3263 | 0.2312 | 40 | 1.3180 |
-| 1.3223 | 0.2890 | 50 | 1.3034 |
-| 1.293 | 0.3468 | 60 | 1.2890 |
-| 1.2815 | 0.4046 | 70 | 1.2816 |
-| 1.275 | 0.4624 | 80 | 1.2763 |
-| 1.2765 | 0.5202 | 90 | 1.2676 |
-| 1.2786 | 0.5780 | 100 | 1.2635 |
-| 1.25 | 0.6358 | 110 | 1.2576 |
-| 1.2476 | 0.6936 | 120 | 1.2524 |
-| 1.2479 | 0.7514 | 130 | 1.2491 |
-| 1.2497 | 0.8092 | 140 | 1.2437 |
-| 1.2362 | 0.8671 | 150 | 1.2409 |
-| 1.2511 | 0.9249 | 160 | 1.2363 |
-| 1.2368 | 0.9827 | 170 | 1.2335 |
-| 1.1279 | 1.0405 | 180 | 1.2404 |
-| 1.0929 | 1.0983 | 190 | 1.2408 |
-| 1.0803 | 1.1561 | 200 | 1.2426 |
-| 1.0939 | 1.2139 | 210 | 1.2391 |
-| 1.0869 | 1.2717 | 220 | 1.2364 |
-| 1.084 | 1.3295 | 230 | 1.2337 |
-| 1.0967 | 1.3873 | 240 | 1.2314 |
-| 1.0922 | 1.4451 | 250 | 1.2287 |
-| 1.0774 | 1.5029 | 260 | 1.2264 |
-| 1.0825 | 1.5607 | 270 | 1.2247 |
-| 1.08 | 1.6185 | 280 | 1.2217 |
-| 1.0828 | 1.6763 | 290 | 1.2174 |
-| 1.0791 | 1.7341 | 300 | 1.2169 |
-| 1.0907 | 1.7919 | 310 | 1.2148 |
-| 1.0748 | 1.8497 | 320 | 1.2137 |
-| 1.082 | 1.9075 | 330 | 1.2104 |
-| 1.0753 | 1.9653 | 340 | 1.2081 |
-| 1.0165 | 2.0231 | 350 | 1.2490 |
-| 0.9266 | 2.0809 | 360 | 1.2431 |
-| 0.9239 | 2.1387 | 370 | 1.2440 |
-| 0.9207 | 2.1965 | 380 | 1.2443 |
-| 0.9218 | 2.2543 | 390 | 1.2485 |
-| 0.9262 | 2.3121 | 400 | 1.2447 |
-| 0.9246 | 2.3699 | 410 | 1.2463 |
-| 0.9145 | 2.4277 | 420 | 1.2422 |
-| 0.9178 | 2.4855 | 430 | 1.2457 |
-| 0.9229 | 2.5434 | 440 | 1.2421 |
-| 0.9205 | 2.6012 | 450 | 1.2429 |
-| 0.9184 | 2.6590 | 460 | 1.2436 |
-| 0.93 | 2.7168 | 470 | 1.2395 |
-| 0.9111 | 2.7746 | 480 | 1.2418 |
-| 0.9125 | 2.8324 | 490 | 1.2396 |
-| 0.9268 | 2.8902 | 500 | 1.2389 |
-| 0.9148 | 2.9480 | 510 | 1.2394 |
+| 1.4976 | 0.0431 | 10 | 1.3395 |
+| 1.2954 | 0.0862 | 20 | 1.2420 |
+| 1.2219 | 0.1293 | 30 | 1.1957 |
+| 1.1914 | 0.1724 | 40 | 1.1660 |
+| 1.1515 | 0.2155 | 50 | 1.1449 |
+| 1.1491 | 0.2586 | 60 | 1.1301 |
+| 1.129 | 0.3017 | 70 | 1.1172 |
+| 1.1175 | 0.3448 | 80 | 1.1070 |
+| 1.1129 | 0.3879 | 90 | 1.0982 |
+| 1.088 | 0.4310 | 100 | 1.0902 |
+| 1.0823 | 0.4741 | 110 | 1.0847 |
+| 1.0912 | 0.5172 | 120 | 1.0789 |
+| 1.0847 | 0.5603 | 130 | 1.0734 |
+| 1.0773 | 0.6034 | 140 | 1.0689 |
+| 1.0671 | 0.6466 | 150 | 1.0632 |
+| 1.0711 | 0.6897 | 160 | 1.0602 |
+| 1.0607 | 0.7328 | 170 | 1.0558 |
+| 1.0579 | 0.7759 | 180 | 1.0515 |
+| 1.0565 | 0.8190 | 190 | 1.0481 |
+| 1.0605 | 0.8621 | 200 | 1.0459 |
+| 1.0626 | 0.9052 | 210 | 1.0413 |
+| 1.0363 | 0.9483 | 220 | 1.0392 |
+| 1.0422 | 0.9914 | 230 | 1.0363 |
+| 1.0365 | 1.0345 | 240 | 1.0359 |
+| 1.0083 | 1.0776 | 250 | 1.0327 |
+| 1.0138 | 1.1207 | 260 | 1.0297 |
+| 1.015 | 1.1638 | 270 | 1.0275 |
+| 1.0116 | 1.2069 | 280 | 1.0258 |
+| 1.0168 | 1.25 | 290 | 1.0245 |
+| 1.0147 | 1.2931 | 300 | 1.0225 |
+| 1.022 | 1.3362 | 310 | 1.0203 |
+| 1.0118 | 1.3793 | 320 | 1.0199 |
+| 1.0069 | 1.4224 | 330 | 1.0181 |
+| 1.0042 | 1.4655 | 340 | 1.0163 |
+| 0.9963 | 1.5086 | 350 | 1.0145 |
+| 1.0127 | 1.5517 | 360 | 1.0137 |
+| 1.0076 | 1.5948 | 370 | 1.0125 |
+| 1.0127 | 1.6379 | 380 | 1.0113 |
+| 1.0048 | 1.6810 | 390 | 1.0102 |
+| 0.9904 | 1.7241 | 400 | 1.0093 |
+| 0.9971 | 1.7672 | 410 | 1.0091 |
+| 1.0073 | 1.8103 | 420 | 1.0082 |
+| 1.0138 | 1.8534 | 430 | 1.0077 |
+| 0.9952 | 1.8966 | 440 | 1.0074 |
+| 0.9977 | 1.9397 | 450 | 1.0070 |
+| 0.9966 | 1.9828 | 460 | 1.0067 |
 
 
 ### Framework versions
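The hyperparameters listed in this README hunk map directly onto the Hugging Face `Trainer` API. Below is a minimal sketch, assuming the standard `transformers.TrainingArguments`; the learning rate and the per-device/accumulation split behind the total batch size of 32 are not visible in this diff, so those values are placeholders.

```python
# Sketch only: reconstructs the hyperparameters listed above with
# transformers.TrainingArguments. Values marked "placeholder" are
# assumptions, not settings taken from this diff.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="outputs",            # placeholder path
    num_train_epochs=2,              # changed from 3 in this commit
    per_device_train_batch_size=8,   # placeholder: 8 x 4 = total_train_batch_size 32
    gradient_accumulation_steps=4,   # placeholder split
    learning_rate=2e-4,              # placeholder; not shown in this hunk
    lr_scheduler_type="linear",
    adam_beta1=0.9,                  # Adam betas/epsilon as listed above
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    fp16=True,                       # "Native AMP" mixed-precision training
    eval_strategy="steps",
    eval_steps=10,                   # matches the 10-step cadence in the table above
    logging_steps=10,
)
```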
adapter_config.json CHANGED
@@ -11,7 +11,7 @@
   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 32,
-  "lora_dropout": 0.1,
+  "lora_dropout": 0.2,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
@@ -20,10 +20,17 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
+    "down_proj",
     "mlp.gate_up_proj",
+    "v_proj",
+    "q_proj",
+    "up_proj",
+    "mlp.down_proj",
+    "o_proj",
     "self_attn.qkv_proj.weight",
     "self_attn.o_proj.weight",
-    "mlp.down_proj"
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f80b9db16e5fed0bf6ecae0adbad111753a18767096b07670e575d400522b07e
+oid sha256:65aac8e52d2b349519b5438dcb882cfad4794a93ffd157f9b49d878172d92e61
 size 403743472
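The updated safetensors blob holds the retrained LoRA adapter weights. A minimal loading sketch, assuming the standard PEFT workflow; the adapter repo id below is a hypothetical placeholder, not taken from this page.

```python
# Sketch only: attach this adapter to its base model with peft.
# "xammi/<adapter-repo>" is a hypothetical id; substitute this repo's actual name.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained("unsloth/tinyllama-bnb-4bit")
tokenizer = AutoTokenizer.from_pretrained("unsloth/tinyllama-bnb-4bit")
model = PeftModel.from_pretrained(base_model, "xammi/<adapter-repo>")  # hypothetical id
```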
runs/Sep23_13-14-37_sammie/events.out.tfevents.1727118893.sammie.79670.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16a8e47d84587b1d54eb6d070362ce65ae6d5e60efb01d0524b858e9e6f74578
+size 28373
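The added event file contains the TensorBoard scalars for this run. A minimal reading sketch, assuming the `tensorboard` Python package is installed; the `train/loss` tag is the usual `Trainer` naming, an assumption not confirmed by this diff.

```python
# Sketch only: inspect the scalars in the event file added above.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator(
    "runs/Sep23_13-14-37_sammie/events.out.tfevents.1727118893.sammie.79670.0"
)
ea.Reload()
print(ea.Tags()["scalars"])              # list the available scalar tags
for event in ea.Scalars("train/loss"):   # assumed tag name (Trainer default)
    print(event.step, event.value)
```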
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5c8d4ca54c402a351b455c5aa2a1c419b359bb8071d4496b4ab4bdb1d95dda2
+oid sha256:b191efad8e0c67e59627b59005b61e1c57ea0e8f2b81b8642ae1ab1df6697c9a
 size 5560
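training_args.bin is the pickled `TrainingArguments` object that the `Trainer` saves alongside checkpoints. A minimal inspection sketch; note that unpickling executes arbitrary code, so only load files from sources you trust.

```python
# Sketch only: inspect the pickled TrainingArguments.
# weights_only=False is required because this is a full pickle; use it
# only on files from a trusted source.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs, args.lr_scheduler_type)
```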