xammi committed
Commit: f90aa2e
Parent: 1794d44

End of training

README.md CHANGED
@@ -21,7 +21,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [unsloth/tinyllama-chat-bnb-4bit](https://huggingface.co/unsloth/tinyllama-chat-bnb-4bit) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.6823
+- Loss: 1.3521
 
 ## Model description
 
@@ -49,38 +49,37 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 1
-- mixed_precision_training: Native AMP
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 4.5606 | 0.0377 | 10 | 4.0606 |
-| 3.7693 | 0.0753 | 20 | 3.5539 |
-| 3.3915 | 0.1130 | 30 | 3.2022 |
-| 3.0938 | 0.1507 | 40 | 2.9560 |
-| 2.8692 | 0.1883 | 50 | 2.7576 |
-| 2.6774 | 0.2260 | 60 | 2.5706 |
-| 2.5012 | 0.2637 | 70 | 2.3805 |
-| 2.3332 | 0.3013 | 80 | 2.2277 |
-| 2.159 | 0.3390 | 90 | 2.1030 |
-| 2.0806 | 0.3766 | 100 | 2.0125 |
-| 1.9781 | 0.4143 | 110 | 1.9497 |
-| 1.9683 | 0.4520 | 120 | 1.9005 |
-| 1.917 | 0.4896 | 130 | 1.8584 |
-| 1.8551 | 0.5273 | 140 | 1.8224 |
-| 1.8121 | 0.5650 | 150 | 1.7910 |
-| 1.7998 | 0.6026 | 160 | 1.7673 |
-| 1.7484 | 0.6403 | 170 | 1.7486 |
-| 1.7221 | 0.6780 | 180 | 1.7331 |
-| 1.7171 | 0.7156 | 190 | 1.7207 |
-| 1.7103 | 0.7533 | 200 | 1.7108 |
-| 1.7086 | 0.7910 | 210 | 1.7025 |
-| 1.7083 | 0.8286 | 220 | 1.6955 |
-| 1.7065 | 0.8663 | 230 | 1.6907 |
-| 1.6829 | 0.9040 | 240 | 1.6864 |
-| 1.6892 | 0.9416 | 250 | 1.6838 |
-| 1.6985 | 0.9793 | 260 | 1.6823 |
+| 3.8522 | 0.0377 | 10 | 3.0444 |
+| 2.7085 | 0.0753 | 20 | 2.4069 |
+| 2.191 | 0.1130 | 30 | 2.0027 |
+| 1.8996 | 0.1507 | 40 | 1.8018 |
+| 1.7538 | 0.1883 | 50 | 1.6850 |
+| 1.6563 | 0.2260 | 60 | 1.6161 |
+| 1.6275 | 0.2637 | 70 | 1.5722 |
+| 1.5313 | 0.3013 | 80 | 1.5377 |
+| 1.5277 | 0.3390 | 90 | 1.5104 |
+| 1.5039 | 0.3766 | 100 | 1.4889 |
+| 1.4768 | 0.4143 | 110 | 1.4694 |
+| 1.4552 | 0.4520 | 120 | 1.4523 |
+| 1.4481 | 0.4896 | 130 | 1.4385 |
+| 1.4223 | 0.5273 | 140 | 1.4233 |
+| 1.4145 | 0.5650 | 150 | 1.4118 |
+| 1.4207 | 0.6026 | 160 | 1.4012 |
+| 1.4004 | 0.6403 | 170 | 1.3925 |
+| 1.4316 | 0.6780 | 180 | 1.3849 |
+| 1.3841 | 0.7156 | 190 | 1.3784 |
+| 1.3747 | 0.7533 | 200 | 1.3722 |
+| 1.388 | 0.7910 | 210 | 1.3665 |
+| 1.3508 | 0.8286 | 220 | 1.3623 |
+| 1.386 | 0.8663 | 230 | 1.3586 |
+| 1.3605 | 0.9040 | 240 | 1.3555 |
+| 1.369 | 0.9416 | 250 | 1.3534 |
+| 1.3645 | 0.9793 | 260 | 1.3521 |
 
 
 ### Framework versions
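
For context, a minimal sketch (not part of this commit) of loading the resulting adapter on top of the 4-bit base model with the standard transformers/peft APIs. The adapter repo id below is a placeholder for this repository:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "unsloth/tinyllama-chat-bnb-4bit"
adapter_id = "xammi/<this-repo>"  # placeholder: substitute the actual adapter repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, device_map="auto")

# Attach the fine-tuned LoRA weights (adapter_model.safetensors) to the base model.
model = PeftModel.from_pretrained(base, adapter_id)

inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```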
adapter_config.json CHANGED
@@ -20,19 +20,19 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
+    "k_proj",
+    "mlp.gate_up_proj",
     "down_proj",
+    "v_proj",
+    "q_proj",
     "o_proj",
-    "mlp.gate_up_proj",
     "self_attn.o_proj.weight",
-    "up_proj",
-    "gate_proj",
     "mlp.down_proj",
-    "self_attn.qkv_proj.weight",
-    "v_proj",
-    "q_proj",
-    "k_proj"
+    "up_proj",
+    "self_attn.qkv_proj.weight"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
-  "use_rslora": false
+  "use_rslora": true
 }
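
The substantive change in this file is `use_rslora` flipping from `false` to `true`; the `target_modules` entries are only reordered, not changed. As a reference point, a hedged sketch (not the author's training script) of a peft `LoraConfig` that would produce this adapter config; the rank and alpha values are assumptions, since they fall outside this hunk:

```python
from peft import LoraConfig

config = LoraConfig(
    r=16,           # assumption: the rank is not shown in this hunk
    lora_alpha=16,  # assumption: alpha is not shown in this hunk
    target_modules=[
        "gate_proj", "k_proj", "mlp.gate_up_proj", "down_proj",
        "v_proj", "q_proj", "o_proj", "self_attn.o_proj.weight",
        "mlp.down_proj", "up_proj", "self_attn.qkv_proj.weight",
    ],
    task_type="CAUSAL_LM",
    use_dora=False,
    # Rank-stabilized LoRA (rsLoRA) scales each update by lora_alpha / sqrt(r)
    # instead of lora_alpha / r, keeping the effective scale stable at higher ranks.
    use_rslora=True,
)
```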
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60d95b10b6e140a9626a7058d5038528f2ff80148dc4569b881db56052046509
-size 40
+oid sha256:56803c4d98022be7dfa3f8c0e1e22fc44c4994d771418a572448c62920ab292b
+size 403743472
runs/Sep26_00-00-07_sammie/events.out.tfevents.1727330530.sammie.423563.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7d328eeb1fd2ee1c738e7e41da038082fa2a499f4c138a80588772bb7046075
+size 6059
runs/Sep26_00-05-40_sammie/events.out.tfevents.1727330787.sammie.423563.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06d71f55f938bbcfff0019190477660e8cec8bb67d5504d0ca5e4f580392b22b
+size 6059
runs/Sep26_00-16-01_sammie/events.out.tfevents.1727331385.sammie.430449.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb5ecb0e4177bd91562b8ea41ee32efba8d3a59ff9ee3d7150afc14a415e9f2f
+size 18837
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92ef00ea7a676ed017f06f373e7b051d049f571471eb21b599f6dcb299f671b4
-size 5560
+oid sha256:e88c7f613ec531155670d9379c212ae8331d83fa95b2fb8801a6da610e8bc9d9
+size 5624
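
To confirm what changed inside the updated `training_args.bin`: it is a pickled `transformers.TrainingArguments` object and can be inspected as in the hedged sketch below. Note that recent torch releases default to `weights_only=True`, so unpickling must be enabled explicitly, and only for files you trust:

```python
import torch

# training_args.bin is a pickled TrainingArguments; weights_only=False is
# required on newer torch versions and is safe only for trusted files.
args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs)   # 1, per the README
print(args.lr_scheduler_type)  # linear, per the README
print(args.optim)
```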