SergCorsi commited on
Commit
d718766
1 Parent(s): 895f815

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +1 -1
  2. adapter_config.json +2 -2
  3. adapter_model.safetensors +1 -1
  4. checkpoint-10/README.md +1 -1
  5. checkpoint-10/adapter_config.json +2 -2
  6. checkpoint-10/adapter_model.safetensors +1 -1
  7. checkpoint-10/optimizer.pt +1 -1
  8. checkpoint-10/scheduler.pt +1 -1
  9. checkpoint-10/trainer_state.json +10 -10
  10. checkpoint-10/training_args.bin +1 -1
  11. checkpoint-20/README.md +1 -1
  12. checkpoint-20/adapter_config.json +2 -2
  13. checkpoint-20/adapter_model.safetensors +1 -1
  14. checkpoint-20/optimizer.pt +1 -1
  15. checkpoint-20/scheduler.pt +1 -1
  16. checkpoint-20/trainer_state.json +16 -16
  17. checkpoint-20/training_args.bin +1 -1
  18. checkpoint-30/README.md +1 -1
  19. checkpoint-30/adapter_config.json +2 -2
  20. checkpoint-30/adapter_model.safetensors +1 -1
  21. checkpoint-30/optimizer.pt +1 -1
  22. checkpoint-30/scheduler.pt +1 -1
  23. checkpoint-30/trainer_state.json +23 -23
  24. checkpoint-30/training_args.bin +1 -1
  25. checkpoint-40/README.md +1 -1
  26. checkpoint-40/adapter_config.json +2 -2
  27. checkpoint-40/adapter_model.safetensors +1 -1
  28. checkpoint-40/optimizer.pt +1 -1
  29. checkpoint-40/scheduler.pt +1 -1
  30. checkpoint-40/trainer_state.json +30 -30
  31. checkpoint-40/training_args.bin +1 -1
  32. checkpoint-50/README.md +1 -1
  33. checkpoint-50/adapter_config.json +2 -2
  34. checkpoint-50/adapter_model.safetensors +1 -1
  35. checkpoint-50/optimizer.pt +1 -1
  36. checkpoint-50/scheduler.pt +1 -1
  37. checkpoint-50/trainer_state.json +37 -37
  38. checkpoint-50/training_args.bin +1 -1
  39. checkpoint-60/README.md +1 -1
  40. checkpoint-60/adapter_config.json +2 -2
  41. checkpoint-60/adapter_model.safetensors +1 -1
  42. checkpoint-60/optimizer.pt +1 -1
  43. checkpoint-60/scheduler.pt +1 -1
  44. checkpoint-60/trainer_state.json +44 -44
  45. checkpoint-60/training_args.bin +1 -1
  46. checkpoint-70/README.md +1 -1
  47. checkpoint-70/adapter_config.json +2 -2
  48. checkpoint-70/adapter_model.safetensors +1 -1
  49. checkpoint-70/optimizer.pt +1 -1
  50. checkpoint-70/scheduler.pt +1 -1
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- library_name: peft
3
  base_model: TheBloke/Llama-2-7B-fp16
 
4
  ---
5
 
6
  # Model Card for Model ID
 
1
  ---
 
2
  base_model: TheBloke/Llama-2-7B-fp16
3
+ library_name: peft
4
  ---
5
 
6
  # Model Card for Model ID
adapter_config.json CHANGED
@@ -21,9 +21,9 @@
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
- "v_proj",
25
  "o_proj",
26
- "q_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
+ "q_proj",
25
  "o_proj",
26
+ "v_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58a8f71955b57e8e1082d6f6f180646592764af968e225bda2b732733e64a34c
3
  size 67143296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bdfd134cf3b5e167c3aa127bf57024a3e8ff71b6b0ea16d5493a51a01d7e317
3
  size 67143296
checkpoint-10/README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- library_name: peft
3
  base_model: TheBloke/Llama-2-7B-fp16
 
4
  ---
5
 
6
  # Model Card for Model ID
 
1
  ---
 
2
  base_model: TheBloke/Llama-2-7B-fp16
3
+ library_name: peft
4
  ---
5
 
6
  # Model Card for Model ID
checkpoint-10/adapter_config.json CHANGED
@@ -21,9 +21,9 @@
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
- "v_proj",
25
  "o_proj",
26
- "q_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
+ "q_proj",
25
  "o_proj",
26
+ "v_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
checkpoint-10/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:232e478a97bd908880b31806be86d4debfd0492ee0eb83ba509e3293c6c3565a
3
  size 67143296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e16adf919d93d6441c2583be16d89fc2157635291e0c18a1835380e4dd25668
3
  size 67143296
checkpoint-10/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40afa20170f291b125eb81d0ffabecaf09477598d9e256ea01ff0b97e18e70a9
3
  size 134433530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90d3b34bfbd9c3f0886fa09e483a0a1fa8853028f68e4ce50843d14911e15412
3
  size 134433530
checkpoint-10/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a35af14c8f81087292db706fc180cce03fb0f692ed151b0664293b217dd11fa5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04d2341737bca7648a4cdb3a55768450f9758f2298ef492fe1db7f093eaa1902
3
  size 1064
checkpoint-10/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 1.733155369758606,
3
  "best_model_checkpoint": "/kaggle/working/checkpoint-10",
4
  "epoch": 1.1111111111111112,
5
  "eval_steps": 10,
@@ -10,24 +10,24 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.1111111111111112,
13
- "grad_norm": 0.022282764315605164,
14
- "learning_rate": 0.0001851851851851852,
15
- "loss": 2.0424,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.1111111111111112,
20
- "eval_loss": 1.733155369758606,
21
- "eval_runtime": 34.5543,
22
- "eval_samples_per_second": 1.042,
23
- "eval_steps_per_second": 0.145,
24
  "step": 10
25
  }
26
  ],
27
  "logging_steps": 10,
28
- "max_steps": 135,
29
  "num_input_tokens_seen": 0,
30
- "num_train_epochs": 15,
31
  "save_steps": 10,
32
  "stateful_callbacks": {
33
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 1.737181544303894,
3
  "best_model_checkpoint": "/kaggle/working/checkpoint-10",
4
  "epoch": 1.1111111111111112,
5
  "eval_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.1111111111111112,
13
+ "grad_norm": 0.02217627689242363,
14
+ "learning_rate": 0.00017777777777777779,
15
+ "loss": 2.0442,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.1111111111111112,
20
+ "eval_loss": 1.737181544303894,
21
+ "eval_runtime": 35.1318,
22
+ "eval_samples_per_second": 1.025,
23
+ "eval_steps_per_second": 0.142,
24
  "step": 10
25
  }
26
  ],
27
  "logging_steps": 10,
28
+ "max_steps": 90,
29
  "num_input_tokens_seen": 0,
30
+ "num_train_epochs": 10,
31
  "save_steps": 10,
32
  "stateful_callbacks": {
33
  "EarlyStoppingCallback": {
checkpoint-10/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7
3
  size 5112
checkpoint-20/README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- library_name: peft
3
  base_model: TheBloke/Llama-2-7B-fp16
 
4
  ---
5
 
6
  # Model Card for Model ID
 
1
  ---
 
2
  base_model: TheBloke/Llama-2-7B-fp16
3
+ library_name: peft
4
  ---
5
 
6
  # Model Card for Model ID
checkpoint-20/adapter_config.json CHANGED
@@ -21,9 +21,9 @@
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
- "v_proj",
25
  "o_proj",
26
- "q_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
+ "q_proj",
25
  "o_proj",
26
+ "v_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
checkpoint-20/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b330fca16a5ab17049bdd4e6e42df52bd8bd0ac1fbeb1577f683e7b1212f0d83
3
  size 67143296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5ab93af4ed23c52b82729ff3b3f871c19b732c90f1094f90d5a9f4ade1ccfac
3
  size 67143296
checkpoint-20/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83e5faf17f41fc7dcd6907ecd97bc5e5b82bfa377a6683ddd9d25e9c5a4f3efd
3
  size 134433530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a07f3020a10b2a9d3e215c9651b159e8c3b297ab1db69b013b8c7817d5f52a7c
3
  size 134433530
checkpoint-20/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18bf434d2955f164730aed9c7994cdd6c773ed80052fc3ef975fae4adef61283
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9e7e75183c7081ca7f8f52ddfd0d5f4b8e8dbcf7f7bcd495fc6e0cfff80e3a2
3
  size 1064
checkpoint-20/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 1.5428930521011353,
3
  "best_model_checkpoint": "/kaggle/working/checkpoint-20",
4
  "epoch": 2.2222222222222223,
5
  "eval_steps": 10,
@@ -10,39 +10,39 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.1111111111111112,
13
- "grad_norm": 0.022282764315605164,
14
- "learning_rate": 0.0001851851851851852,
15
- "loss": 2.0424,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.1111111111111112,
20
- "eval_loss": 1.733155369758606,
21
- "eval_runtime": 34.5543,
22
- "eval_samples_per_second": 1.042,
23
- "eval_steps_per_second": 0.145,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.2222222222222223,
28
- "grad_norm": 0.018981408327817917,
29
- "learning_rate": 0.00017037037037037037,
30
- "loss": 1.6072,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 2.2222222222222223,
35
- "eval_loss": 1.5428930521011353,
36
- "eval_runtime": 34.6485,
37
- "eval_samples_per_second": 1.039,
38
  "eval_steps_per_second": 0.144,
39
  "step": 20
40
  }
41
  ],
42
  "logging_steps": 10,
43
- "max_steps": 135,
44
  "num_input_tokens_seen": 0,
45
- "num_train_epochs": 15,
46
  "save_steps": 10,
47
  "stateful_callbacks": {
48
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 1.5489343404769897,
3
  "best_model_checkpoint": "/kaggle/working/checkpoint-20",
4
  "epoch": 2.2222222222222223,
5
  "eval_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.1111111111111112,
13
+ "grad_norm": 0.02217627689242363,
14
+ "learning_rate": 0.00017777777777777779,
15
+ "loss": 2.0442,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.1111111111111112,
20
+ "eval_loss": 1.737181544303894,
21
+ "eval_runtime": 35.1318,
22
+ "eval_samples_per_second": 1.025,
23
+ "eval_steps_per_second": 0.142,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.2222222222222223,
28
+ "grad_norm": 0.0346713550388813,
29
+ "learning_rate": 0.00015555555555555556,
30
+ "loss": 1.6131,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 2.2222222222222223,
35
+ "eval_loss": 1.5489343404769897,
36
+ "eval_runtime": 34.8402,
37
+ "eval_samples_per_second": 1.033,
38
  "eval_steps_per_second": 0.144,
39
  "step": 20
40
  }
41
  ],
42
  "logging_steps": 10,
43
+ "max_steps": 90,
44
  "num_input_tokens_seen": 0,
45
+ "num_train_epochs": 10,
46
  "save_steps": 10,
47
  "stateful_callbacks": {
48
  "EarlyStoppingCallback": {
checkpoint-20/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7
3
  size 5112
checkpoint-30/README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- library_name: peft
3
  base_model: TheBloke/Llama-2-7B-fp16
 
4
  ---
5
 
6
  # Model Card for Model ID
 
1
  ---
 
2
  base_model: TheBloke/Llama-2-7B-fp16
3
+ library_name: peft
4
  ---
5
 
6
  # Model Card for Model ID
checkpoint-30/adapter_config.json CHANGED
@@ -21,9 +21,9 @@
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
- "v_proj",
25
  "o_proj",
26
- "q_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
+ "q_proj",
25
  "o_proj",
26
+ "v_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
checkpoint-30/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:473c4bd6da7c40c632f6b8a627a42ab5e26f5bc3b977b422287013a48424e372
3
  size 67143296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8576250f42c32085cdb174e306461292b115ea33d910d0a59d062fcad935bf0
3
  size 67143296
checkpoint-30/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09a69f8968f0b0dc5be6245d8969030cda587b4b349b62c8e5c647bd89875fa2
3
  size 134433530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16613c572dcb0ccca606ca4a382a4476b3f69ed3cf64a7095e7f852e897c8426
3
  size 134433530
checkpoint-30/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8a3ad18d2d099317927f68afa34f1d799ef6644ec4025b52033947a650a29e2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14d970dabadfb95eaf7812b80cb7816a58d7911bb09df450b100b1c052b74a02
3
  size 1064
checkpoint-30/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 1.4176721572875977,
3
  "best_model_checkpoint": "/kaggle/working/checkpoint-30",
4
  "epoch": 3.3333333333333335,
5
  "eval_steps": 10,
@@ -10,54 +10,54 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.1111111111111112,
13
- "grad_norm": 0.022282764315605164,
14
- "learning_rate": 0.0001851851851851852,
15
- "loss": 2.0424,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.1111111111111112,
20
- "eval_loss": 1.733155369758606,
21
- "eval_runtime": 34.5543,
22
- "eval_samples_per_second": 1.042,
23
- "eval_steps_per_second": 0.145,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.2222222222222223,
28
- "grad_norm": 0.018981408327817917,
29
- "learning_rate": 0.00017037037037037037,
30
- "loss": 1.6072,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 2.2222222222222223,
35
- "eval_loss": 1.5428930521011353,
36
- "eval_runtime": 34.6485,
37
- "eval_samples_per_second": 1.039,
38
  "eval_steps_per_second": 0.144,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 3.3333333333333335,
43
- "grad_norm": 0.023157037794589996,
44
- "learning_rate": 0.00015555555555555556,
45
- "loss": 1.4025,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 3.3333333333333335,
50
- "eval_loss": 1.4176721572875977,
51
- "eval_runtime": 34.5433,
52
- "eval_samples_per_second": 1.042,
53
- "eval_steps_per_second": 0.145,
54
  "step": 30
55
  }
56
  ],
57
  "logging_steps": 10,
58
- "max_steps": 135,
59
  "num_input_tokens_seen": 0,
60
- "num_train_epochs": 15,
61
  "save_steps": 10,
62
  "stateful_callbacks": {
63
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 1.4295110702514648,
3
  "best_model_checkpoint": "/kaggle/working/checkpoint-30",
4
  "epoch": 3.3333333333333335,
5
  "eval_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.1111111111111112,
13
+ "grad_norm": 0.02217627689242363,
14
+ "learning_rate": 0.00017777777777777779,
15
+ "loss": 2.0442,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.1111111111111112,
20
+ "eval_loss": 1.737181544303894,
21
+ "eval_runtime": 35.1318,
22
+ "eval_samples_per_second": 1.025,
23
+ "eval_steps_per_second": 0.142,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.2222222222222223,
28
+ "grad_norm": 0.0346713550388813,
29
+ "learning_rate": 0.00015555555555555556,
30
+ "loss": 1.6131,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 2.2222222222222223,
35
+ "eval_loss": 1.5489343404769897,
36
+ "eval_runtime": 34.8402,
37
+ "eval_samples_per_second": 1.033,
38
  "eval_steps_per_second": 0.144,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 3.3333333333333335,
43
+ "grad_norm": 0.02501535415649414,
44
+ "learning_rate": 0.00013333333333333334,
45
+ "loss": 1.4152,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 3.3333333333333335,
50
+ "eval_loss": 1.4295110702514648,
51
+ "eval_runtime": 34.8537,
52
+ "eval_samples_per_second": 1.033,
53
+ "eval_steps_per_second": 0.143,
54
  "step": 30
55
  }
56
  ],
57
  "logging_steps": 10,
58
+ "max_steps": 90,
59
  "num_input_tokens_seen": 0,
60
+ "num_train_epochs": 10,
61
  "save_steps": 10,
62
  "stateful_callbacks": {
63
  "EarlyStoppingCallback": {
checkpoint-30/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7
3
  size 5112
checkpoint-40/README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- library_name: peft
3
  base_model: TheBloke/Llama-2-7B-fp16
 
4
  ---
5
 
6
  # Model Card for Model ID
 
1
  ---
 
2
  base_model: TheBloke/Llama-2-7B-fp16
3
+ library_name: peft
4
  ---
5
 
6
  # Model Card for Model ID
checkpoint-40/adapter_config.json CHANGED
@@ -21,9 +21,9 @@
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
- "v_proj",
25
  "o_proj",
26
- "q_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
+ "q_proj",
25
  "o_proj",
26
+ "v_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
checkpoint-40/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:801d3ce570c9b9ff575f960af3a00ccd6ba6d98cf418a3b2d0dc14a716f5dc87
3
  size 67143296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab38361a67b61947cafd5230ca79626082a1d26b72f5440faf199b3216bc6704
3
  size 67143296
checkpoint-40/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67c5b4e8da000e9b0681d015858d5f844f66483488fdbbc351086278b492e7ad
3
  size 134433530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a38dd3cb56490e5a9b4d6a05ea97f3a761cd71841c3d9f7f129c1e4c0b4730f
3
  size 134433530
checkpoint-40/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb020d6653a8f320452374f8532b2d5f261a1314bca08cdf554ed4cd89610334
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc5423f1af1182c2163f569e8f44b9ee18e1849c11acaaa76a185745ad274c02
3
  size 1064
checkpoint-40/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 1.3449772596359253,
3
  "best_model_checkpoint": "/kaggle/working/checkpoint-40",
4
  "epoch": 4.444444444444445,
5
  "eval_steps": 10,
@@ -10,69 +10,69 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.1111111111111112,
13
- "grad_norm": 0.022282764315605164,
14
- "learning_rate": 0.0001851851851851852,
15
- "loss": 2.0424,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.1111111111111112,
20
- "eval_loss": 1.733155369758606,
21
- "eval_runtime": 34.5543,
22
- "eval_samples_per_second": 1.042,
23
- "eval_steps_per_second": 0.145,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.2222222222222223,
28
- "grad_norm": 0.018981408327817917,
29
- "learning_rate": 0.00017037037037037037,
30
- "loss": 1.6072,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 2.2222222222222223,
35
- "eval_loss": 1.5428930521011353,
36
- "eval_runtime": 34.6485,
37
- "eval_samples_per_second": 1.039,
38
  "eval_steps_per_second": 0.144,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 3.3333333333333335,
43
- "grad_norm": 0.023157037794589996,
44
- "learning_rate": 0.00015555555555555556,
45
- "loss": 1.4025,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 3.3333333333333335,
50
- "eval_loss": 1.4176721572875977,
51
- "eval_runtime": 34.5433,
52
- "eval_samples_per_second": 1.042,
53
- "eval_steps_per_second": 0.145,
54
  "step": 30
55
  },
56
  {
57
  "epoch": 4.444444444444445,
58
- "grad_norm": 0.021338749676942825,
59
- "learning_rate": 0.00014074074074074076,
60
- "loss": 1.285,
61
  "step": 40
62
  },
63
  {
64
  "epoch": 4.444444444444445,
65
- "eval_loss": 1.3449772596359253,
66
- "eval_runtime": 34.5594,
67
- "eval_samples_per_second": 1.042,
68
- "eval_steps_per_second": 0.145,
69
  "step": 40
70
  }
71
  ],
72
  "logging_steps": 10,
73
- "max_steps": 135,
74
  "num_input_tokens_seen": 0,
75
- "num_train_epochs": 15,
76
  "save_steps": 10,
77
  "stateful_callbacks": {
78
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 1.3598744869232178,
3
  "best_model_checkpoint": "/kaggle/working/checkpoint-40",
4
  "epoch": 4.444444444444445,
5
  "eval_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.1111111111111112,
13
+ "grad_norm": 0.02217627689242363,
14
+ "learning_rate": 0.00017777777777777779,
15
+ "loss": 2.0442,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.1111111111111112,
20
+ "eval_loss": 1.737181544303894,
21
+ "eval_runtime": 35.1318,
22
+ "eval_samples_per_second": 1.025,
23
+ "eval_steps_per_second": 0.142,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.2222222222222223,
28
+ "grad_norm": 0.0346713550388813,
29
+ "learning_rate": 0.00015555555555555556,
30
+ "loss": 1.6131,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 2.2222222222222223,
35
+ "eval_loss": 1.5489343404769897,
36
+ "eval_runtime": 34.8402,
37
+ "eval_samples_per_second": 1.033,
38
  "eval_steps_per_second": 0.144,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 3.3333333333333335,
43
+ "grad_norm": 0.02501535415649414,
44
+ "learning_rate": 0.00013333333333333334,
45
+ "loss": 1.4152,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 3.3333333333333335,
50
+ "eval_loss": 1.4295110702514648,
51
+ "eval_runtime": 34.8537,
52
+ "eval_samples_per_second": 1.033,
53
+ "eval_steps_per_second": 0.143,
54
  "step": 30
55
  },
56
  {
57
  "epoch": 4.444444444444445,
58
+ "grad_norm": 0.02104916237294674,
59
+ "learning_rate": 0.00011111111111111112,
60
+ "loss": 1.3068,
61
  "step": 40
62
  },
63
  {
64
  "epoch": 4.444444444444445,
65
+ "eval_loss": 1.3598744869232178,
66
+ "eval_runtime": 35.0281,
67
+ "eval_samples_per_second": 1.028,
68
+ "eval_steps_per_second": 0.143,
69
  "step": 40
70
  }
71
  ],
72
  "logging_steps": 10,
73
+ "max_steps": 90,
74
  "num_input_tokens_seen": 0,
75
+ "num_train_epochs": 10,
76
  "save_steps": 10,
77
  "stateful_callbacks": {
78
  "EarlyStoppingCallback": {
checkpoint-40/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7
3
  size 5112
checkpoint-50/README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- library_name: peft
3
  base_model: TheBloke/Llama-2-7B-fp16
 
4
  ---
5
 
6
  # Model Card for Model ID
 
1
  ---
 
2
  base_model: TheBloke/Llama-2-7B-fp16
3
+ library_name: peft
4
  ---
5
 
6
  # Model Card for Model ID
checkpoint-50/adapter_config.json CHANGED
@@ -21,9 +21,9 @@
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
- "v_proj",
25
  "o_proj",
26
- "q_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
+ "q_proj",
25
  "o_proj",
26
+ "v_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
checkpoint-50/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:462ccb4de1a6868b9c314278aa410416a2271f7f6242771f282ecb17db91aa9a
3
  size 67143296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b692c5f45a06d84947aef0a222d424aecd480e40aabcd9ca87aa5d3007aa46e8
3
  size 67143296
checkpoint-50/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cc1bc1ee11e2e518af04ea6813d8a018c28d601435ab40fb94b4429e7463a6f
3
  size 134433530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ab022dad22b0f149a3b1fb04e9cd79842aad48780ac055c542631a6fc57822
3
  size 134433530
checkpoint-50/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:965332493d5d51377ce3dafe8e14c60e285cd11a8955550ce87e8ef7114ed890
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9425a09cb4fd41e0b7c88529bcda485c5bb777b677ec7982ea20ad9edbd69fc
3
  size 1064
checkpoint-50/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 1.2951068878173828,
3
  "best_model_checkpoint": "/kaggle/working/checkpoint-50",
4
  "epoch": 5.555555555555555,
5
  "eval_steps": 10,
@@ -10,84 +10,84 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.1111111111111112,
13
- "grad_norm": 0.022282764315605164,
14
- "learning_rate": 0.0001851851851851852,
15
- "loss": 2.0424,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.1111111111111112,
20
- "eval_loss": 1.733155369758606,
21
- "eval_runtime": 34.5543,
22
- "eval_samples_per_second": 1.042,
23
- "eval_steps_per_second": 0.145,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.2222222222222223,
28
- "grad_norm": 0.018981408327817917,
29
- "learning_rate": 0.00017037037037037037,
30
- "loss": 1.6072,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 2.2222222222222223,
35
- "eval_loss": 1.5428930521011353,
36
- "eval_runtime": 34.6485,
37
- "eval_samples_per_second": 1.039,
38
  "eval_steps_per_second": 0.144,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 3.3333333333333335,
43
- "grad_norm": 0.023157037794589996,
44
- "learning_rate": 0.00015555555555555556,
45
- "loss": 1.4025,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 3.3333333333333335,
50
- "eval_loss": 1.4176721572875977,
51
- "eval_runtime": 34.5433,
52
- "eval_samples_per_second": 1.042,
53
- "eval_steps_per_second": 0.145,
54
  "step": 30
55
  },
56
  {
57
  "epoch": 4.444444444444445,
58
- "grad_norm": 0.021338749676942825,
59
- "learning_rate": 0.00014074074074074076,
60
- "loss": 1.285,
61
  "step": 40
62
  },
63
  {
64
  "epoch": 4.444444444444445,
65
- "eval_loss": 1.3449772596359253,
66
- "eval_runtime": 34.5594,
67
- "eval_samples_per_second": 1.042,
68
- "eval_steps_per_second": 0.145,
69
  "step": 40
70
  },
71
  {
72
  "epoch": 5.555555555555555,
73
- "grad_norm": 0.02489505708217621,
74
- "learning_rate": 0.00012592592592592592,
75
- "loss": 1.1687,
76
  "step": 50
77
  },
78
  {
79
  "epoch": 5.555555555555555,
80
- "eval_loss": 1.2951068878173828,
81
- "eval_runtime": 34.5896,
82
- "eval_samples_per_second": 1.041,
83
- "eval_steps_per_second": 0.145,
84
  "step": 50
85
  }
86
  ],
87
  "logging_steps": 10,
88
- "max_steps": 135,
89
  "num_input_tokens_seen": 0,
90
- "num_train_epochs": 15,
91
  "save_steps": 10,
92
  "stateful_callbacks": {
93
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 1.3168741464614868,
3
  "best_model_checkpoint": "/kaggle/working/checkpoint-50",
4
  "epoch": 5.555555555555555,
5
  "eval_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.1111111111111112,
13
+ "grad_norm": 0.02217627689242363,
14
+ "learning_rate": 0.00017777777777777779,
15
+ "loss": 2.0442,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.1111111111111112,
20
+ "eval_loss": 1.737181544303894,
21
+ "eval_runtime": 35.1318,
22
+ "eval_samples_per_second": 1.025,
23
+ "eval_steps_per_second": 0.142,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.2222222222222223,
28
+ "grad_norm": 0.0346713550388813,
29
+ "learning_rate": 0.00015555555555555556,
30
+ "loss": 1.6131,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 2.2222222222222223,
35
+ "eval_loss": 1.5489343404769897,
36
+ "eval_runtime": 34.8402,
37
+ "eval_samples_per_second": 1.033,
38
  "eval_steps_per_second": 0.144,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 3.3333333333333335,
43
+ "grad_norm": 0.02501535415649414,
44
+ "learning_rate": 0.00013333333333333334,
45
+ "loss": 1.4152,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 3.3333333333333335,
50
+ "eval_loss": 1.4295110702514648,
51
+ "eval_runtime": 34.8537,
52
+ "eval_samples_per_second": 1.033,
53
+ "eval_steps_per_second": 0.143,
54
  "step": 30
55
  },
56
  {
57
  "epoch": 4.444444444444445,
58
+ "grad_norm": 0.02104916237294674,
59
+ "learning_rate": 0.00011111111111111112,
60
+ "loss": 1.3068,
61
  "step": 40
62
  },
63
  {
64
  "epoch": 4.444444444444445,
65
+ "eval_loss": 1.3598744869232178,
66
+ "eval_runtime": 35.0281,
67
+ "eval_samples_per_second": 1.028,
68
+ "eval_steps_per_second": 0.143,
69
  "step": 40
70
  },
71
  {
72
  "epoch": 5.555555555555555,
73
+ "grad_norm": 0.022395364940166473,
74
+ "learning_rate": 8.888888888888889e-05,
75
+ "loss": 1.2049,
76
  "step": 50
77
  },
78
  {
79
  "epoch": 5.555555555555555,
80
+ "eval_loss": 1.3168741464614868,
81
+ "eval_runtime": 34.692,
82
+ "eval_samples_per_second": 1.038,
83
+ "eval_steps_per_second": 0.144,
84
  "step": 50
85
  }
86
  ],
87
  "logging_steps": 10,
88
+ "max_steps": 90,
89
  "num_input_tokens_seen": 0,
90
+ "num_train_epochs": 10,
91
  "save_steps": 10,
92
  "stateful_callbacks": {
93
  "EarlyStoppingCallback": {
checkpoint-50/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7
3
  size 5112
checkpoint-60/README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- library_name: peft
3
  base_model: TheBloke/Llama-2-7B-fp16
 
4
  ---
5
 
6
  # Model Card for Model ID
 
1
  ---
 
2
  base_model: TheBloke/Llama-2-7B-fp16
3
+ library_name: peft
4
  ---
5
 
6
  # Model Card for Model ID
checkpoint-60/adapter_config.json CHANGED
@@ -21,9 +21,9 @@
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
- "v_proj",
25
  "o_proj",
26
- "q_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
+ "q_proj",
25
  "o_proj",
26
+ "v_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
checkpoint-60/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db2917762f8b15e3bf37ffb24d36fadc8281664684350c104e7663e270ea20dc
3
  size 67143296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ebbefdf71daa025996a412ce6c4f2fd2d5bbf084a4ee0f0ca1dc123cbb85e5
3
  size 67143296
checkpoint-60/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6ee00713e8f75fec9ac92a487fe43f23b824b8125bd0f0413507f283ce38111
3
  size 134433530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5863ac3d6f865bddc72753e9a6db83e90985a3348345c91097785f539b2d743e
3
  size 134433530
checkpoint-60/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2dae0587e75ddf657c6d2f1bfc77ac82e3e62449951f53cbb9a382f8e039b79
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6dfdd6ca5178c66b713159a2bfe5731fea568ef91adf9d3f8039a74c6ff0f6b
3
  size 1064
checkpoint-60/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 1.2674343585968018,
3
  "best_model_checkpoint": "/kaggle/working/checkpoint-60",
4
  "epoch": 6.666666666666667,
5
  "eval_steps": 10,
@@ -10,99 +10,99 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.1111111111111112,
13
- "grad_norm": 0.022282764315605164,
14
- "learning_rate": 0.0001851851851851852,
15
- "loss": 2.0424,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.1111111111111112,
20
- "eval_loss": 1.733155369758606,
21
- "eval_runtime": 34.5543,
22
- "eval_samples_per_second": 1.042,
23
- "eval_steps_per_second": 0.145,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.2222222222222223,
28
- "grad_norm": 0.018981408327817917,
29
- "learning_rate": 0.00017037037037037037,
30
- "loss": 1.6072,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 2.2222222222222223,
35
- "eval_loss": 1.5428930521011353,
36
- "eval_runtime": 34.6485,
37
- "eval_samples_per_second": 1.039,
38
  "eval_steps_per_second": 0.144,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 3.3333333333333335,
43
- "grad_norm": 0.023157037794589996,
44
- "learning_rate": 0.00015555555555555556,
45
- "loss": 1.4025,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 3.3333333333333335,
50
- "eval_loss": 1.4176721572875977,
51
- "eval_runtime": 34.5433,
52
- "eval_samples_per_second": 1.042,
53
- "eval_steps_per_second": 0.145,
54
  "step": 30
55
  },
56
  {
57
  "epoch": 4.444444444444445,
58
- "grad_norm": 0.021338749676942825,
59
- "learning_rate": 0.00014074074074074076,
60
- "loss": 1.285,
61
  "step": 40
62
  },
63
  {
64
  "epoch": 4.444444444444445,
65
- "eval_loss": 1.3449772596359253,
66
- "eval_runtime": 34.5594,
67
- "eval_samples_per_second": 1.042,
68
- "eval_steps_per_second": 0.145,
69
  "step": 40
70
  },
71
  {
72
  "epoch": 5.555555555555555,
73
- "grad_norm": 0.02489505708217621,
74
- "learning_rate": 0.00012592592592592592,
75
- "loss": 1.1687,
76
  "step": 50
77
  },
78
  {
79
  "epoch": 5.555555555555555,
80
- "eval_loss": 1.2951068878173828,
81
- "eval_runtime": 34.5896,
82
- "eval_samples_per_second": 1.041,
83
- "eval_steps_per_second": 0.145,
84
  "step": 50
85
  },
86
  {
87
  "epoch": 6.666666666666667,
88
- "grad_norm": 0.028962766751646996,
89
- "learning_rate": 0.00011111111111111112,
90
- "loss": 1.0521,
91
  "step": 60
92
  },
93
  {
94
  "epoch": 6.666666666666667,
95
- "eval_loss": 1.2674343585968018,
96
- "eval_runtime": 34.5586,
97
- "eval_samples_per_second": 1.042,
98
- "eval_steps_per_second": 0.145,
99
  "step": 60
100
  }
101
  ],
102
  "logging_steps": 10,
103
- "max_steps": 135,
104
  "num_input_tokens_seen": 0,
105
- "num_train_epochs": 15,
106
  "save_steps": 10,
107
  "stateful_callbacks": {
108
  "EarlyStoppingCallback": {
 
1
  {
2
+ "best_metric": 1.2939578294754028,
3
  "best_model_checkpoint": "/kaggle/working/checkpoint-60",
4
  "epoch": 6.666666666666667,
5
  "eval_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.1111111111111112,
13
+ "grad_norm": 0.02217627689242363,
14
+ "learning_rate": 0.00017777777777777779,
15
+ "loss": 2.0442,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.1111111111111112,
20
+ "eval_loss": 1.737181544303894,
21
+ "eval_runtime": 35.1318,
22
+ "eval_samples_per_second": 1.025,
23
+ "eval_steps_per_second": 0.142,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.2222222222222223,
28
+ "grad_norm": 0.0346713550388813,
29
+ "learning_rate": 0.00015555555555555556,
30
+ "loss": 1.6131,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 2.2222222222222223,
35
+ "eval_loss": 1.5489343404769897,
36
+ "eval_runtime": 34.8402,
37
+ "eval_samples_per_second": 1.033,
38
  "eval_steps_per_second": 0.144,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 3.3333333333333335,
43
+ "grad_norm": 0.02501535415649414,
44
+ "learning_rate": 0.00013333333333333334,
45
+ "loss": 1.4152,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 3.3333333333333335,
50
+ "eval_loss": 1.4295110702514648,
51
+ "eval_runtime": 34.8537,
52
+ "eval_samples_per_second": 1.033,
53
+ "eval_steps_per_second": 0.143,
54
  "step": 30
55
  },
56
  {
57
  "epoch": 4.444444444444445,
58
+ "grad_norm": 0.02104916237294674,
59
+ "learning_rate": 0.00011111111111111112,
60
+ "loss": 1.3068,
61
  "step": 40
62
  },
63
  {
64
  "epoch": 4.444444444444445,
65
+ "eval_loss": 1.3598744869232178,
66
+ "eval_runtime": 35.0281,
67
+ "eval_samples_per_second": 1.028,
68
+ "eval_steps_per_second": 0.143,
69
  "step": 40
70
  },
71
  {
72
  "epoch": 5.555555555555555,
73
+ "grad_norm": 0.022395364940166473,
74
+ "learning_rate": 8.888888888888889e-05,
75
+ "loss": 1.2049,
76
  "step": 50
77
  },
78
  {
79
  "epoch": 5.555555555555555,
80
+ "eval_loss": 1.3168741464614868,
81
+ "eval_runtime": 34.692,
82
+ "eval_samples_per_second": 1.038,
83
+ "eval_steps_per_second": 0.144,
84
  "step": 50
85
  },
86
  {
87
  "epoch": 6.666666666666667,
88
+ "grad_norm": 0.02603345364332199,
89
+ "learning_rate": 6.666666666666667e-05,
90
+ "loss": 1.1086,
91
  "step": 60
92
  },
93
  {
94
  "epoch": 6.666666666666667,
95
+ "eval_loss": 1.2939578294754028,
96
+ "eval_runtime": 34.6444,
97
+ "eval_samples_per_second": 1.039,
98
+ "eval_steps_per_second": 0.144,
99
  "step": 60
100
  }
101
  ],
102
  "logging_steps": 10,
103
+ "max_steps": 90,
104
  "num_input_tokens_seen": 0,
105
+ "num_train_epochs": 10,
106
  "save_steps": 10,
107
  "stateful_callbacks": {
108
  "EarlyStoppingCallback": {
checkpoint-60/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7
3
  size 5112
checkpoint-70/README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- library_name: peft
3
  base_model: TheBloke/Llama-2-7B-fp16
 
4
  ---
5
 
6
  # Model Card for Model ID
 
1
  ---
 
2
  base_model: TheBloke/Llama-2-7B-fp16
3
+ library_name: peft
4
  ---
5
 
6
  # Model Card for Model ID
checkpoint-70/adapter_config.json CHANGED
@@ -21,9 +21,9 @@
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
- "v_proj",
25
  "o_proj",
26
- "q_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
+ "q_proj",
25
  "o_proj",
26
+ "v_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
checkpoint-70/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf3c37297ed626b47ab9c41ebb2a26326a24e606918ef7c7fd629793854a6799
3
  size 67143296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c44abb9173442b0bc413ba469da95e7be794812df0c2a2c16e54fc301511e3e
3
  size 67143296
checkpoint-70/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4df5d5144e456b9ea6bf7bd481d07a757eea5bf300085855555e34b8b031648
3
  size 134433530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2df92dd99064eb6aa6542b203055cf8cc892cff2f30a210807b7667c96cedc3a
3
  size 134433530
checkpoint-70/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b4acdcca4bce1d839ad3b2f3830a589ce711414ff4548cbd4704149e66a014f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b55d3cbe104822729f4f45e02a6c248fb8a4cb356c229f5c93e65066ff6a397
3
  size 1064