diff --git a/README.md b/README.md index 2d1596ffe16e4d5bdcdf0e1d4322e6667af95962..d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ --- -library_name: peft base_model: TheBloke/Llama-2-7B-fp16 +library_name: peft --- # Model Card for Model ID diff --git a/adapter_config.json b/adapter_config.json index cbf93f2809e43fe18fd6ad23406293a68e7f5c98..539f4c41b2550fc30b9c2d0726f51adfa8e4b1e5 100644 --- a/adapter_config.json +++ b/adapter_config.json @@ -21,9 +21,9 @@ "revision": null, "target_modules": [ "k_proj", - "v_proj", + "q_proj", "o_proj", - "q_proj" + "v_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/adapter_model.safetensors b/adapter_model.safetensors index b4f9fb90e930a7899b68e1787dce408f742c6dde..aad60b0963bb0817ddff00ad978492cc2241a8a2 100644 --- a/adapter_model.safetensors +++ b/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58a8f71955b57e8e1082d6f6f180646592764af968e225bda2b732733e64a34c +oid sha256:6bdfd134cf3b5e167c3aa127bf57024a3e8ff71b6b0ea16d5493a51a01d7e317 size 67143296 diff --git a/checkpoint-10/README.md b/checkpoint-10/README.md index 2d1596ffe16e4d5bdcdf0e1d4322e6667af95962..d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517 100644 --- a/checkpoint-10/README.md +++ b/checkpoint-10/README.md @@ -1,6 +1,6 @@ --- -library_name: peft base_model: TheBloke/Llama-2-7B-fp16 +library_name: peft --- # Model Card for Model ID diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json index cbf93f2809e43fe18fd6ad23406293a68e7f5c98..539f4c41b2550fc30b9c2d0726f51adfa8e4b1e5 100644 --- a/checkpoint-10/adapter_config.json +++ b/checkpoint-10/adapter_config.json @@ -21,9 +21,9 @@ "revision": null, "target_modules": [ "k_proj", - "v_proj", + "q_proj", "o_proj", - "q_proj" + "v_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-10/adapter_model.safetensors b/checkpoint-10/adapter_model.safetensors index 5c2ebe7a9d782b318f37eeb0942020de6ef351c9..70f0dc5b3797e2589528793537c490c67c49b590 100644 --- a/checkpoint-10/adapter_model.safetensors +++ b/checkpoint-10/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:232e478a97bd908880b31806be86d4debfd0492ee0eb83ba509e3293c6c3565a +oid sha256:2e16adf919d93d6441c2583be16d89fc2157635291e0c18a1835380e4dd25668 size 67143296 diff --git a/checkpoint-10/optimizer.pt b/checkpoint-10/optimizer.pt index f6aa09b5a5c07210fa7f9655cd844f149c370ddd..2ea9b0e82ac9df07c8070110f0e9f7f510d13dbd 100644 --- a/checkpoint-10/optimizer.pt +++ b/checkpoint-10/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:40afa20170f291b125eb81d0ffabecaf09477598d9e256ea01ff0b97e18e70a9 +oid sha256:90d3b34bfbd9c3f0886fa09e483a0a1fa8853028f68e4ce50843d14911e15412 size 134433530 diff --git a/checkpoint-10/scheduler.pt b/checkpoint-10/scheduler.pt index 941ed765630f615d588fd402f4d9cc96b1946236..9b8b9b162c3bc0952535c6a06e25390cb24b5d5a 100644 --- a/checkpoint-10/scheduler.pt +++ b/checkpoint-10/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a35af14c8f81087292db706fc180cce03fb0f692ed151b0664293b217dd11fa5 +oid sha256:04d2341737bca7648a4cdb3a55768450f9758f2298ef492fe1db7f093eaa1902 size 1064 diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json index 5e5b97c2c1c2cda602de8084d07f4f9e745d299b..c4d5e43fa9b77154cc4db4d15de86439adf67490 100644 --- a/checkpoint-10/trainer_state.json +++ b/checkpoint-10/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 1.733155369758606, + "best_metric": 1.737181544303894, "best_model_checkpoint": "/kaggle/working/checkpoint-10", "epoch": 1.1111111111111112, "eval_steps": 10, @@ -10,24 +10,24 @@ "log_history": [ { "epoch": 1.1111111111111112, - "grad_norm": 0.022282764315605164, - "learning_rate": 0.0001851851851851852, - "loss": 2.0424, + "grad_norm": 0.02217627689242363, + "learning_rate": 0.00017777777777777779, + "loss": 2.0442, "step": 10 }, { "epoch": 1.1111111111111112, - "eval_loss": 1.733155369758606, - "eval_runtime": 34.5543, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.737181544303894, + "eval_runtime": 35.1318, + "eval_samples_per_second": 1.025, + "eval_steps_per_second": 0.142, "step": 10 } ], "logging_steps": 10, - "max_steps": 135, + "max_steps": 90, "num_input_tokens_seen": 0, - "num_train_epochs": 15, + "num_train_epochs": 10, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin index db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc..992e364d4b54f32a399ec3cd5f5f54c212ea0588 100644 --- a/checkpoint-10/training_args.bin +++ b/checkpoint-10/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7 size 5112 diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md index 2d1596ffe16e4d5bdcdf0e1d4322e6667af95962..d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517 100644 --- a/checkpoint-20/README.md +++ b/checkpoint-20/README.md @@ -1,6 +1,6 @@ --- -library_name: peft base_model: TheBloke/Llama-2-7B-fp16 +library_name: peft --- # Model Card for Model ID diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json index cbf93f2809e43fe18fd6ad23406293a68e7f5c98..539f4c41b2550fc30b9c2d0726f51adfa8e4b1e5 100644 --- a/checkpoint-20/adapter_config.json +++ b/checkpoint-20/adapter_config.json @@ -21,9 +21,9 @@ "revision": null, "target_modules": [ "k_proj", - "v_proj", + "q_proj", "o_proj", - "q_proj" + "v_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors index 8223e0ce8d841941324b29f8c42fc45abdc14765..13aa561884e1ba48ec5d29e6a7bed5a31855eed2 100644 --- a/checkpoint-20/adapter_model.safetensors +++ b/checkpoint-20/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b330fca16a5ab17049bdd4e6e42df52bd8bd0ac1fbeb1577f683e7b1212f0d83 +oid sha256:a5ab93af4ed23c52b82729ff3b3f871c19b732c90f1094f90d5a9f4ade1ccfac size 67143296 diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt index f4d35d60066f0933a1fd192740c0a094a9d75120..0c81c05ac0b1ac814e5ec818a8b5f47ab91bcf72 100644 --- a/checkpoint-20/optimizer.pt +++ b/checkpoint-20/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83e5faf17f41fc7dcd6907ecd97bc5e5b82bfa377a6683ddd9d25e9c5a4f3efd +oid sha256:a07f3020a10b2a9d3e215c9651b159e8c3b297ab1db69b013b8c7817d5f52a7c size 134433530 diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt index 4f536414baf7d040b7ee6765e42c28f6d23eb06d..2c4b1ba5f9c4137961a8e1182dbd5a6a94845c4c 100644 --- a/checkpoint-20/scheduler.pt +++ b/checkpoint-20/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18bf434d2955f164730aed9c7994cdd6c773ed80052fc3ef975fae4adef61283 +oid sha256:f9e7e75183c7081ca7f8f52ddfd0d5f4b8e8dbcf7f7bcd495fc6e0cfff80e3a2 size 1064 diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json index fbb6a3c50e3d893d3f94028d5087a658e6bbc7c2..27994450f65cba3cf56c1c4c543734709d21042d 100644 --- a/checkpoint-20/trainer_state.json +++ b/checkpoint-20/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 1.5428930521011353, + "best_metric": 1.5489343404769897, "best_model_checkpoint": "/kaggle/working/checkpoint-20", "epoch": 2.2222222222222223, "eval_steps": 10, @@ -10,39 +10,39 @@ "log_history": [ { "epoch": 1.1111111111111112, - "grad_norm": 0.022282764315605164, - "learning_rate": 0.0001851851851851852, - "loss": 2.0424, + "grad_norm": 0.02217627689242363, + "learning_rate": 0.00017777777777777779, + "loss": 2.0442, "step": 10 }, { "epoch": 1.1111111111111112, - "eval_loss": 1.733155369758606, - "eval_runtime": 34.5543, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.737181544303894, + "eval_runtime": 35.1318, + "eval_samples_per_second": 1.025, + "eval_steps_per_second": 0.142, "step": 10 }, { "epoch": 2.2222222222222223, - "grad_norm": 0.018981408327817917, - "learning_rate": 0.00017037037037037037, - "loss": 1.6072, + "grad_norm": 0.0346713550388813, + "learning_rate": 0.00015555555555555556, + "loss": 1.6131, "step": 20 }, { "epoch": 2.2222222222222223, - "eval_loss": 1.5428930521011353, - "eval_runtime": 34.6485, - "eval_samples_per_second": 1.039, + "eval_loss": 1.5489343404769897, + "eval_runtime": 34.8402, + "eval_samples_per_second": 1.033, "eval_steps_per_second": 0.144, "step": 20 } ], "logging_steps": 10, - "max_steps": 135, + "max_steps": 90, "num_input_tokens_seen": 0, - "num_train_epochs": 15, + "num_train_epochs": 10, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin index db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc..992e364d4b54f32a399ec3cd5f5f54c212ea0588 100644 --- a/checkpoint-20/training_args.bin +++ b/checkpoint-20/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7 size 5112 diff --git a/checkpoint-30/README.md b/checkpoint-30/README.md index 2d1596ffe16e4d5bdcdf0e1d4322e6667af95962..d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517 100644 --- a/checkpoint-30/README.md +++ b/checkpoint-30/README.md @@ -1,6 +1,6 @@ --- -library_name: peft base_model: TheBloke/Llama-2-7B-fp16 +library_name: peft --- # Model Card for Model ID diff --git a/checkpoint-30/adapter_config.json b/checkpoint-30/adapter_config.json index cbf93f2809e43fe18fd6ad23406293a68e7f5c98..539f4c41b2550fc30b9c2d0726f51adfa8e4b1e5 100644 --- a/checkpoint-30/adapter_config.json +++ b/checkpoint-30/adapter_config.json @@ -21,9 +21,9 @@ "revision": null, "target_modules": [ "k_proj", - "v_proj", + "q_proj", "o_proj", - "q_proj" + "v_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-30/adapter_model.safetensors b/checkpoint-30/adapter_model.safetensors index 2dde285768e1a4318a4612f90065ca618c5bdffc..c985a80922131720df8cf9d17f8535245bba0c81 100644 --- a/checkpoint-30/adapter_model.safetensors +++ b/checkpoint-30/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:473c4bd6da7c40c632f6b8a627a42ab5e26f5bc3b977b422287013a48424e372 +oid sha256:d8576250f42c32085cdb174e306461292b115ea33d910d0a59d062fcad935bf0 size 67143296 diff --git a/checkpoint-30/optimizer.pt b/checkpoint-30/optimizer.pt index c08c60df1dba8ab4b623f9330e698d2be010f492..cfc1988c5683d4e48dbf08fb68442468c0e29d45 100644 --- a/checkpoint-30/optimizer.pt +++ b/checkpoint-30/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09a69f8968f0b0dc5be6245d8969030cda587b4b349b62c8e5c647bd89875fa2 +oid sha256:16613c572dcb0ccca606ca4a382a4476b3f69ed3cf64a7095e7f852e897c8426 size 134433530 diff --git a/checkpoint-30/scheduler.pt b/checkpoint-30/scheduler.pt index fc722270e58bfcbf5b68dd4051aa690f4a07fbeb..b5d0f2ca2c7bb5261cdc0c82aa8b49d242267e6b 100644 --- a/checkpoint-30/scheduler.pt +++ b/checkpoint-30/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8a3ad18d2d099317927f68afa34f1d799ef6644ec4025b52033947a650a29e2 +oid sha256:14d970dabadfb95eaf7812b80cb7816a58d7911bb09df450b100b1c052b74a02 size 1064 diff --git a/checkpoint-30/trainer_state.json b/checkpoint-30/trainer_state.json index e6276205b85275a723731d05c8c327a2a3db50b4..3d77f2dc0ab9fea959fe76ba758b7780fdd26130 100644 --- a/checkpoint-30/trainer_state.json +++ b/checkpoint-30/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 1.4176721572875977, + "best_metric": 1.4295110702514648, "best_model_checkpoint": "/kaggle/working/checkpoint-30", "epoch": 3.3333333333333335, "eval_steps": 10, @@ -10,54 +10,54 @@ "log_history": [ { "epoch": 1.1111111111111112, - "grad_norm": 0.022282764315605164, - "learning_rate": 0.0001851851851851852, - "loss": 2.0424, + "grad_norm": 0.02217627689242363, + "learning_rate": 0.00017777777777777779, + "loss": 2.0442, "step": 10 }, { "epoch": 1.1111111111111112, - "eval_loss": 1.733155369758606, - "eval_runtime": 34.5543, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.737181544303894, + "eval_runtime": 35.1318, + "eval_samples_per_second": 1.025, + "eval_steps_per_second": 0.142, "step": 10 }, { "epoch": 2.2222222222222223, - "grad_norm": 0.018981408327817917, - "learning_rate": 0.00017037037037037037, - "loss": 1.6072, + "grad_norm": 0.0346713550388813, + "learning_rate": 0.00015555555555555556, + "loss": 1.6131, "step": 20 }, { "epoch": 2.2222222222222223, - "eval_loss": 1.5428930521011353, - "eval_runtime": 34.6485, - "eval_samples_per_second": 1.039, + "eval_loss": 1.5489343404769897, + "eval_runtime": 34.8402, + "eval_samples_per_second": 1.033, "eval_steps_per_second": 0.144, "step": 20 }, { "epoch": 3.3333333333333335, - "grad_norm": 0.023157037794589996, - "learning_rate": 0.00015555555555555556, - "loss": 1.4025, + "grad_norm": 0.02501535415649414, + "learning_rate": 0.00013333333333333334, + "loss": 1.4152, "step": 30 }, { "epoch": 3.3333333333333335, - "eval_loss": 1.4176721572875977, - "eval_runtime": 34.5433, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.4295110702514648, + "eval_runtime": 34.8537, + "eval_samples_per_second": 1.033, + "eval_steps_per_second": 0.143, "step": 30 } ], "logging_steps": 10, - "max_steps": 135, + "max_steps": 90, "num_input_tokens_seen": 0, - "num_train_epochs": 15, + "num_train_epochs": 10, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-30/training_args.bin b/checkpoint-30/training_args.bin index db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc..992e364d4b54f32a399ec3cd5f5f54c212ea0588 100644 --- a/checkpoint-30/training_args.bin +++ b/checkpoint-30/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7 size 5112 diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md index 2d1596ffe16e4d5bdcdf0e1d4322e6667af95962..d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517 100644 --- a/checkpoint-40/README.md +++ b/checkpoint-40/README.md @@ -1,6 +1,6 @@ --- -library_name: peft base_model: TheBloke/Llama-2-7B-fp16 +library_name: peft --- # Model Card for Model ID diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json index cbf93f2809e43fe18fd6ad23406293a68e7f5c98..539f4c41b2550fc30b9c2d0726f51adfa8e4b1e5 100644 --- a/checkpoint-40/adapter_config.json +++ b/checkpoint-40/adapter_config.json @@ -21,9 +21,9 @@ "revision": null, "target_modules": [ "k_proj", - "v_proj", + "q_proj", "o_proj", - "q_proj" + "v_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-40/adapter_model.safetensors b/checkpoint-40/adapter_model.safetensors index d92565a5b837388e70e09742cd836ed04bcbaafb..0e9c44306157ba76d1b0525594c780969a0a3f98 100644 --- a/checkpoint-40/adapter_model.safetensors +++ b/checkpoint-40/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:801d3ce570c9b9ff575f960af3a00ccd6ba6d98cf418a3b2d0dc14a716f5dc87 +oid sha256:ab38361a67b61947cafd5230ca79626082a1d26b72f5440faf199b3216bc6704 size 67143296 diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt index 7bbafe6bb0c6557cdfc17a015eac9b12c17845c9..a7c2c9e810a2f0a8093fc2aa51e73b47a0a9abcd 100644 --- a/checkpoint-40/optimizer.pt +++ b/checkpoint-40/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:67c5b4e8da000e9b0681d015858d5f844f66483488fdbbc351086278b492e7ad +oid sha256:2a38dd3cb56490e5a9b4d6a05ea97f3a761cd71841c3d9f7f129c1e4c0b4730f size 134433530 diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt index 2d7557681fbd1043af6145a07414467654c47a85..43ed4ecc37d6e4e0738d211af3327e4424a756fc 100644 --- a/checkpoint-40/scheduler.pt +++ b/checkpoint-40/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb020d6653a8f320452374f8532b2d5f261a1314bca08cdf554ed4cd89610334 +oid sha256:dc5423f1af1182c2163f569e8f44b9ee18e1849c11acaaa76a185745ad274c02 size 1064 diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json index 86d21c3a21736393bc1efdc29445b1408623b62b..c9bd3601e7274d6268976282b0de89dda2b0b807 100644 --- a/checkpoint-40/trainer_state.json +++ b/checkpoint-40/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 1.3449772596359253, + "best_metric": 1.3598744869232178, "best_model_checkpoint": "/kaggle/working/checkpoint-40", "epoch": 4.444444444444445, "eval_steps": 10, @@ -10,69 +10,69 @@ "log_history": [ { "epoch": 1.1111111111111112, - "grad_norm": 0.022282764315605164, - "learning_rate": 0.0001851851851851852, - "loss": 2.0424, + "grad_norm": 0.02217627689242363, + "learning_rate": 0.00017777777777777779, + "loss": 2.0442, "step": 10 }, { "epoch": 1.1111111111111112, - "eval_loss": 1.733155369758606, - "eval_runtime": 34.5543, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.737181544303894, + "eval_runtime": 35.1318, + "eval_samples_per_second": 1.025, + "eval_steps_per_second": 0.142, "step": 10 }, { "epoch": 2.2222222222222223, - "grad_norm": 0.018981408327817917, - "learning_rate": 0.00017037037037037037, - "loss": 1.6072, + "grad_norm": 0.0346713550388813, + "learning_rate": 0.00015555555555555556, + "loss": 1.6131, "step": 20 }, { "epoch": 2.2222222222222223, - "eval_loss": 1.5428930521011353, - "eval_runtime": 34.6485, - "eval_samples_per_second": 1.039, + "eval_loss": 1.5489343404769897, + "eval_runtime": 34.8402, + "eval_samples_per_second": 1.033, "eval_steps_per_second": 0.144, "step": 20 }, { "epoch": 3.3333333333333335, - "grad_norm": 0.023157037794589996, - "learning_rate": 0.00015555555555555556, - "loss": 1.4025, + "grad_norm": 0.02501535415649414, + "learning_rate": 0.00013333333333333334, + "loss": 1.4152, "step": 30 }, { "epoch": 3.3333333333333335, - "eval_loss": 1.4176721572875977, - "eval_runtime": 34.5433, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.4295110702514648, + "eval_runtime": 34.8537, + "eval_samples_per_second": 1.033, + "eval_steps_per_second": 0.143, "step": 30 }, { "epoch": 4.444444444444445, - "grad_norm": 0.021338749676942825, - "learning_rate": 0.00014074074074074076, - "loss": 1.285, + "grad_norm": 0.02104916237294674, + "learning_rate": 0.00011111111111111112, + "loss": 1.3068, "step": 40 }, { "epoch": 4.444444444444445, - "eval_loss": 1.3449772596359253, - "eval_runtime": 34.5594, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.3598744869232178, + "eval_runtime": 35.0281, + "eval_samples_per_second": 1.028, + "eval_steps_per_second": 0.143, "step": 40 } ], "logging_steps": 10, - "max_steps": 135, + "max_steps": 90, "num_input_tokens_seen": 0, - "num_train_epochs": 15, + "num_train_epochs": 10, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin index db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc..992e364d4b54f32a399ec3cd5f5f54c212ea0588 100644 --- a/checkpoint-40/training_args.bin +++ b/checkpoint-40/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7 size 5112 diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md index 2d1596ffe16e4d5bdcdf0e1d4322e6667af95962..d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517 100644 --- a/checkpoint-50/README.md +++ b/checkpoint-50/README.md @@ -1,6 +1,6 @@ --- -library_name: peft base_model: TheBloke/Llama-2-7B-fp16 +library_name: peft --- # Model Card for Model ID diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json index cbf93f2809e43fe18fd6ad23406293a68e7f5c98..539f4c41b2550fc30b9c2d0726f51adfa8e4b1e5 100644 --- a/checkpoint-50/adapter_config.json +++ b/checkpoint-50/adapter_config.json @@ -21,9 +21,9 @@ "revision": null, "target_modules": [ "k_proj", - "v_proj", + "q_proj", "o_proj", - "q_proj" + "v_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors index d0691d9e050f0256809f29f86e24616a0fc291e2..c090dac91ca150e0ea0a5153fcd6d57765075303 100644 --- a/checkpoint-50/adapter_model.safetensors +++ b/checkpoint-50/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:462ccb4de1a6868b9c314278aa410416a2271f7f6242771f282ecb17db91aa9a +oid sha256:b692c5f45a06d84947aef0a222d424aecd480e40aabcd9ca87aa5d3007aa46e8 size 67143296 diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt index ce706c076113ef5a730068e774f4cb258e862101..a30c9c35dca58e15eaf0c8940bf1926a9cac9d4d 100644 --- a/checkpoint-50/optimizer.pt +++ b/checkpoint-50/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9cc1bc1ee11e2e518af04ea6813d8a018c28d601435ab40fb94b4429e7463a6f +oid sha256:44ab022dad22b0f149a3b1fb04e9cd79842aad48780ac055c542631a6fc57822 size 134433530 diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt index 66f83e8d1a29d1712842e93ebeca9509a1bf2fb0..5183cac3f3e1237799f57507b58ef408fc83ade4 100644 --- a/checkpoint-50/scheduler.pt +++ b/checkpoint-50/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:965332493d5d51377ce3dafe8e14c60e285cd11a8955550ce87e8ef7114ed890 +oid sha256:a9425a09cb4fd41e0b7c88529bcda485c5bb777b677ec7982ea20ad9edbd69fc size 1064 diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json index b4358c39fda6ddc437753ee9371e20b2c8bb340b..9e26cad60437a7a2d14835768354e565267f6744 100644 --- a/checkpoint-50/trainer_state.json +++ b/checkpoint-50/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 1.2951068878173828, + "best_metric": 1.3168741464614868, "best_model_checkpoint": "/kaggle/working/checkpoint-50", "epoch": 5.555555555555555, "eval_steps": 10, @@ -10,84 +10,84 @@ "log_history": [ { "epoch": 1.1111111111111112, - "grad_norm": 0.022282764315605164, - "learning_rate": 0.0001851851851851852, - "loss": 2.0424, + "grad_norm": 0.02217627689242363, + "learning_rate": 0.00017777777777777779, + "loss": 2.0442, "step": 10 }, { "epoch": 1.1111111111111112, - "eval_loss": 1.733155369758606, - "eval_runtime": 34.5543, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.737181544303894, + "eval_runtime": 35.1318, + "eval_samples_per_second": 1.025, + "eval_steps_per_second": 0.142, "step": 10 }, { "epoch": 2.2222222222222223, - "grad_norm": 0.018981408327817917, - "learning_rate": 0.00017037037037037037, - "loss": 1.6072, + "grad_norm": 0.0346713550388813, + "learning_rate": 0.00015555555555555556, + "loss": 1.6131, "step": 20 }, { "epoch": 2.2222222222222223, - "eval_loss": 1.5428930521011353, - "eval_runtime": 34.6485, - "eval_samples_per_second": 1.039, + "eval_loss": 1.5489343404769897, + "eval_runtime": 34.8402, + "eval_samples_per_second": 1.033, "eval_steps_per_second": 0.144, "step": 20 }, { "epoch": 3.3333333333333335, - "grad_norm": 0.023157037794589996, - "learning_rate": 0.00015555555555555556, - "loss": 1.4025, + "grad_norm": 0.02501535415649414, + "learning_rate": 0.00013333333333333334, + "loss": 1.4152, "step": 30 }, { "epoch": 3.3333333333333335, - "eval_loss": 1.4176721572875977, - "eval_runtime": 34.5433, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.4295110702514648, + "eval_runtime": 34.8537, + "eval_samples_per_second": 1.033, + "eval_steps_per_second": 0.143, "step": 30 }, { "epoch": 4.444444444444445, - "grad_norm": 0.021338749676942825, - "learning_rate": 0.00014074074074074076, - "loss": 1.285, + "grad_norm": 0.02104916237294674, + "learning_rate": 0.00011111111111111112, + "loss": 1.3068, "step": 40 }, { "epoch": 4.444444444444445, - "eval_loss": 1.3449772596359253, - "eval_runtime": 34.5594, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.3598744869232178, + "eval_runtime": 35.0281, + "eval_samples_per_second": 1.028, + "eval_steps_per_second": 0.143, "step": 40 }, { "epoch": 5.555555555555555, - "grad_norm": 0.02489505708217621, - "learning_rate": 0.00012592592592592592, - "loss": 1.1687, + "grad_norm": 0.022395364940166473, + "learning_rate": 8.888888888888889e-05, + "loss": 1.2049, "step": 50 }, { "epoch": 5.555555555555555, - "eval_loss": 1.2951068878173828, - "eval_runtime": 34.5896, - "eval_samples_per_second": 1.041, - "eval_steps_per_second": 0.145, + "eval_loss": 1.3168741464614868, + "eval_runtime": 34.692, + "eval_samples_per_second": 1.038, + "eval_steps_per_second": 0.144, "step": 50 } ], "logging_steps": 10, - "max_steps": 135, + "max_steps": 90, "num_input_tokens_seen": 0, - "num_train_epochs": 15, + "num_train_epochs": 10, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin index db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc..992e364d4b54f32a399ec3cd5f5f54c212ea0588 100644 --- a/checkpoint-50/training_args.bin +++ b/checkpoint-50/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7 size 5112 diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md index 2d1596ffe16e4d5bdcdf0e1d4322e6667af95962..d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517 100644 --- a/checkpoint-60/README.md +++ b/checkpoint-60/README.md @@ -1,6 +1,6 @@ --- -library_name: peft base_model: TheBloke/Llama-2-7B-fp16 +library_name: peft --- # Model Card for Model ID diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json index cbf93f2809e43fe18fd6ad23406293a68e7f5c98..539f4c41b2550fc30b9c2d0726f51adfa8e4b1e5 100644 --- a/checkpoint-60/adapter_config.json +++ b/checkpoint-60/adapter_config.json @@ -21,9 +21,9 @@ "revision": null, "target_modules": [ "k_proj", - "v_proj", + "q_proj", "o_proj", - "q_proj" + "v_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-60/adapter_model.safetensors b/checkpoint-60/adapter_model.safetensors index e572cbaec375d03394cd3c280f91fac11a3fc2b0..1f8e1524c1f516003349c944aa0c31a98ac40f6a 100644 --- a/checkpoint-60/adapter_model.safetensors +++ b/checkpoint-60/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db2917762f8b15e3bf37ffb24d36fadc8281664684350c104e7663e270ea20dc +oid sha256:14ebbefdf71daa025996a412ce6c4f2fd2d5bbf084a4ee0f0ca1dc123cbb85e5 size 67143296 diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt index 6056f4627bc13603632015486db18c9a4c53a353..d69bad85fc71c2ca5cb24f868de1721993458dde 100644 --- a/checkpoint-60/optimizer.pt +++ b/checkpoint-60/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c6ee00713e8f75fec9ac92a487fe43f23b824b8125bd0f0413507f283ce38111 +oid sha256:5863ac3d6f865bddc72753e9a6db83e90985a3348345c91097785f539b2d743e size 134433530 diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt index 26adfd2b99d76fb3c7ae41002cae269cd882ee38..cc6a804c402fc13ca8d00ce3c4186576d888c22b 100644 --- a/checkpoint-60/scheduler.pt +++ b/checkpoint-60/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2dae0587e75ddf657c6d2f1bfc77ac82e3e62449951f53cbb9a382f8e039b79 +oid sha256:a6dfdd6ca5178c66b713159a2bfe5731fea568ef91adf9d3f8039a74c6ff0f6b size 1064 diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json index 7a12784894d4693dd4e1e7b8da2c088a7e613802..d67f979705af37a8eb8025b225e52fc1fa9c702d 100644 --- a/checkpoint-60/trainer_state.json +++ b/checkpoint-60/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 1.2674343585968018, + "best_metric": 1.2939578294754028, "best_model_checkpoint": "/kaggle/working/checkpoint-60", "epoch": 6.666666666666667, "eval_steps": 10, @@ -10,99 +10,99 @@ "log_history": [ { "epoch": 1.1111111111111112, - "grad_norm": 0.022282764315605164, - "learning_rate": 0.0001851851851851852, - "loss": 2.0424, + "grad_norm": 0.02217627689242363, + "learning_rate": 0.00017777777777777779, + "loss": 2.0442, "step": 10 }, { "epoch": 1.1111111111111112, - "eval_loss": 1.733155369758606, - "eval_runtime": 34.5543, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.737181544303894, + "eval_runtime": 35.1318, + "eval_samples_per_second": 1.025, + "eval_steps_per_second": 0.142, "step": 10 }, { "epoch": 2.2222222222222223, - "grad_norm": 0.018981408327817917, - "learning_rate": 0.00017037037037037037, - "loss": 1.6072, + "grad_norm": 0.0346713550388813, + "learning_rate": 0.00015555555555555556, + "loss": 1.6131, "step": 20 }, { "epoch": 2.2222222222222223, - "eval_loss": 1.5428930521011353, - "eval_runtime": 34.6485, - "eval_samples_per_second": 1.039, + "eval_loss": 1.5489343404769897, + "eval_runtime": 34.8402, + "eval_samples_per_second": 1.033, "eval_steps_per_second": 0.144, "step": 20 }, { "epoch": 3.3333333333333335, - "grad_norm": 0.023157037794589996, - "learning_rate": 0.00015555555555555556, - "loss": 1.4025, + "grad_norm": 0.02501535415649414, + "learning_rate": 0.00013333333333333334, + "loss": 1.4152, "step": 30 }, { "epoch": 3.3333333333333335, - "eval_loss": 1.4176721572875977, - "eval_runtime": 34.5433, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.4295110702514648, + "eval_runtime": 34.8537, + "eval_samples_per_second": 1.033, + "eval_steps_per_second": 0.143, "step": 30 }, { "epoch": 4.444444444444445, - "grad_norm": 0.021338749676942825, - "learning_rate": 0.00014074074074074076, - "loss": 1.285, + "grad_norm": 0.02104916237294674, + "learning_rate": 0.00011111111111111112, + "loss": 1.3068, "step": 40 }, { "epoch": 4.444444444444445, - "eval_loss": 1.3449772596359253, - "eval_runtime": 34.5594, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.3598744869232178, + "eval_runtime": 35.0281, + "eval_samples_per_second": 1.028, + "eval_steps_per_second": 0.143, "step": 40 }, { "epoch": 5.555555555555555, - "grad_norm": 0.02489505708217621, - "learning_rate": 0.00012592592592592592, - "loss": 1.1687, + "grad_norm": 0.022395364940166473, + "learning_rate": 8.888888888888889e-05, + "loss": 1.2049, "step": 50 }, { "epoch": 5.555555555555555, - "eval_loss": 1.2951068878173828, - "eval_runtime": 34.5896, - "eval_samples_per_second": 1.041, - "eval_steps_per_second": 0.145, + "eval_loss": 1.3168741464614868, + "eval_runtime": 34.692, + "eval_samples_per_second": 1.038, + "eval_steps_per_second": 0.144, "step": 50 }, { "epoch": 6.666666666666667, - "grad_norm": 0.028962766751646996, - "learning_rate": 0.00011111111111111112, - "loss": 1.0521, + "grad_norm": 0.02603345364332199, + "learning_rate": 6.666666666666667e-05, + "loss": 1.1086, "step": 60 }, { "epoch": 6.666666666666667, - "eval_loss": 1.2674343585968018, - "eval_runtime": 34.5586, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.2939578294754028, + "eval_runtime": 34.6444, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, "step": 60 } ], "logging_steps": 10, - "max_steps": 135, + "max_steps": 90, "num_input_tokens_seen": 0, - "num_train_epochs": 15, + "num_train_epochs": 10, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin index db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc..992e364d4b54f32a399ec3cd5f5f54c212ea0588 100644 --- a/checkpoint-60/training_args.bin +++ b/checkpoint-60/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7 size 5112 diff --git a/checkpoint-70/README.md b/checkpoint-70/README.md index 2d1596ffe16e4d5bdcdf0e1d4322e6667af95962..d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517 100644 --- a/checkpoint-70/README.md +++ b/checkpoint-70/README.md @@ -1,6 +1,6 @@ --- -library_name: peft base_model: TheBloke/Llama-2-7B-fp16 +library_name: peft --- # Model Card for Model ID diff --git a/checkpoint-70/adapter_config.json b/checkpoint-70/adapter_config.json index cbf93f2809e43fe18fd6ad23406293a68e7f5c98..539f4c41b2550fc30b9c2d0726f51adfa8e4b1e5 100644 --- a/checkpoint-70/adapter_config.json +++ b/checkpoint-70/adapter_config.json @@ -21,9 +21,9 @@ "revision": null, "target_modules": [ "k_proj", - "v_proj", + "q_proj", "o_proj", - "q_proj" + "v_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-70/adapter_model.safetensors b/checkpoint-70/adapter_model.safetensors index 7ed56b59d76b6a1da2bf2568dbfab45abde9d58f..d37ae25ce097d1ffcfe43a98645a9931dd503a17 100644 --- a/checkpoint-70/adapter_model.safetensors +++ b/checkpoint-70/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf3c37297ed626b47ab9c41ebb2a26326a24e606918ef7c7fd629793854a6799 +oid sha256:0c44abb9173442b0bc413ba469da95e7be794812df0c2a2c16e54fc301511e3e size 67143296 diff --git a/checkpoint-70/optimizer.pt b/checkpoint-70/optimizer.pt index 28208764b805c1768aa12d977465b294ef889535..bffa2b54a8c022331899d9d0ede4fa6b5c991b85 100644 --- a/checkpoint-70/optimizer.pt +++ b/checkpoint-70/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4df5d5144e456b9ea6bf7bd481d07a757eea5bf300085855555e34b8b031648 +oid sha256:2df92dd99064eb6aa6542b203055cf8cc892cff2f30a210807b7667c96cedc3a size 134433530 diff --git a/checkpoint-70/scheduler.pt b/checkpoint-70/scheduler.pt index 7d715f69ec0008c942a0c82d8c13ce801a3ab1c3..c0754c9b1731ec8dc0ccf94773cb0b29b5676b39 100644 --- a/checkpoint-70/scheduler.pt +++ b/checkpoint-70/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b4acdcca4bce1d839ad3b2f3830a589ce711414ff4548cbd4704149e66a014f +oid sha256:2b55d3cbe104822729f4f45e02a6c248fb8a4cb356c229f5c93e65066ff6a397 size 1064 diff --git a/checkpoint-70/trainer_state.json b/checkpoint-70/trainer_state.json index 3a48bb1be57349a537b049969fba97285f69e7b7..f237dd39f79c989b86e5ff97969592a63cb20742 100644 --- a/checkpoint-70/trainer_state.json +++ b/checkpoint-70/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 1.2424466609954834, + "best_metric": 1.2787070274353027, "best_model_checkpoint": "/kaggle/working/checkpoint-70", "epoch": 7.777777777777778, "eval_steps": 10, @@ -10,114 +10,114 @@ "log_history": [ { "epoch": 1.1111111111111112, - "grad_norm": 0.022282764315605164, - "learning_rate": 0.0001851851851851852, - "loss": 2.0424, + "grad_norm": 0.02217627689242363, + "learning_rate": 0.00017777777777777779, + "loss": 2.0442, "step": 10 }, { "epoch": 1.1111111111111112, - "eval_loss": 1.733155369758606, - "eval_runtime": 34.5543, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.737181544303894, + "eval_runtime": 35.1318, + "eval_samples_per_second": 1.025, + "eval_steps_per_second": 0.142, "step": 10 }, { "epoch": 2.2222222222222223, - "grad_norm": 0.018981408327817917, - "learning_rate": 0.00017037037037037037, - "loss": 1.6072, + "grad_norm": 0.0346713550388813, + "learning_rate": 0.00015555555555555556, + "loss": 1.6131, "step": 20 }, { "epoch": 2.2222222222222223, - "eval_loss": 1.5428930521011353, - "eval_runtime": 34.6485, - "eval_samples_per_second": 1.039, + "eval_loss": 1.5489343404769897, + "eval_runtime": 34.8402, + "eval_samples_per_second": 1.033, "eval_steps_per_second": 0.144, "step": 20 }, { "epoch": 3.3333333333333335, - "grad_norm": 0.023157037794589996, - "learning_rate": 0.00015555555555555556, - "loss": 1.4025, + "grad_norm": 0.02501535415649414, + "learning_rate": 0.00013333333333333334, + "loss": 1.4152, "step": 30 }, { "epoch": 3.3333333333333335, - "eval_loss": 1.4176721572875977, - "eval_runtime": 34.5433, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.4295110702514648, + "eval_runtime": 34.8537, + "eval_samples_per_second": 1.033, + "eval_steps_per_second": 0.143, "step": 30 }, { "epoch": 4.444444444444445, - "grad_norm": 0.021338749676942825, - "learning_rate": 0.00014074074074074076, - "loss": 1.285, + "grad_norm": 0.02104916237294674, + "learning_rate": 0.00011111111111111112, + "loss": 1.3068, "step": 40 }, { "epoch": 4.444444444444445, - "eval_loss": 1.3449772596359253, - "eval_runtime": 34.5594, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.3598744869232178, + "eval_runtime": 35.0281, + "eval_samples_per_second": 1.028, + "eval_steps_per_second": 0.143, "step": 40 }, { "epoch": 5.555555555555555, - "grad_norm": 0.02489505708217621, - "learning_rate": 0.00012592592592592592, - "loss": 1.1687, + "grad_norm": 0.022395364940166473, + "learning_rate": 8.888888888888889e-05, + "loss": 1.2049, "step": 50 }, { "epoch": 5.555555555555555, - "eval_loss": 1.2951068878173828, - "eval_runtime": 34.5896, - "eval_samples_per_second": 1.041, - "eval_steps_per_second": 0.145, + "eval_loss": 1.3168741464614868, + "eval_runtime": 34.692, + "eval_samples_per_second": 1.038, + "eval_steps_per_second": 0.144, "step": 50 }, { "epoch": 6.666666666666667, - "grad_norm": 0.028962766751646996, - "learning_rate": 0.00011111111111111112, - "loss": 1.0521, + "grad_norm": 0.02603345364332199, + "learning_rate": 6.666666666666667e-05, + "loss": 1.1086, "step": 60 }, { "epoch": 6.666666666666667, - "eval_loss": 1.2674343585968018, - "eval_runtime": 34.5586, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.2939578294754028, + "eval_runtime": 34.6444, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, "step": 60 }, { "epoch": 7.777777777777778, - "grad_norm": 0.033917125314474106, - "learning_rate": 9.62962962962963e-05, - "loss": 0.9885, + "grad_norm": 0.02798735536634922, + "learning_rate": 4.4444444444444447e-05, + "loss": 1.0716, "step": 70 }, { "epoch": 7.777777777777778, - "eval_loss": 1.2424466609954834, - "eval_runtime": 34.5412, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.2787070274353027, + "eval_runtime": 34.7046, + "eval_samples_per_second": 1.037, + "eval_steps_per_second": 0.144, "step": 70 } ], "logging_steps": 10, - "max_steps": 135, + "max_steps": 90, "num_input_tokens_seen": 0, - "num_train_epochs": 15, + "num_train_epochs": 10, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-70/training_args.bin b/checkpoint-70/training_args.bin index db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc..992e364d4b54f32a399ec3cd5f5f54c212ea0588 100644 --- a/checkpoint-70/training_args.bin +++ b/checkpoint-70/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7 size 5112 diff --git a/checkpoint-80/README.md b/checkpoint-80/README.md index 2d1596ffe16e4d5bdcdf0e1d4322e6667af95962..d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517 100644 --- a/checkpoint-80/README.md +++ b/checkpoint-80/README.md @@ -1,6 +1,6 @@ --- -library_name: peft base_model: TheBloke/Llama-2-7B-fp16 +library_name: peft --- # Model Card for Model ID diff --git a/checkpoint-80/adapter_config.json b/checkpoint-80/adapter_config.json index cbf93f2809e43fe18fd6ad23406293a68e7f5c98..539f4c41b2550fc30b9c2d0726f51adfa8e4b1e5 100644 --- a/checkpoint-80/adapter_config.json +++ b/checkpoint-80/adapter_config.json @@ -21,9 +21,9 @@ "revision": null, "target_modules": [ "k_proj", - "v_proj", + "q_proj", "o_proj", - "q_proj" + "v_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-80/adapter_model.safetensors b/checkpoint-80/adapter_model.safetensors index 6186d5290dd28ae7e99b0fe968a081ab6d21d5b9..da6da76e3af83770baf51b8d9c83a5a2b88ccd17 100644 --- a/checkpoint-80/adapter_model.safetensors +++ b/checkpoint-80/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8fa6fb27f2616ec681d67ea4bdbe8a2579fb38adfa9fe8d06fc9c19524e1da7b +oid sha256:ee04952462bb07ccc849271f7f78d75c5f8afc6c7ff3a1fa361acabf44fc0ed6 size 67143296 diff --git a/checkpoint-80/optimizer.pt b/checkpoint-80/optimizer.pt index 6eb9649fce4390a4b3a402652bfce39cc3c173ea..51b090c22dc8b998c0fedccdeba50c58dbd0edb2 100644 --- a/checkpoint-80/optimizer.pt +++ b/checkpoint-80/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ef989a76ea9a861bed593cbd65072f76fa28bbc0ec71984d26dd8c6e0775b98 +oid sha256:45c3bb44731e3776f6a34962f3d7fff840e05a3c8f5d4ae210cd851b70ac0818 size 134433530 diff --git a/checkpoint-80/scheduler.pt b/checkpoint-80/scheduler.pt index 0440fcc5c6c0ee35644344e92c7f5db3e29f3de6..c1333b2d0a9597978117f1289678ae5312068439 100644 --- a/checkpoint-80/scheduler.pt +++ b/checkpoint-80/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b82529da49a74f66d08569617b44d465bfa94ebb75128eba7c218e73a755220 +oid sha256:41273c1b8d35bfdc89d8195f07d765f03030886c79bd8a46673f085df81965d2 size 1064 diff --git a/checkpoint-80/trainer_state.json b/checkpoint-80/trainer_state.json index 9f89f0b6522c39b15eff7ee5969d94a70d12d7ba..1e7673eb76c2be46da58c796dc4c4042fc22719c 100644 --- a/checkpoint-80/trainer_state.json +++ b/checkpoint-80/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 1.2252851724624634, + "best_metric": 1.2677136659622192, "best_model_checkpoint": "/kaggle/working/checkpoint-80", "epoch": 8.88888888888889, "eval_steps": 10, @@ -10,129 +10,129 @@ "log_history": [ { "epoch": 1.1111111111111112, - "grad_norm": 0.022282764315605164, - "learning_rate": 0.0001851851851851852, - "loss": 2.0424, + "grad_norm": 0.02217627689242363, + "learning_rate": 0.00017777777777777779, + "loss": 2.0442, "step": 10 }, { "epoch": 1.1111111111111112, - "eval_loss": 1.733155369758606, - "eval_runtime": 34.5543, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.737181544303894, + "eval_runtime": 35.1318, + "eval_samples_per_second": 1.025, + "eval_steps_per_second": 0.142, "step": 10 }, { "epoch": 2.2222222222222223, - "grad_norm": 0.018981408327817917, - "learning_rate": 0.00017037037037037037, - "loss": 1.6072, + "grad_norm": 0.0346713550388813, + "learning_rate": 0.00015555555555555556, + "loss": 1.6131, "step": 20 }, { "epoch": 2.2222222222222223, - "eval_loss": 1.5428930521011353, - "eval_runtime": 34.6485, - "eval_samples_per_second": 1.039, + "eval_loss": 1.5489343404769897, + "eval_runtime": 34.8402, + "eval_samples_per_second": 1.033, "eval_steps_per_second": 0.144, "step": 20 }, { "epoch": 3.3333333333333335, - "grad_norm": 0.023157037794589996, - "learning_rate": 0.00015555555555555556, - "loss": 1.4025, + "grad_norm": 0.02501535415649414, + "learning_rate": 0.00013333333333333334, + "loss": 1.4152, "step": 30 }, { "epoch": 3.3333333333333335, - "eval_loss": 1.4176721572875977, - "eval_runtime": 34.5433, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.4295110702514648, + "eval_runtime": 34.8537, + "eval_samples_per_second": 1.033, + "eval_steps_per_second": 0.143, "step": 30 }, { "epoch": 4.444444444444445, - "grad_norm": 0.021338749676942825, - "learning_rate": 0.00014074074074074076, - "loss": 1.285, + "grad_norm": 0.02104916237294674, + "learning_rate": 0.00011111111111111112, + "loss": 1.3068, "step": 40 }, { "epoch": 4.444444444444445, - "eval_loss": 1.3449772596359253, - "eval_runtime": 34.5594, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.3598744869232178, + "eval_runtime": 35.0281, + "eval_samples_per_second": 1.028, + "eval_steps_per_second": 0.143, "step": 40 }, { "epoch": 5.555555555555555, - "grad_norm": 0.02489505708217621, - "learning_rate": 0.00012592592592592592, - "loss": 1.1687, + "grad_norm": 0.022395364940166473, + "learning_rate": 8.888888888888889e-05, + "loss": 1.2049, "step": 50 }, { "epoch": 5.555555555555555, - "eval_loss": 1.2951068878173828, - "eval_runtime": 34.5896, - "eval_samples_per_second": 1.041, - "eval_steps_per_second": 0.145, + "eval_loss": 1.3168741464614868, + "eval_runtime": 34.692, + "eval_samples_per_second": 1.038, + "eval_steps_per_second": 0.144, "step": 50 }, { "epoch": 6.666666666666667, - "grad_norm": 0.028962766751646996, - "learning_rate": 0.00011111111111111112, - "loss": 1.0521, + "grad_norm": 0.02603345364332199, + "learning_rate": 6.666666666666667e-05, + "loss": 1.1086, "step": 60 }, { "epoch": 6.666666666666667, - "eval_loss": 1.2674343585968018, - "eval_runtime": 34.5586, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.2939578294754028, + "eval_runtime": 34.6444, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, "step": 60 }, { "epoch": 7.777777777777778, - "grad_norm": 0.033917125314474106, - "learning_rate": 9.62962962962963e-05, - "loss": 0.9885, + "grad_norm": 0.02798735536634922, + "learning_rate": 4.4444444444444447e-05, + "loss": 1.0716, "step": 70 }, { "epoch": 7.777777777777778, - "eval_loss": 1.2424466609954834, - "eval_runtime": 34.5412, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.2787070274353027, + "eval_runtime": 34.7046, + "eval_samples_per_second": 1.037, + "eval_steps_per_second": 0.144, "step": 70 }, { "epoch": 8.88888888888889, - "grad_norm": 0.03393130004405975, - "learning_rate": 8.148148148148148e-05, - "loss": 0.8784, + "grad_norm": 0.028509726747870445, + "learning_rate": 2.2222222222222223e-05, + "loss": 1.0051, "step": 80 }, { "epoch": 8.88888888888889, - "eval_loss": 1.2252851724624634, - "eval_runtime": 34.58, - "eval_samples_per_second": 1.041, - "eval_steps_per_second": 0.145, + "eval_loss": 1.2677136659622192, + "eval_runtime": 34.6744, + "eval_samples_per_second": 1.038, + "eval_steps_per_second": 0.144, "step": 80 } ], "logging_steps": 10, - "max_steps": 135, + "max_steps": 90, "num_input_tokens_seen": 0, - "num_train_epochs": 15, + "num_train_epochs": 10, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-80/training_args.bin b/checkpoint-80/training_args.bin index db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc..992e364d4b54f32a399ec3cd5f5f54c212ea0588 100644 --- a/checkpoint-80/training_args.bin +++ b/checkpoint-80/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7 size 5112 diff --git a/checkpoint-90/README.md b/checkpoint-90/README.md index 2d1596ffe16e4d5bdcdf0e1d4322e6667af95962..d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517 100644 --- a/checkpoint-90/README.md +++ b/checkpoint-90/README.md @@ -1,6 +1,6 @@ --- -library_name: peft base_model: TheBloke/Llama-2-7B-fp16 +library_name: peft --- # Model Card for Model ID diff --git a/checkpoint-90/adapter_config.json b/checkpoint-90/adapter_config.json index cbf93f2809e43fe18fd6ad23406293a68e7f5c98..539f4c41b2550fc30b9c2d0726f51adfa8e4b1e5 100644 --- a/checkpoint-90/adapter_config.json +++ b/checkpoint-90/adapter_config.json @@ -21,9 +21,9 @@ "revision": null, "target_modules": [ "k_proj", - "v_proj", + "q_proj", "o_proj", - "q_proj" + "v_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-90/adapter_model.safetensors b/checkpoint-90/adapter_model.safetensors index 464aecd7c193abd91aa5ac6a99ad4747551b506f..aad60b0963bb0817ddff00ad978492cc2241a8a2 100644 --- a/checkpoint-90/adapter_model.safetensors +++ b/checkpoint-90/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b9c5a0c100f8bc851b10aa9fa27f71bf6aa477d8c0d7b7747dc10050dbdbe8a9 +oid sha256:6bdfd134cf3b5e167c3aa127bf57024a3e8ff71b6b0ea16d5493a51a01d7e317 size 67143296 diff --git a/checkpoint-90/optimizer.pt b/checkpoint-90/optimizer.pt index 0d21653ed911ba9a59aa948e3d510b5ca26671a9..4df135bf4da4f6ccc5d4d149b95f13a983128c6c 100644 --- a/checkpoint-90/optimizer.pt +++ b/checkpoint-90/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:56a16c09a2c26f51affd932a4f61ddef77f90f2764342dfcc95095b133132790 +oid sha256:d0b8a18cc875405991f47cfb39631df339f1779ad23225cf25c95a999ce4fce8 size 134433530 diff --git a/checkpoint-90/scheduler.pt b/checkpoint-90/scheduler.pt index 96950aa94dbaaaa48fc31ffce5aa897ea65e909c..d64adf52b600194652799bb966d26d4c3c3f82a0 100644 --- a/checkpoint-90/scheduler.pt +++ b/checkpoint-90/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:beb0291b44310791547f87f6141cea4568622e9ffdad94f4d68fcee857c2fabe +oid sha256:cf6a9a1e39c0655c6941309388d2a028b5b8dbbca031ca2500bdfcdc9f2c01aa size 1064 diff --git a/checkpoint-90/trainer_state.json b/checkpoint-90/trainer_state.json index 85a9b74cd54c9d40c81660a5a9479c8041b34cda..73dbd767f4448771ba9b48d6966e9f9712de9b40 100644 --- a/checkpoint-90/trainer_state.json +++ b/checkpoint-90/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 1.2115424871444702, + "best_metric": 1.2664015293121338, "best_model_checkpoint": "/kaggle/working/checkpoint-90", "epoch": 10.0, "eval_steps": 10, @@ -10,144 +10,144 @@ "log_history": [ { "epoch": 1.1111111111111112, - "grad_norm": 0.022282764315605164, - "learning_rate": 0.0001851851851851852, - "loss": 2.0424, + "grad_norm": 0.02217627689242363, + "learning_rate": 0.00017777777777777779, + "loss": 2.0442, "step": 10 }, { "epoch": 1.1111111111111112, - "eval_loss": 1.733155369758606, - "eval_runtime": 34.5543, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.737181544303894, + "eval_runtime": 35.1318, + "eval_samples_per_second": 1.025, + "eval_steps_per_second": 0.142, "step": 10 }, { "epoch": 2.2222222222222223, - "grad_norm": 0.018981408327817917, - "learning_rate": 0.00017037037037037037, - "loss": 1.6072, + "grad_norm": 0.0346713550388813, + "learning_rate": 0.00015555555555555556, + "loss": 1.6131, "step": 20 }, { "epoch": 2.2222222222222223, - "eval_loss": 1.5428930521011353, - "eval_runtime": 34.6485, - "eval_samples_per_second": 1.039, + "eval_loss": 1.5489343404769897, + "eval_runtime": 34.8402, + "eval_samples_per_second": 1.033, "eval_steps_per_second": 0.144, "step": 20 }, { "epoch": 3.3333333333333335, - "grad_norm": 0.023157037794589996, - "learning_rate": 0.00015555555555555556, - "loss": 1.4025, + "grad_norm": 0.02501535415649414, + "learning_rate": 0.00013333333333333334, + "loss": 1.4152, "step": 30 }, { "epoch": 3.3333333333333335, - "eval_loss": 1.4176721572875977, - "eval_runtime": 34.5433, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.4295110702514648, + "eval_runtime": 34.8537, + "eval_samples_per_second": 1.033, + "eval_steps_per_second": 0.143, "step": 30 }, { "epoch": 4.444444444444445, - "grad_norm": 0.021338749676942825, - "learning_rate": 0.00014074074074074076, - "loss": 1.285, + "grad_norm": 0.02104916237294674, + "learning_rate": 0.00011111111111111112, + "loss": 1.3068, "step": 40 }, { "epoch": 4.444444444444445, - "eval_loss": 1.3449772596359253, - "eval_runtime": 34.5594, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.3598744869232178, + "eval_runtime": 35.0281, + "eval_samples_per_second": 1.028, + "eval_steps_per_second": 0.143, "step": 40 }, { "epoch": 5.555555555555555, - "grad_norm": 0.02489505708217621, - "learning_rate": 0.00012592592592592592, - "loss": 1.1687, + "grad_norm": 0.022395364940166473, + "learning_rate": 8.888888888888889e-05, + "loss": 1.2049, "step": 50 }, { "epoch": 5.555555555555555, - "eval_loss": 1.2951068878173828, - "eval_runtime": 34.5896, - "eval_samples_per_second": 1.041, - "eval_steps_per_second": 0.145, + "eval_loss": 1.3168741464614868, + "eval_runtime": 34.692, + "eval_samples_per_second": 1.038, + "eval_steps_per_second": 0.144, "step": 50 }, { "epoch": 6.666666666666667, - "grad_norm": 0.028962766751646996, - "learning_rate": 0.00011111111111111112, - "loss": 1.0521, + "grad_norm": 0.02603345364332199, + "learning_rate": 6.666666666666667e-05, + "loss": 1.1086, "step": 60 }, { "epoch": 6.666666666666667, - "eval_loss": 1.2674343585968018, - "eval_runtime": 34.5586, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.2939578294754028, + "eval_runtime": 34.6444, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, "step": 60 }, { "epoch": 7.777777777777778, - "grad_norm": 0.033917125314474106, - "learning_rate": 9.62962962962963e-05, - "loss": 0.9885, + "grad_norm": 0.02798735536634922, + "learning_rate": 4.4444444444444447e-05, + "loss": 1.0716, "step": 70 }, { "epoch": 7.777777777777778, - "eval_loss": 1.2424466609954834, - "eval_runtime": 34.5412, - "eval_samples_per_second": 1.042, - "eval_steps_per_second": 0.145, + "eval_loss": 1.2787070274353027, + "eval_runtime": 34.7046, + "eval_samples_per_second": 1.037, + "eval_steps_per_second": 0.144, "step": 70 }, { "epoch": 8.88888888888889, - "grad_norm": 0.03393130004405975, - "learning_rate": 8.148148148148148e-05, - "loss": 0.8784, + "grad_norm": 0.028509726747870445, + "learning_rate": 2.2222222222222223e-05, + "loss": 1.0051, "step": 80 }, { "epoch": 8.88888888888889, - "eval_loss": 1.2252851724624634, - "eval_runtime": 34.58, - "eval_samples_per_second": 1.041, - "eval_steps_per_second": 0.145, + "eval_loss": 1.2677136659622192, + "eval_runtime": 34.6744, + "eval_samples_per_second": 1.038, + "eval_steps_per_second": 0.144, "step": 80 }, { "epoch": 10.0, - "grad_norm": 0.04081139340996742, - "learning_rate": 6.666666666666667e-05, - "loss": 0.8154, + "grad_norm": 0.03036084771156311, + "learning_rate": 0.0, + "loss": 0.9939, "step": 90 }, { "epoch": 10.0, - "eval_loss": 1.2115424871444702, - "eval_runtime": 34.5784, - "eval_samples_per_second": 1.041, - "eval_steps_per_second": 0.145, + "eval_loss": 1.2664015293121338, + "eval_runtime": 34.6396, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, "step": 90 } ], "logging_steps": 10, - "max_steps": 135, + "max_steps": 90, "num_input_tokens_seen": 0, - "num_train_epochs": 15, + "num_train_epochs": 10, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { @@ -165,7 +165,7 @@ "should_evaluate": false, "should_log": false, "should_save": true, - "should_training_stop": false + "should_training_stop": true }, "attributes": {} } diff --git a/checkpoint-90/training_args.bin b/checkpoint-90/training_args.bin index db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc..992e364d4b54f32a399ec3cd5f5f54c212ea0588 100644 --- a/checkpoint-90/training_args.bin +++ b/checkpoint-90/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7 size 5112 diff --git a/config.json b/config.json index 80afc097aeac0205c9feeb402b87040f5d89be74..258a5dd33a9cd50769867ae18e3f1c8164c04074 100644 --- a/config.json +++ b/config.json @@ -39,7 +39,7 @@ "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", - "transformers_version": "4.41.2", + "transformers_version": "4.42.3", "use_cache": true, "vocab_size": 32000 } diff --git a/runs/Aug04_20-10-13_a1e7c179e62a/events.out.tfevents.1722802215.a1e7c179e62a.34.0 b/runs/Aug04_20-10-13_a1e7c179e62a/events.out.tfevents.1722802215.a1e7c179e62a.34.0 new file mode 100644 index 0000000000000000000000000000000000000000..c457c886410a5d8b3b1e6d421359e7e6d3b6845a --- /dev/null +++ b/runs/Aug04_20-10-13_a1e7c179e62a/events.out.tfevents.1722802215.a1e7c179e62a.34.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15748826c7dc1fb9aaa4deb066901ddbbb01ad1047b96515ddaef526591d4571 +size 9996