diff --git a/README.md b/README.md index d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ --- -base_model: TheBloke/Llama-2-7B-fp16 library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 --- # Model Card for Model ID diff --git a/adapter_config.json b/adapter_config.json index 923a080092a0575b2d3304503832fcf7c77462f4..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 100644 --- a/adapter_config.json +++ b/adapter_config.json @@ -20,10 +20,10 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "k_proj", "v_proj", - "q_proj", "o_proj", - "k_proj" + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/adapter_model.safetensors b/adapter_model.safetensors index 06513132c03fe764076772f7ca590751400c2853..b4f9fb90e930a7899b68e1787dce408f742c6dde 100644 --- a/adapter_model.safetensors +++ b/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06e781f63931f2a767880f6d4b91f140eb5043f4a642a1a68d8adbb81bbcae9d +oid sha256:58a8f71955b57e8e1082d6f6f180646592764af968e225bda2b732733e64a34c size 67143296 diff --git a/checkpoint-10/README.md b/checkpoint-10/README.md index d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 100644 --- a/checkpoint-10/README.md +++ b/checkpoint-10/README.md @@ -1,6 +1,6 @@ --- -base_model: TheBloke/Llama-2-7B-fp16 library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 --- # Model Card for Model ID diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json index 923a080092a0575b2d3304503832fcf7c77462f4..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 100644 --- a/checkpoint-10/adapter_config.json +++ b/checkpoint-10/adapter_config.json @@ -20,10 +20,10 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "k_proj", "v_proj", - "q_proj", "o_proj", - "k_proj" + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-10/adapter_model.safetensors b/checkpoint-10/adapter_model.safetensors index 3602a4d36d9e2b76fde36e4378e7ca0bb641aa41..5c2ebe7a9d782b318f37eeb0942020de6ef351c9 100644 --- a/checkpoint-10/adapter_model.safetensors +++ b/checkpoint-10/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83bdb12be1e8a3735b34051dd69cbe3689aa855355eecde29c57851904d81ecf +oid sha256:232e478a97bd908880b31806be86d4debfd0492ee0eb83ba509e3293c6c3565a size 67143296 diff --git a/checkpoint-10/optimizer.pt b/checkpoint-10/optimizer.pt index fc8e9df2aaea2f8b4c4b743a6915e923c7f3e01a..f6aa09b5a5c07210fa7f9655cd844f149c370ddd 100644 --- a/checkpoint-10/optimizer.pt +++ b/checkpoint-10/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a7148ac4a39214d064ffce80fb50e3dd973562f331727ef157843d938bed4c7 +oid sha256:40afa20170f291b125eb81d0ffabecaf09477598d9e256ea01ff0b97e18e70a9 size 134433530 diff --git a/checkpoint-10/scheduler.pt b/checkpoint-10/scheduler.pt index 3f66941942fe4ae2d6b0f5794daec11d0b26e37b..941ed765630f615d588fd402f4d9cc96b1946236 100644 --- a/checkpoint-10/scheduler.pt +++ b/checkpoint-10/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:657ce80fb8c4abe8e223e56f6ece2f925cddc4720e917ac9a1e7ce94ab2749a8 +oid sha256:a35af14c8f81087292db706fc180cce03fb0f692ed151b0664293b217dd11fa5 size 1064 diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json index 4c77a4bc2a30fa60428c0e5ce619ff776b69241c..5e5b97c2c1c2cda602de8084d07f4f9e745d299b 100644 --- a/checkpoint-10/trainer_state.json +++ b/checkpoint-10/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 1.769914984703064, + "best_metric": 1.733155369758606, "best_model_checkpoint": "/kaggle/working/checkpoint-10", "epoch": 1.1111111111111112, "eval_steps": 10, @@ -10,24 +10,24 @@ "log_history": [ { "epoch": 1.1111111111111112, - "grad_norm": 0.022207504138350487, - "learning_rate": 0.00012592592592592592, - "loss": 2.058, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, "step": 10 }, { "epoch": 1.1111111111111112, - "eval_loss": 1.769914984703064, - "eval_runtime": 34.8115, - "eval_samples_per_second": 1.034, - "eval_steps_per_second": 0.144, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, "step": 10 } ], "logging_steps": 10, - "max_steps": 27, + "max_steps": 135, "num_input_tokens_seen": 0, - "num_train_epochs": 3, + "num_train_epochs": 15, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin index ac046aa3239775eec549604f7b57242cef974c25..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc 100644 --- a/checkpoint-10/training_args.bin +++ b/checkpoint-10/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6729829c35b3a85ce1b9965237f3a3b4ff249959cfad240042e0219e283812ed +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 size 5112 diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,31 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5474456dc9191482da68a63e25fc4b57741ddb66 --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac3b59826a91a331332b5850491ffec38f48afde058dead68205fb9903924aac +size 67143296 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed26cd0b52ff12b00beef892c62747154ff73280 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49ff3c81928bee92eb73c79e2f6088612cc35f2fab427d0f73ba21269e3c8085 +size 134433530 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bda67fd2eea9e9037427984468dca53fb7064feb --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad9fcf2e86ce0f113dac0c406c0b49ce72f5891aaa3942215cde240415672553 +size 14244 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4edc88d9af1e5877cdf4a912cba5fd5dd30760f3 --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a67071c9831c7625e547eac0c0538006ee7fe06d1b1052844fd1cdb5172b8b9f +size 1064 diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7c2f7daa1437992d332458338001a59717ed5348 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,192 @@ +{ + "best_metric": 1.2115424871444702, + "best_model_checkpoint": "/kaggle/working/checkpoint-90", + "epoch": 11.11111111111111, + "eval_steps": 10, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.1111111111111112, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, + "step": 10 + }, + { + "epoch": 1.1111111111111112, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 10 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.018981408327817917, + "learning_rate": 0.00017037037037037037, + "loss": 1.6072, + "step": 20 + }, + { + "epoch": 2.2222222222222223, + "eval_loss": 1.5428930521011353, + "eval_runtime": 34.6485, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 20 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.023157037794589996, + "learning_rate": 0.00015555555555555556, + "loss": 1.4025, + "step": 30 + }, + { + "epoch": 3.3333333333333335, + "eval_loss": 1.4176721572875977, + "eval_runtime": 34.5433, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 30 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.021338749676942825, + "learning_rate": 0.00014074074074074076, + "loss": 1.285, + "step": 40 + }, + { + "epoch": 4.444444444444445, + "eval_loss": 1.3449772596359253, + "eval_runtime": 34.5594, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 40 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.02489505708217621, + "learning_rate": 0.00012592592592592592, + "loss": 1.1687, + "step": 50 + }, + { + "epoch": 5.555555555555555, + "eval_loss": 1.2951068878173828, + "eval_runtime": 34.5896, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 50 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.028962766751646996, + "learning_rate": 0.00011111111111111112, + "loss": 1.0521, + "step": 60 + }, + { + "epoch": 6.666666666666667, + "eval_loss": 1.2674343585968018, + "eval_runtime": 34.5586, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 60 + }, + { + "epoch": 7.777777777777778, + "grad_norm": 0.033917125314474106, + "learning_rate": 9.62962962962963e-05, + "loss": 0.9885, + "step": 70 + }, + { + "epoch": 7.777777777777778, + "eval_loss": 1.2424466609954834, + "eval_runtime": 34.5412, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 70 + }, + { + "epoch": 8.88888888888889, + "grad_norm": 0.03393130004405975, + "learning_rate": 8.148148148148148e-05, + "loss": 0.8784, + "step": 80 + }, + { + "epoch": 8.88888888888889, + "eval_loss": 1.2252851724624634, + "eval_runtime": 34.58, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 80 + }, + { + "epoch": 10.0, + "grad_norm": 0.04081139340996742, + "learning_rate": 6.666666666666667e-05, + "loss": 0.8154, + "step": 90 + }, + { + "epoch": 10.0, + "eval_loss": 1.2115424871444702, + "eval_runtime": 34.5784, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 90 + }, + { + "epoch": 11.11111111111111, + "grad_norm": 0.04114004969596863, + "learning_rate": 5.185185185185185e-05, + "loss": 0.7376, + "step": 100 + }, + { + "epoch": 11.11111111111111, + "eval_loss": 1.2147088050842285, + "eval_runtime": 34.595, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 100 + } + ], + "logging_steps": 10, + "max_steps": 135, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 10, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.6718311886422016e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +size 5112 diff --git a/checkpoint-110/README.md b/checkpoint-110/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 --- /dev/null +++ b/checkpoint-110/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-110/adapter_config.json b/checkpoint-110/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 --- /dev/null +++ b/checkpoint-110/adapter_config.json @@ -0,0 +1,31 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-110/adapter_model.safetensors b/checkpoint-110/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f605b66a87175ab6628e4ed30bb65e58cfd30b5 --- /dev/null +++ b/checkpoint-110/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be7b95474c7a25c6961db7fe4913e88e0e78819b321a21a383b179782c22ef6c +size 67143296 diff --git a/checkpoint-110/optimizer.pt b/checkpoint-110/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..128e5885bb3a9ff0f3a9e5377049be1f310e384b --- /dev/null +++ b/checkpoint-110/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:567bf2cdba466fb3da7301567fc6eee0fd77e99808ab9402911468a94017eb0a +size 134433530 diff --git a/checkpoint-110/rng_state.pth b/checkpoint-110/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c7d7b7fbbf5033245a0164d48dee7aac7380c478 --- /dev/null +++ b/checkpoint-110/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b74e65bff4ecaf37ea6199e51bc5b668d4b069c4bc042e44eeac5d4c4ffef401 +size 14244 diff --git a/checkpoint-110/scheduler.pt b/checkpoint-110/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..70cd77658255ba7e2dd9a0fdcb8ac3766d0b1df3 --- /dev/null +++ b/checkpoint-110/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c7403172deaf546b51410d54cab636a5c53264be17aa6a439e5934523944587 +size 1064 diff --git a/checkpoint-110/trainer_state.json b/checkpoint-110/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3cfcc9fcbe02f92fe34d4c23ddc019a88d9a5400 --- /dev/null +++ b/checkpoint-110/trainer_state.json @@ -0,0 +1,207 @@ +{ + "best_metric": 1.2115424871444702, + "best_model_checkpoint": "/kaggle/working/checkpoint-90", + "epoch": 12.222222222222221, + "eval_steps": 10, + "global_step": 110, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.1111111111111112, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, + "step": 10 + }, + { + "epoch": 1.1111111111111112, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 10 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.018981408327817917, + "learning_rate": 0.00017037037037037037, + "loss": 1.6072, + "step": 20 + }, + { + "epoch": 2.2222222222222223, + "eval_loss": 1.5428930521011353, + "eval_runtime": 34.6485, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 20 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.023157037794589996, + "learning_rate": 0.00015555555555555556, + "loss": 1.4025, + "step": 30 + }, + { + "epoch": 3.3333333333333335, + "eval_loss": 1.4176721572875977, + "eval_runtime": 34.5433, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 30 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.021338749676942825, + "learning_rate": 0.00014074074074074076, + "loss": 1.285, + "step": 40 + }, + { + "epoch": 4.444444444444445, + "eval_loss": 1.3449772596359253, + "eval_runtime": 34.5594, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 40 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.02489505708217621, + "learning_rate": 0.00012592592592592592, + "loss": 1.1687, + "step": 50 + }, + { + "epoch": 5.555555555555555, + "eval_loss": 1.2951068878173828, + "eval_runtime": 34.5896, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 50 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.028962766751646996, + "learning_rate": 0.00011111111111111112, + "loss": 1.0521, + "step": 60 + }, + { + "epoch": 6.666666666666667, + "eval_loss": 1.2674343585968018, + "eval_runtime": 34.5586, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 60 + }, + { + "epoch": 7.777777777777778, + "grad_norm": 0.033917125314474106, + "learning_rate": 9.62962962962963e-05, + "loss": 0.9885, + "step": 70 + }, + { + "epoch": 7.777777777777778, + "eval_loss": 1.2424466609954834, + "eval_runtime": 34.5412, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 70 + }, + { + "epoch": 8.88888888888889, + "grad_norm": 0.03393130004405975, + "learning_rate": 8.148148148148148e-05, + "loss": 0.8784, + "step": 80 + }, + { + "epoch": 8.88888888888889, + "eval_loss": 1.2252851724624634, + "eval_runtime": 34.58, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 80 + }, + { + "epoch": 10.0, + "grad_norm": 0.04081139340996742, + "learning_rate": 6.666666666666667e-05, + "loss": 0.8154, + "step": 90 + }, + { + "epoch": 10.0, + "eval_loss": 1.2115424871444702, + "eval_runtime": 34.5784, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 90 + }, + { + "epoch": 11.11111111111111, + "grad_norm": 0.04114004969596863, + "learning_rate": 5.185185185185185e-05, + "loss": 0.7376, + "step": 100 + }, + { + "epoch": 11.11111111111111, + "eval_loss": 1.2147088050842285, + "eval_runtime": 34.595, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 100 + }, + { + "epoch": 12.222222222222221, + "grad_norm": 0.04217207431793213, + "learning_rate": 3.7037037037037037e-05, + "loss": 0.6642, + "step": 110 + }, + { + "epoch": 12.222222222222221, + "eval_loss": 1.2141155004501343, + "eval_runtime": 34.6053, + "eval_samples_per_second": 1.04, + "eval_steps_per_second": 0.144, + "step": 110 + } + ], + "logging_steps": 10, + "max_steps": 135, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 10, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.93951986040832e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-110/training_args.bin b/checkpoint-110/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc --- /dev/null +++ b/checkpoint-110/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +size 5112 diff --git a/checkpoint-120/README.md b/checkpoint-120/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 --- /dev/null +++ b/checkpoint-120/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-120/adapter_config.json b/checkpoint-120/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 --- /dev/null +++ b/checkpoint-120/adapter_config.json @@ -0,0 +1,31 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-120/adapter_model.safetensors b/checkpoint-120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0331d3326f9fe8e6101bed734cda4120704beb5 --- /dev/null +++ b/checkpoint-120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb1690155a5e4a4452a0e292d686003b08ac457e37c20099c948aae55ca8e453 +size 67143296 diff --git a/checkpoint-120/optimizer.pt b/checkpoint-120/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..92ef0684e214370f01a6b0138c929044159067b4 --- /dev/null +++ b/checkpoint-120/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14cc791678f8ce58b5f8b40f05113973aa7f94325c356b74d4df44ff8c1a956a +size 134433530 diff --git a/checkpoint-120/rng_state.pth b/checkpoint-120/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..89a481768021c3826e125da45856bda83012f974 --- /dev/null +++ b/checkpoint-120/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899181c47750d0177e5f1571bc0ad909b3a3ebab597f384c5023f1d8dcb8a73a +size 14244 diff --git a/checkpoint-120/scheduler.pt b/checkpoint-120/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d230859237cac17ad8b06f8e53289cfacddd780 --- /dev/null +++ b/checkpoint-120/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7846da062f26e398f1065295f9c7cbaf4768d3aa6b5518863ce89b7eb9d328e +size 1064 diff --git a/checkpoint-120/trainer_state.json b/checkpoint-120/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1d0d62df39a8ef027f9eb6cb16e365592f41f1b1 --- /dev/null +++ b/checkpoint-120/trainer_state.json @@ -0,0 +1,222 @@ +{ + "best_metric": 1.20501708984375, + "best_model_checkpoint": "/kaggle/working/checkpoint-120", + "epoch": 13.333333333333334, + "eval_steps": 10, + "global_step": 120, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.1111111111111112, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, + "step": 10 + }, + { + "epoch": 1.1111111111111112, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 10 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.018981408327817917, + "learning_rate": 0.00017037037037037037, + "loss": 1.6072, + "step": 20 + }, + { + "epoch": 2.2222222222222223, + "eval_loss": 1.5428930521011353, + "eval_runtime": 34.6485, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 20 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.023157037794589996, + "learning_rate": 0.00015555555555555556, + "loss": 1.4025, + "step": 30 + }, + { + "epoch": 3.3333333333333335, + "eval_loss": 1.4176721572875977, + "eval_runtime": 34.5433, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 30 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.021338749676942825, + "learning_rate": 0.00014074074074074076, + "loss": 1.285, + "step": 40 + }, + { + "epoch": 4.444444444444445, + "eval_loss": 1.3449772596359253, + "eval_runtime": 34.5594, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 40 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.02489505708217621, + "learning_rate": 0.00012592592592592592, + "loss": 1.1687, + "step": 50 + }, + { + "epoch": 5.555555555555555, + "eval_loss": 1.2951068878173828, + "eval_runtime": 34.5896, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 50 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.028962766751646996, + "learning_rate": 0.00011111111111111112, + "loss": 1.0521, + "step": 60 + }, + { + "epoch": 6.666666666666667, + "eval_loss": 1.2674343585968018, + "eval_runtime": 34.5586, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 60 + }, + { + "epoch": 7.777777777777778, + "grad_norm": 0.033917125314474106, + "learning_rate": 9.62962962962963e-05, + "loss": 0.9885, + "step": 70 + }, + { + "epoch": 7.777777777777778, + "eval_loss": 1.2424466609954834, + "eval_runtime": 34.5412, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 70 + }, + { + "epoch": 8.88888888888889, + "grad_norm": 0.03393130004405975, + "learning_rate": 8.148148148148148e-05, + "loss": 0.8784, + "step": 80 + }, + { + "epoch": 8.88888888888889, + "eval_loss": 1.2252851724624634, + "eval_runtime": 34.58, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 80 + }, + { + "epoch": 10.0, + "grad_norm": 0.04081139340996742, + "learning_rate": 6.666666666666667e-05, + "loss": 0.8154, + "step": 90 + }, + { + "epoch": 10.0, + "eval_loss": 1.2115424871444702, + "eval_runtime": 34.5784, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 90 + }, + { + "epoch": 11.11111111111111, + "grad_norm": 0.04114004969596863, + "learning_rate": 5.185185185185185e-05, + "loss": 0.7376, + "step": 100 + }, + { + "epoch": 11.11111111111111, + "eval_loss": 1.2147088050842285, + "eval_runtime": 34.595, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 100 + }, + { + "epoch": 12.222222222222221, + "grad_norm": 0.04217207431793213, + "learning_rate": 3.7037037037037037e-05, + "loss": 0.6642, + "step": 110 + }, + { + "epoch": 12.222222222222221, + "eval_loss": 1.2141155004501343, + "eval_runtime": 34.6053, + "eval_samples_per_second": 1.04, + "eval_steps_per_second": 0.144, + "step": 110 + }, + { + "epoch": 13.333333333333334, + "grad_norm": 0.04223904013633728, + "learning_rate": 2.2222222222222223e-05, + "loss": 0.6353, + "step": 120 + }, + { + "epoch": 13.333333333333334, + "eval_loss": 1.20501708984375, + "eval_runtime": 34.6447, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 120 + } + ], + "logging_steps": 10, + "max_steps": 135, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 10, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.2063182503346176e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-120/training_args.bin b/checkpoint-120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc --- /dev/null +++ b/checkpoint-120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +size 5112 diff --git a/checkpoint-130/README.md b/checkpoint-130/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 --- /dev/null +++ b/checkpoint-130/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-130/adapter_config.json b/checkpoint-130/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 --- /dev/null +++ b/checkpoint-130/adapter_config.json @@ -0,0 +1,31 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-130/adapter_model.safetensors b/checkpoint-130/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4f9fb90e930a7899b68e1787dce408f742c6dde --- /dev/null +++ b/checkpoint-130/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58a8f71955b57e8e1082d6f6f180646592764af968e225bda2b732733e64a34c +size 67143296 diff --git a/checkpoint-130/optimizer.pt b/checkpoint-130/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..11988fcadeca2673214a0285e4c846ac04d4f1ae --- /dev/null +++ b/checkpoint-130/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4daf30044a564bcf59ea2751408ea8dce0ec90f49d285e08583c20de07041a11 +size 134433530 diff --git a/checkpoint-130/rng_state.pth b/checkpoint-130/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..10c9d602a70402bac855f7680b6ab4917c70e41a --- /dev/null +++ b/checkpoint-130/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae41843bfbaf9bfb2da44e736c97098eebc3d8816f1d5ca8fbf5df92c5f45233 +size 14244 diff --git a/checkpoint-130/scheduler.pt b/checkpoint-130/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cdb223c58ffea4e674469ccff687934d4d7f279 --- /dev/null +++ b/checkpoint-130/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b0d7953ae2a99e745d016a7542f93f08a2bf1f998ff2ee97a233ec022e1d0b +size 1064 diff --git a/checkpoint-130/trainer_state.json b/checkpoint-130/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f86474c05c992b2a7868efa880e894f56bb20e75 --- /dev/null +++ b/checkpoint-130/trainer_state.json @@ -0,0 +1,237 @@ +{ + "best_metric": 1.2035548686981201, + "best_model_checkpoint": "/kaggle/working/checkpoint-130", + "epoch": 14.444444444444445, + "eval_steps": 10, + "global_step": 130, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.1111111111111112, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, + "step": 10 + }, + { + "epoch": 1.1111111111111112, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 10 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.018981408327817917, + "learning_rate": 0.00017037037037037037, + "loss": 1.6072, + "step": 20 + }, + { + "epoch": 2.2222222222222223, + "eval_loss": 1.5428930521011353, + "eval_runtime": 34.6485, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 20 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.023157037794589996, + "learning_rate": 0.00015555555555555556, + "loss": 1.4025, + "step": 30 + }, + { + "epoch": 3.3333333333333335, + "eval_loss": 1.4176721572875977, + "eval_runtime": 34.5433, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 30 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.021338749676942825, + "learning_rate": 0.00014074074074074076, + "loss": 1.285, + "step": 40 + }, + { + "epoch": 4.444444444444445, + "eval_loss": 1.3449772596359253, + "eval_runtime": 34.5594, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 40 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.02489505708217621, + "learning_rate": 0.00012592592592592592, + "loss": 1.1687, + "step": 50 + }, + { + "epoch": 5.555555555555555, + "eval_loss": 1.2951068878173828, + "eval_runtime": 34.5896, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 50 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.028962766751646996, + "learning_rate": 0.00011111111111111112, + "loss": 1.0521, + "step": 60 + }, + { + "epoch": 6.666666666666667, + "eval_loss": 1.2674343585968018, + "eval_runtime": 34.5586, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 60 + }, + { + "epoch": 7.777777777777778, + "grad_norm": 0.033917125314474106, + "learning_rate": 9.62962962962963e-05, + "loss": 0.9885, + "step": 70 + }, + { + "epoch": 7.777777777777778, + "eval_loss": 1.2424466609954834, + "eval_runtime": 34.5412, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 70 + }, + { + "epoch": 8.88888888888889, + "grad_norm": 0.03393130004405975, + "learning_rate": 8.148148148148148e-05, + "loss": 0.8784, + "step": 80 + }, + { + "epoch": 8.88888888888889, + "eval_loss": 1.2252851724624634, + "eval_runtime": 34.58, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 80 + }, + { + "epoch": 10.0, + "grad_norm": 0.04081139340996742, + "learning_rate": 6.666666666666667e-05, + "loss": 0.8154, + "step": 90 + }, + { + "epoch": 10.0, + "eval_loss": 1.2115424871444702, + "eval_runtime": 34.5784, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 90 + }, + { + "epoch": 11.11111111111111, + "grad_norm": 0.04114004969596863, + "learning_rate": 5.185185185185185e-05, + "loss": 0.7376, + "step": 100 + }, + { + "epoch": 11.11111111111111, + "eval_loss": 1.2147088050842285, + "eval_runtime": 34.595, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 100 + }, + { + "epoch": 12.222222222222221, + "grad_norm": 0.04217207431793213, + "learning_rate": 3.7037037037037037e-05, + "loss": 0.6642, + "step": 110 + }, + { + "epoch": 12.222222222222221, + "eval_loss": 1.2141155004501343, + "eval_runtime": 34.6053, + "eval_samples_per_second": 1.04, + "eval_steps_per_second": 0.144, + "step": 110 + }, + { + "epoch": 13.333333333333334, + "grad_norm": 0.04223904013633728, + "learning_rate": 2.2222222222222223e-05, + "loss": 0.6353, + "step": 120 + }, + { + "epoch": 13.333333333333334, + "eval_loss": 1.20501708984375, + "eval_runtime": 34.6447, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 120 + }, + { + "epoch": 14.444444444444445, + "grad_norm": 0.04531875252723694, + "learning_rate": 7.4074074074074075e-06, + "loss": 0.5992, + "step": 130 + }, + { + "epoch": 14.444444444444445, + "eval_loss": 1.2035548686981201, + "eval_runtime": 34.5919, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 130 + } + ], + "logging_steps": 10, + "max_steps": 135, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 10, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.474229492560691e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-130/training_args.bin b/checkpoint-130/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc --- /dev/null +++ b/checkpoint-130/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +size 5112 diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md index d94fa0fb36d5aa76962cf3fda3ca0bfe4c2fa517..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 100644 --- a/checkpoint-20/README.md +++ b/checkpoint-20/README.md @@ -1,6 +1,6 @@ --- -base_model: TheBloke/Llama-2-7B-fp16 library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 --- # Model Card for Model ID diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json index 923a080092a0575b2d3304503832fcf7c77462f4..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 100644 --- a/checkpoint-20/adapter_config.json +++ b/checkpoint-20/adapter_config.json @@ -20,10 +20,10 @@ "rank_pattern": {}, "revision": null, "target_modules": [ + "k_proj", "v_proj", - "q_proj", "o_proj", - "k_proj" + "q_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors index 06513132c03fe764076772f7ca590751400c2853..8223e0ce8d841941324b29f8c42fc45abdc14765 100644 --- a/checkpoint-20/adapter_model.safetensors +++ b/checkpoint-20/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06e781f63931f2a767880f6d4b91f140eb5043f4a642a1a68d8adbb81bbcae9d +oid sha256:b330fca16a5ab17049bdd4e6e42df52bd8bd0ac1fbeb1577f683e7b1212f0d83 size 67143296 diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt index c1124b3ffb9edcb96b7c30f63f1ce1072adfe1a0..f4d35d60066f0933a1fd192740c0a094a9d75120 100644 --- a/checkpoint-20/optimizer.pt +++ b/checkpoint-20/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3c2d558714cf4e61497566f22afcd3dd08e9e361ad958db0e013352981bfea11 +oid sha256:83e5faf17f41fc7dcd6907ecd97bc5e5b82bfa377a6683ddd9d25e9c5a4f3efd size 134433530 diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt index b763c3c2c2e02a82b650f9eee98c8def87c1ad7b..4f536414baf7d040b7ee6765e42c28f6d23eb06d 100644 --- a/checkpoint-20/scheduler.pt +++ b/checkpoint-20/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebdc6c179af92eaaefe04390084f5c059665a54c6acce66616e26194c4f7008a +oid sha256:18bf434d2955f164730aed9c7994cdd6c773ed80052fc3ef975fae4adef61283 size 1064 diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json index 6cf871e71df9c7807e59faaa6a247dccce1e7a9c..fbb6a3c50e3d893d3f94028d5087a658e6bbc7c2 100644 --- a/checkpoint-20/trainer_state.json +++ b/checkpoint-20/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 1.6234142780303955, + "best_metric": 1.5428930521011353, "best_model_checkpoint": "/kaggle/working/checkpoint-20", "epoch": 2.2222222222222223, "eval_steps": 10, @@ -10,39 +10,39 @@ "log_history": [ { "epoch": 1.1111111111111112, - "grad_norm": 0.022207504138350487, - "learning_rate": 0.00012592592592592592, - "loss": 2.058, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, "step": 10 }, { "epoch": 1.1111111111111112, - "eval_loss": 1.769914984703064, - "eval_runtime": 34.8115, - "eval_samples_per_second": 1.034, - "eval_steps_per_second": 0.144, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, "step": 10 }, { "epoch": 2.2222222222222223, - "grad_norm": 0.022052476182579994, - "learning_rate": 5.185185185185185e-05, - "loss": 1.6674, + "grad_norm": 0.018981408327817917, + "learning_rate": 0.00017037037037037037, + "loss": 1.6072, "step": 20 }, { "epoch": 2.2222222222222223, - "eval_loss": 1.6234142780303955, - "eval_runtime": 34.7607, - "eval_samples_per_second": 1.036, + "eval_loss": 1.5428930521011353, + "eval_runtime": 34.6485, + "eval_samples_per_second": 1.039, "eval_steps_per_second": 0.144, "step": 20 } ], "logging_steps": 10, - "max_steps": 27, + "max_steps": 135, "num_input_tokens_seen": 0, - "num_train_epochs": 3, + "num_train_epochs": 15, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin index ac046aa3239775eec549604f7b57242cef974c25..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc 100644 --- a/checkpoint-20/training_args.bin +++ b/checkpoint-20/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6729829c35b3a85ce1b9965237f3a3b4ff249959cfad240042e0219e283812ed +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 size 5112 diff --git a/checkpoint-30/README.md b/checkpoint-30/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 --- /dev/null +++ b/checkpoint-30/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-30/adapter_config.json b/checkpoint-30/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 --- /dev/null +++ b/checkpoint-30/adapter_config.json @@ -0,0 +1,31 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-30/adapter_model.safetensors b/checkpoint-30/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2dde285768e1a4318a4612f90065ca618c5bdffc --- /dev/null +++ b/checkpoint-30/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473c4bd6da7c40c632f6b8a627a42ab5e26f5bc3b977b422287013a48424e372 +size 67143296 diff --git a/checkpoint-30/optimizer.pt b/checkpoint-30/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c08c60df1dba8ab4b623f9330e698d2be010f492 --- /dev/null +++ b/checkpoint-30/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a69f8968f0b0dc5be6245d8969030cda587b4b349b62c8e5c647bd89875fa2 +size 134433530 diff --git a/checkpoint-30/rng_state.pth b/checkpoint-30/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d5d7d167cef9e301898f6825f88ed06664db22b --- /dev/null +++ b/checkpoint-30/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5db3958c8bba66b9da4e7dd565b9b2233d51d750c94fc093ab71c68c8d043d29 +size 14244 diff --git a/checkpoint-30/scheduler.pt b/checkpoint-30/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc722270e58bfcbf5b68dd4051aa690f4a07fbeb --- /dev/null +++ b/checkpoint-30/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8a3ad18d2d099317927f68afa34f1d799ef6644ec4025b52033947a650a29e2 +size 1064 diff --git a/checkpoint-30/trainer_state.json b/checkpoint-30/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e6276205b85275a723731d05c8c327a2a3db50b4 --- /dev/null +++ b/checkpoint-30/trainer_state.json @@ -0,0 +1,87 @@ +{ + "best_metric": 1.4176721572875977, + "best_model_checkpoint": "/kaggle/working/checkpoint-30", + "epoch": 3.3333333333333335, + "eval_steps": 10, + "global_step": 30, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.1111111111111112, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, + "step": 10 + }, + { + "epoch": 1.1111111111111112, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 10 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.018981408327817917, + "learning_rate": 0.00017037037037037037, + "loss": 1.6072, + "step": 20 + }, + { + "epoch": 2.2222222222222223, + "eval_loss": 1.5428930521011353, + "eval_runtime": 34.6485, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 20 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.023157037794589996, + "learning_rate": 0.00015555555555555556, + "loss": 1.4025, + "step": 30 + }, + { + "epoch": 3.3333333333333335, + "eval_loss": 1.4176721572875977, + "eval_runtime": 34.5433, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 30 + } + ], + "logging_steps": 10, + "max_steps": 135, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 10, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8007449233588224.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-30/training_args.bin b/checkpoint-30/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc --- /dev/null +++ b/checkpoint-30/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +size 5112 diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 --- /dev/null +++ b/checkpoint-40/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 --- /dev/null +++ b/checkpoint-40/adapter_config.json @@ -0,0 +1,31 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-40/adapter_model.safetensors b/checkpoint-40/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d92565a5b837388e70e09742cd836ed04bcbaafb --- /dev/null +++ b/checkpoint-40/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:801d3ce570c9b9ff575f960af3a00ccd6ba6d98cf418a3b2d0dc14a716f5dc87 +size 67143296 diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7bbafe6bb0c6557cdfc17a015eac9b12c17845c9 --- /dev/null +++ b/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c5b4e8da000e9b0681d015858d5f844f66483488fdbbc351086278b492e7ad +size 134433530 diff --git a/checkpoint-40/rng_state.pth b/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..38dc6b1b002ae88565c9cbb1471af25537930750 --- /dev/null +++ b/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30d6e73f3b9fa7d8e374b585bd95a623039b7a42a7db62f2bdba34e6c0b4145f +size 14244 diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d7557681fbd1043af6145a07414467654c47a85 --- /dev/null +++ b/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb020d6653a8f320452374f8532b2d5f261a1314bca08cdf554ed4cd89610334 +size 1064 diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..86d21c3a21736393bc1efdc29445b1408623b62b --- /dev/null +++ b/checkpoint-40/trainer_state.json @@ -0,0 +1,102 @@ +{ + "best_metric": 1.3449772596359253, + "best_model_checkpoint": "/kaggle/working/checkpoint-40", + "epoch": 4.444444444444445, + "eval_steps": 10, + "global_step": 40, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.1111111111111112, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, + "step": 10 + }, + { + "epoch": 1.1111111111111112, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 10 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.018981408327817917, + "learning_rate": 0.00017037037037037037, + "loss": 1.6072, + "step": 20 + }, + { + "epoch": 2.2222222222222223, + "eval_loss": 1.5428930521011353, + "eval_runtime": 34.6485, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 20 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.023157037794589996, + "learning_rate": 0.00015555555555555556, + "loss": 1.4025, + "step": 30 + }, + { + "epoch": 3.3333333333333335, + "eval_loss": 1.4176721572875977, + "eval_runtime": 34.5433, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 30 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.021338749676942825, + "learning_rate": 0.00014074074074074076, + "loss": 1.285, + "step": 40 + }, + { + "epoch": 4.444444444444445, + "eval_loss": 1.3449772596359253, + "eval_runtime": 34.5594, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 40 + } + ], + "logging_steps": 10, + "max_steps": 135, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 10, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0684017993449472e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc --- /dev/null +++ b/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +size 5112 diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 --- /dev/null +++ b/checkpoint-50/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 --- /dev/null +++ b/checkpoint-50/adapter_config.json @@ -0,0 +1,31 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0691d9e050f0256809f29f86e24616a0fc291e2 --- /dev/null +++ b/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462ccb4de1a6868b9c314278aa410416a2271f7f6242771f282ecb17db91aa9a +size 67143296 diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce706c076113ef5a730068e774f4cb258e862101 --- /dev/null +++ b/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cc1bc1ee11e2e518af04ea6813d8a018c28d601435ab40fb94b4429e7463a6f +size 134433530 diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..eaa5f8c2b84b2e8b46fcadd43baec18ad8bfbbc7 --- /dev/null +++ b/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f759d78abce0f2d67fdd5ef876de5fc9f78841bcec9f9875cf9d75dc7947b6 +size 14244 diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..66f83e8d1a29d1712842e93ebeca9509a1bf2fb0 --- /dev/null +++ b/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:965332493d5d51377ce3dafe8e14c60e285cd11a8955550ce87e8ef7114ed890 +size 1064 diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b4358c39fda6ddc437753ee9371e20b2c8bb340b --- /dev/null +++ b/checkpoint-50/trainer_state.json @@ -0,0 +1,117 @@ +{ + "best_metric": 1.2951068878173828, + "best_model_checkpoint": "/kaggle/working/checkpoint-50", + "epoch": 5.555555555555555, + "eval_steps": 10, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.1111111111111112, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, + "step": 10 + }, + { + "epoch": 1.1111111111111112, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 10 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.018981408327817917, + "learning_rate": 0.00017037037037037037, + "loss": 1.6072, + "step": 20 + }, + { + "epoch": 2.2222222222222223, + "eval_loss": 1.5428930521011353, + "eval_runtime": 34.6485, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 20 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.023157037794589996, + "learning_rate": 0.00015555555555555556, + "loss": 1.4025, + "step": 30 + }, + { + "epoch": 3.3333333333333335, + "eval_loss": 1.4176721572875977, + "eval_runtime": 34.5433, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 30 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.021338749676942825, + "learning_rate": 0.00014074074074074076, + "loss": 1.285, + "step": 40 + }, + { + "epoch": 4.444444444444445, + "eval_loss": 1.3449772596359253, + "eval_runtime": 34.5594, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 40 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.02489505708217621, + "learning_rate": 0.00012592592592592592, + "loss": 1.1687, + "step": 50 + }, + { + "epoch": 5.555555555555555, + "eval_loss": 1.2951068878173828, + "eval_runtime": 34.5896, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 50 + } + ], + "logging_steps": 10, + "max_steps": 135, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 10, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3359632879910912e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc --- /dev/null +++ b/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +size 5112 diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 --- /dev/null +++ b/checkpoint-60/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 --- /dev/null +++ b/checkpoint-60/adapter_config.json @@ -0,0 +1,31 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-60/adapter_model.safetensors b/checkpoint-60/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e572cbaec375d03394cd3c280f91fac11a3fc2b0 --- /dev/null +++ b/checkpoint-60/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2917762f8b15e3bf37ffb24d36fadc8281664684350c104e7663e270ea20dc +size 67143296 diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6056f4627bc13603632015486db18c9a4c53a353 --- /dev/null +++ b/checkpoint-60/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6ee00713e8f75fec9ac92a487fe43f23b824b8125bd0f0413507f283ce38111 +size 134433530 diff --git a/checkpoint-60/rng_state.pth b/checkpoint-60/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..da55a8c8ec9c90fd906c57775dafb08d822354ee --- /dev/null +++ b/checkpoint-60/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25d70730d443178b00076ddaa68745a66875c06caff14848ea6edca1cd7e460c +size 14244 diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..26adfd2b99d76fb3c7ae41002cae269cd882ee38 --- /dev/null +++ b/checkpoint-60/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2dae0587e75ddf657c6d2f1bfc77ac82e3e62449951f53cbb9a382f8e039b79 +size 1064 diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7a12784894d4693dd4e1e7b8da2c088a7e613802 --- /dev/null +++ b/checkpoint-60/trainer_state.json @@ -0,0 +1,132 @@ +{ + "best_metric": 1.2674343585968018, + "best_model_checkpoint": "/kaggle/working/checkpoint-60", + "epoch": 6.666666666666667, + "eval_steps": 10, + "global_step": 60, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.1111111111111112, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, + "step": 10 + }, + { + "epoch": 1.1111111111111112, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 10 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.018981408327817917, + "learning_rate": 0.00017037037037037037, + "loss": 1.6072, + "step": 20 + }, + { + "epoch": 2.2222222222222223, + "eval_loss": 1.5428930521011353, + "eval_runtime": 34.6485, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 20 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.023157037794589996, + "learning_rate": 0.00015555555555555556, + "loss": 1.4025, + "step": 30 + }, + { + "epoch": 3.3333333333333335, + "eval_loss": 1.4176721572875977, + "eval_runtime": 34.5433, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 30 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.021338749676942825, + "learning_rate": 0.00014074074074074076, + "loss": 1.285, + "step": 40 + }, + { + "epoch": 4.444444444444445, + "eval_loss": 1.3449772596359253, + "eval_runtime": 34.5594, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 40 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.02489505708217621, + "learning_rate": 0.00012592592592592592, + "loss": 1.1687, + "step": 50 + }, + { + "epoch": 5.555555555555555, + "eval_loss": 1.2951068878173828, + "eval_runtime": 34.5896, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 50 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.028962766751646996, + "learning_rate": 0.00011111111111111112, + "loss": 1.0521, + "step": 60 + }, + { + "epoch": 6.666666666666667, + "eval_loss": 1.2674343585968018, + "eval_runtime": 34.5586, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 60 + } + ], + "logging_steps": 10, + "max_steps": 135, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 10, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.6036519597572096e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc --- /dev/null +++ b/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +size 5112 diff --git a/checkpoint-70/README.md b/checkpoint-70/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 --- /dev/null +++ b/checkpoint-70/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-70/adapter_config.json b/checkpoint-70/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 --- /dev/null +++ b/checkpoint-70/adapter_config.json @@ -0,0 +1,31 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-70/adapter_model.safetensors b/checkpoint-70/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ed56b59d76b6a1da2bf2568dbfab45abde9d58f --- /dev/null +++ b/checkpoint-70/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3c37297ed626b47ab9c41ebb2a26326a24e606918ef7c7fd629793854a6799 +size 67143296 diff --git a/checkpoint-70/optimizer.pt b/checkpoint-70/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..28208764b805c1768aa12d977465b294ef889535 --- /dev/null +++ b/checkpoint-70/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4df5d5144e456b9ea6bf7bd481d07a757eea5bf300085855555e34b8b031648 +size 134433530 diff --git a/checkpoint-70/rng_state.pth b/checkpoint-70/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e586194fc1ef6abc2c5f56248ded9928618df599 --- /dev/null +++ b/checkpoint-70/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:372756df51e53230a016e7ffa865a7bc1529480107042eb1599dcd5226d1997b +size 14244 diff --git a/checkpoint-70/scheduler.pt b/checkpoint-70/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d715f69ec0008c942a0c82d8c13ce801a3ab1c3 --- /dev/null +++ b/checkpoint-70/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b4acdcca4bce1d839ad3b2f3830a589ce711414ff4548cbd4704149e66a014f +size 1064 diff --git a/checkpoint-70/trainer_state.json b/checkpoint-70/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3a48bb1be57349a537b049969fba97285f69e7b7 --- /dev/null +++ b/checkpoint-70/trainer_state.json @@ -0,0 +1,147 @@ +{ + "best_metric": 1.2424466609954834, + "best_model_checkpoint": "/kaggle/working/checkpoint-70", + "epoch": 7.777777777777778, + "eval_steps": 10, + "global_step": 70, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.1111111111111112, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, + "step": 10 + }, + { + "epoch": 1.1111111111111112, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 10 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.018981408327817917, + "learning_rate": 0.00017037037037037037, + "loss": 1.6072, + "step": 20 + }, + { + "epoch": 2.2222222222222223, + "eval_loss": 1.5428930521011353, + "eval_runtime": 34.6485, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 20 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.023157037794589996, + "learning_rate": 0.00015555555555555556, + "loss": 1.4025, + "step": 30 + }, + { + "epoch": 3.3333333333333335, + "eval_loss": 1.4176721572875977, + "eval_runtime": 34.5433, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 30 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.021338749676942825, + "learning_rate": 0.00014074074074074076, + "loss": 1.285, + "step": 40 + }, + { + "epoch": 4.444444444444445, + "eval_loss": 1.3449772596359253, + "eval_runtime": 34.5594, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 40 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.02489505708217621, + "learning_rate": 0.00012592592592592592, + "loss": 1.1687, + "step": 50 + }, + { + "epoch": 5.555555555555555, + "eval_loss": 1.2951068878173828, + "eval_runtime": 34.5896, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 50 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.028962766751646996, + "learning_rate": 0.00011111111111111112, + "loss": 1.0521, + "step": 60 + }, + { + "epoch": 6.666666666666667, + "eval_loss": 1.2674343585968018, + "eval_runtime": 34.5586, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 60 + }, + { + "epoch": 7.777777777777778, + "grad_norm": 0.033917125314474106, + "learning_rate": 9.62962962962963e-05, + "loss": 0.9885, + "step": 70 + }, + { + "epoch": 7.777777777777778, + "eval_loss": 1.2424466609954834, + "eval_runtime": 34.5412, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 70 + } + ], + "logging_steps": 10, + "max_steps": 135, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 10, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.8705775328034816e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-70/training_args.bin b/checkpoint-70/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc --- /dev/null +++ b/checkpoint-70/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +size 5112 diff --git a/checkpoint-80/README.md b/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 --- /dev/null +++ b/checkpoint-80/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-80/adapter_config.json b/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 --- /dev/null +++ b/checkpoint-80/adapter_config.json @@ -0,0 +1,31 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-80/adapter_model.safetensors b/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6186d5290dd28ae7e99b0fe968a081ab6d21d5b9 --- /dev/null +++ b/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fa6fb27f2616ec681d67ea4bdbe8a2579fb38adfa9fe8d06fc9c19524e1da7b +size 67143296 diff --git a/checkpoint-80/optimizer.pt b/checkpoint-80/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6eb9649fce4390a4b3a402652bfce39cc3c173ea --- /dev/null +++ b/checkpoint-80/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef989a76ea9a861bed593cbd65072f76fa28bbc0ec71984d26dd8c6e0775b98 +size 134433530 diff --git a/checkpoint-80/rng_state.pth b/checkpoint-80/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc3745cef029c90705ca84f2589bfb1f43715a7e --- /dev/null +++ b/checkpoint-80/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1145244a3fab5b36667f9c722b9e7a9ea285e3d07028c024cbf3acc4e83c92db +size 14244 diff --git a/checkpoint-80/scheduler.pt b/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0440fcc5c6c0ee35644344e92c7f5db3e29f3de6 --- /dev/null +++ b/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b82529da49a74f66d08569617b44d465bfa94ebb75128eba7c218e73a755220 +size 1064 diff --git a/checkpoint-80/trainer_state.json b/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9f89f0b6522c39b15eff7ee5969d94a70d12d7ba --- /dev/null +++ b/checkpoint-80/trainer_state.json @@ -0,0 +1,162 @@ +{ + "best_metric": 1.2252851724624634, + "best_model_checkpoint": "/kaggle/working/checkpoint-80", + "epoch": 8.88888888888889, + "eval_steps": 10, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.1111111111111112, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, + "step": 10 + }, + { + "epoch": 1.1111111111111112, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 10 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.018981408327817917, + "learning_rate": 0.00017037037037037037, + "loss": 1.6072, + "step": 20 + }, + { + "epoch": 2.2222222222222223, + "eval_loss": 1.5428930521011353, + "eval_runtime": 34.6485, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 20 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.023157037794589996, + "learning_rate": 0.00015555555555555556, + "loss": 1.4025, + "step": 30 + }, + { + "epoch": 3.3333333333333335, + "eval_loss": 1.4176721572875977, + "eval_runtime": 34.5433, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 30 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.021338749676942825, + "learning_rate": 0.00014074074074074076, + "loss": 1.285, + "step": 40 + }, + { + "epoch": 4.444444444444445, + "eval_loss": 1.3449772596359253, + "eval_runtime": 34.5594, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 40 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.02489505708217621, + "learning_rate": 0.00012592592592592592, + "loss": 1.1687, + "step": 50 + }, + { + "epoch": 5.555555555555555, + "eval_loss": 1.2951068878173828, + "eval_runtime": 34.5896, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 50 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.028962766751646996, + "learning_rate": 0.00011111111111111112, + "loss": 1.0521, + "step": 60 + }, + { + "epoch": 6.666666666666667, + "eval_loss": 1.2674343585968018, + "eval_runtime": 34.5586, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 60 + }, + { + "epoch": 7.777777777777778, + "grad_norm": 0.033917125314474106, + "learning_rate": 9.62962962962963e-05, + "loss": 0.9885, + "step": 70 + }, + { + "epoch": 7.777777777777778, + "eval_loss": 1.2424466609954834, + "eval_runtime": 34.5412, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 70 + }, + { + "epoch": 8.88888888888889, + "grad_norm": 0.03393130004405975, + "learning_rate": 8.148148148148148e-05, + "loss": 0.8784, + "step": 80 + }, + { + "epoch": 8.88888888888889, + "eval_loss": 1.2252851724624634, + "eval_runtime": 34.58, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 80 + } + ], + "logging_steps": 10, + "max_steps": 135, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 10, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.137153352269824e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-80/training_args.bin b/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc --- /dev/null +++ b/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +size 5112 diff --git a/checkpoint-90/README.md b/checkpoint-90/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2d1596ffe16e4d5bdcdf0e1d4322e6667af95962 --- /dev/null +++ b/checkpoint-90/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: TheBloke/Llama-2-7B-fp16 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.12.0 \ No newline at end of file diff --git a/checkpoint-90/adapter_config.json b/checkpoint-90/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf93f2809e43fe18fd6ad23406293a68e7f5c98 --- /dev/null +++ b/checkpoint-90/adapter_config.json @@ -0,0 +1,31 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "TheBloke/Llama-2-7B-fp16", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-90/adapter_model.safetensors b/checkpoint-90/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..464aecd7c193abd91aa5ac6a99ad4747551b506f --- /dev/null +++ b/checkpoint-90/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9c5a0c100f8bc851b10aa9fa27f71bf6aa477d8c0d7b7747dc10050dbdbe8a9 +size 67143296 diff --git a/checkpoint-90/optimizer.pt b/checkpoint-90/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d21653ed911ba9a59aa948e3d510b5ca26671a9 --- /dev/null +++ b/checkpoint-90/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a16c09a2c26f51affd932a4f61ddef77f90f2764342dfcc95095b133132790 +size 134433530 diff --git a/checkpoint-90/rng_state.pth b/checkpoint-90/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..554718b706d63a3f48155e8c454f03cbb05b6703 --- /dev/null +++ b/checkpoint-90/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:646be11220bb4209bcc0cd870eda869caa0d4b0e01b9b5c90750d00e50c5a8bc +size 14244 diff --git a/checkpoint-90/scheduler.pt b/checkpoint-90/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..96950aa94dbaaaa48fc31ffce5aa897ea65e909c --- /dev/null +++ b/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb0291b44310791547f87f6141cea4568622e9ffdad94f4d68fcee857c2fabe +size 1064 diff --git a/checkpoint-90/trainer_state.json b/checkpoint-90/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..85a9b74cd54c9d40c81660a5a9479c8041b34cda --- /dev/null +++ b/checkpoint-90/trainer_state.json @@ -0,0 +1,177 @@ +{ + "best_metric": 1.2115424871444702, + "best_model_checkpoint": "/kaggle/working/checkpoint-90", + "epoch": 10.0, + "eval_steps": 10, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.1111111111111112, + "grad_norm": 0.022282764315605164, + "learning_rate": 0.0001851851851851852, + "loss": 2.0424, + "step": 10 + }, + { + "epoch": 1.1111111111111112, + "eval_loss": 1.733155369758606, + "eval_runtime": 34.5543, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 10 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.018981408327817917, + "learning_rate": 0.00017037037037037037, + "loss": 1.6072, + "step": 20 + }, + { + "epoch": 2.2222222222222223, + "eval_loss": 1.5428930521011353, + "eval_runtime": 34.6485, + "eval_samples_per_second": 1.039, + "eval_steps_per_second": 0.144, + "step": 20 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.023157037794589996, + "learning_rate": 0.00015555555555555556, + "loss": 1.4025, + "step": 30 + }, + { + "epoch": 3.3333333333333335, + "eval_loss": 1.4176721572875977, + "eval_runtime": 34.5433, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 30 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.021338749676942825, + "learning_rate": 0.00014074074074074076, + "loss": 1.285, + "step": 40 + }, + { + "epoch": 4.444444444444445, + "eval_loss": 1.3449772596359253, + "eval_runtime": 34.5594, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 40 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.02489505708217621, + "learning_rate": 0.00012592592592592592, + "loss": 1.1687, + "step": 50 + }, + { + "epoch": 5.555555555555555, + "eval_loss": 1.2951068878173828, + "eval_runtime": 34.5896, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 50 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.028962766751646996, + "learning_rate": 0.00011111111111111112, + "loss": 1.0521, + "step": 60 + }, + { + "epoch": 6.666666666666667, + "eval_loss": 1.2674343585968018, + "eval_runtime": 34.5586, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 60 + }, + { + "epoch": 7.777777777777778, + "grad_norm": 0.033917125314474106, + "learning_rate": 9.62962962962963e-05, + "loss": 0.9885, + "step": 70 + }, + { + "epoch": 7.777777777777778, + "eval_loss": 1.2424466609954834, + "eval_runtime": 34.5412, + "eval_samples_per_second": 1.042, + "eval_steps_per_second": 0.145, + "step": 70 + }, + { + "epoch": 8.88888888888889, + "grad_norm": 0.03393130004405975, + "learning_rate": 8.148148148148148e-05, + "loss": 0.8784, + "step": 80 + }, + { + "epoch": 8.88888888888889, + "eval_loss": 1.2252851724624634, + "eval_runtime": 34.58, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 80 + }, + { + "epoch": 10.0, + "grad_norm": 0.04081139340996742, + "learning_rate": 6.666666666666667e-05, + "loss": 0.8154, + "step": 90 + }, + { + "epoch": 10.0, + "eval_loss": 1.2115424871444702, + "eval_runtime": 34.5784, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.145, + "step": 90 + } + ], + "logging_steps": 10, + "max_steps": 135, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 10, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.404714840915968e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-90/training_args.bin b/checkpoint-90/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..db8c5d32a4f54d0e9ab4e7a985fbaee5d6701ecc --- /dev/null +++ b/checkpoint-90/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c753ff1ba946038f620bad3e42a35ce583c9e8ed52b49fd22fb6614fea0f43 +size 5112 diff --git a/config.json b/config.json index 258a5dd33a9cd50769867ae18e3f1c8164c04074..80afc097aeac0205c9feeb402b87040f5d89be74 100644 --- a/config.json +++ b/config.json @@ -39,7 +39,7 @@ "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", - "transformers_version": "4.42.3", + "transformers_version": "4.41.2", "use_cache": true, "vocab_size": 32000 } diff --git a/runs/Jul31_19-17-11_5ac8ef822596/events.out.tfevents.1722453432.5ac8ef822596.34.0 b/runs/Jul31_19-17-11_5ac8ef822596/events.out.tfevents.1722453432.5ac8ef822596.34.0 new file mode 100644 index 0000000000000000000000000000000000000000..c54b0cd9d64ed9be791c6e49c9ffdba2c04b17f8 --- /dev/null +++ b/runs/Jul31_19-17-11_5ac8ef822596/events.out.tfevents.1722453432.5ac8ef822596.34.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f7bb873641ef66789d00130eb603f6040b919bc37acfddcae0830bb8a51487 +size 11877