diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..4772e5b625535584ec24c83bbcb79dba9cc131b4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,23 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +llama_Chinese_English/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_Chinese_French/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_Chinese_German/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_Chinese_Vietnamese/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_English_Chinese/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_English_French/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_English_German/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_English_Vietnamese/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_French_Chinese/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_French_English/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_French_German/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_French_Vietnamese/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_German_Chinese/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_German_English/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_German_French/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_German_Vietnamese/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_Vietnamese_Chinese/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_Vietnamese_English/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_Vietnamese_French/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +llama_Vietnamese_German/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/llama_Chinese_English/checkpoint-200/README.md b/llama_Chinese_English/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_Chinese_English/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_Chinese_English/checkpoint-200/adapter_config.json b/llama_Chinese_English/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b080b6a947e5b6f68bcf4f14471d4372ee8ffc7e --- /dev/null +++ b/llama_Chinese_English/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "o_proj", + "down_proj", + "k_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_Chinese_English/checkpoint-200/adapter_model.safetensors b/llama_Chinese_English/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c531135d37639fb255f27a17697875e710ad3f08 --- /dev/null +++ b/llama_Chinese_English/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d18e1242104445ef2d5a9ad1d14375a0b4d94fb2d25d947d42e2fabd1f090ba +size 167832240 diff --git a/llama_Chinese_English/checkpoint-200/optimizer.pt b/llama_Chinese_English/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cffd8b9f33dd81b2b1cae926df0c3c078dbe07c3 --- /dev/null +++ b/llama_Chinese_English/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99dc4c3753e84cb24d1c6e79212e76a3a6c773038e270254f1136f670ae78b5a +size 85723284 diff --git a/llama_Chinese_English/checkpoint-200/rng_state.pth b/llama_Chinese_English/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21f2ff1c1a816463781d51760f8156e041f5979 --- /dev/null +++ b/llama_Chinese_English/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4981123ff3cf7bd5b7f76839e90e4776f747ca4c38dcb41876fa010c0dea8b23 +size 14244 diff --git a/llama_Chinese_English/checkpoint-200/scheduler.pt b/llama_Chinese_English/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_Chinese_English/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_Chinese_English/checkpoint-200/special_tokens_map.json b/llama_Chinese_English/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_Chinese_English/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_Chinese_English/checkpoint-200/tokenizer.json b/llama_Chinese_English/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_Chinese_English/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_Chinese_English/checkpoint-200/tokenizer_config.json b/llama_Chinese_English/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_Chinese_English/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_Chinese_English/checkpoint-200/trainer_state.json b/llama_Chinese_English/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..094e4b18e77ac9b2d3ae4252544d384df10fde5b --- /dev/null +++ b/llama_Chinese_English/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.5787781350482315, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012861736334405145, + "grad_norm": 1.1440598964691162, + "learning_rate": 0.0001, + "loss": 2.6398, + "step": 1 + }, + { + "epoch": 0.02572347266881029, + "grad_norm": 1.078170657157898, + "learning_rate": 9.949748743718594e-05, + "loss": 2.6721, + "step": 2 + }, + { + "epoch": 0.03858520900321544, + "grad_norm": 0.9559459686279297, + "learning_rate": 9.899497487437186e-05, + "loss": 2.6911, + "step": 3 + }, + { + "epoch": 0.05144694533762058, + "grad_norm": 1.0017387866973877, + "learning_rate": 9.84924623115578e-05, + "loss": 2.479, + "step": 4 + }, + { + "epoch": 0.06430868167202572, + "grad_norm": 1.0636959075927734, + "learning_rate": 9.798994974874372e-05, + "loss": 2.335, + "step": 5 + }, + { + "epoch": 0.07717041800643087, + "grad_norm": 1.0521775484085083, + "learning_rate": 9.748743718592965e-05, + "loss": 2.1896, + "step": 6 + }, + { + "epoch": 0.09003215434083602, + "grad_norm": 1.0728856325149536, + "learning_rate": 9.698492462311559e-05, + "loss": 2.0778, + "step": 7 + }, + { + "epoch": 0.10289389067524116, + "grad_norm": 1.031778335571289, + "learning_rate": 9.64824120603015e-05, + "loss": 1.9751, + "step": 8 + }, + { + "epoch": 0.1157556270096463, + "grad_norm": 1.076965093612671, + "learning_rate": 9.597989949748745e-05, + "loss": 1.7265, + "step": 9 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 1.214233636856079, + "learning_rate": 9.547738693467337e-05, + "loss": 1.729, + "step": 10 + }, + { + "epoch": 0.1414790996784566, + "grad_norm": 0.7060278058052063, + "learning_rate": 9.49748743718593e-05, + "loss": 1.9542, + "step": 11 + }, + { + "epoch": 0.15434083601286175, + "grad_norm": 0.827420711517334, + "learning_rate": 9.447236180904523e-05, + "loss": 1.4983, + "step": 12 + }, + { + "epoch": 0.16720257234726688, + "grad_norm": 0.6821541786193848, + "learning_rate": 9.396984924623115e-05, + "loss": 1.4999, + "step": 13 + }, + { + "epoch": 0.18006430868167203, + "grad_norm": 0.7033599615097046, + "learning_rate": 9.34673366834171e-05, + "loss": 1.4026, + "step": 14 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 0.80780029296875, + "learning_rate": 9.296482412060302e-05, + "loss": 1.4809, + "step": 15 + }, + { + "epoch": 0.2057877813504823, + "grad_norm": 0.7556090950965881, + "learning_rate": 9.246231155778895e-05, + "loss": 1.4065, + "step": 16 + }, + { + "epoch": 0.21864951768488747, + "grad_norm": 0.6941238641738892, + "learning_rate": 9.195979899497488e-05, + "loss": 1.4728, + "step": 17 + }, + { + "epoch": 0.2315112540192926, + "grad_norm": 0.7150428295135498, + "learning_rate": 9.14572864321608e-05, + "loss": 1.3528, + "step": 18 + }, + { + "epoch": 0.24437299035369775, + "grad_norm": 0.5909086465835571, + "learning_rate": 9.095477386934675e-05, + "loss": 1.3859, + "step": 19 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 0.7633894681930542, + "learning_rate": 9.045226130653267e-05, + "loss": 1.3405, + "step": 20 + }, + { + "epoch": 0.27009646302250806, + "grad_norm": 0.6418241262435913, + "learning_rate": 8.99497487437186e-05, + "loss": 1.3162, + "step": 21 + }, + { + "epoch": 0.2829581993569132, + "grad_norm": 0.8278843760490417, + "learning_rate": 8.944723618090453e-05, + "loss": 1.3934, + "step": 22 + }, + { + "epoch": 0.2958199356913183, + "grad_norm": 0.7349154353141785, + "learning_rate": 8.894472361809045e-05, + "loss": 1.3461, + "step": 23 + }, + { + "epoch": 0.3086816720257235, + "grad_norm": 0.736482560634613, + "learning_rate": 8.84422110552764e-05, + "loss": 1.2912, + "step": 24 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 0.8143345713615417, + "learning_rate": 8.793969849246232e-05, + "loss": 1.3445, + "step": 25 + }, + { + "epoch": 0.33440514469453375, + "grad_norm": 0.6922888159751892, + "learning_rate": 8.743718592964825e-05, + "loss": 1.36, + "step": 26 + }, + { + "epoch": 0.34726688102893893, + "grad_norm": 0.7785637378692627, + "learning_rate": 8.693467336683418e-05, + "loss": 1.276, + "step": 27 + }, + { + "epoch": 0.36012861736334406, + "grad_norm": 0.7577537298202515, + "learning_rate": 8.64321608040201e-05, + "loss": 1.2323, + "step": 28 + }, + { + "epoch": 0.3729903536977492, + "grad_norm": 0.7919734716415405, + "learning_rate": 8.592964824120603e-05, + "loss": 1.4053, + "step": 29 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 0.821456789970398, + "learning_rate": 8.542713567839196e-05, + "loss": 1.251, + "step": 30 + }, + { + "epoch": 0.3987138263665595, + "grad_norm": 0.8000316023826599, + "learning_rate": 8.49246231155779e-05, + "loss": 1.3022, + "step": 31 + }, + { + "epoch": 0.4115755627009646, + "grad_norm": 0.7909243702888489, + "learning_rate": 8.442211055276383e-05, + "loss": 1.2509, + "step": 32 + }, + { + "epoch": 0.42443729903536975, + "grad_norm": 0.896696150302887, + "learning_rate": 8.391959798994975e-05, + "loss": 1.2068, + "step": 33 + }, + { + "epoch": 0.43729903536977494, + "grad_norm": 0.8683031797409058, + "learning_rate": 8.341708542713568e-05, + "loss": 1.3849, + "step": 34 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 0.8988001346588135, + "learning_rate": 8.291457286432161e-05, + "loss": 1.1935, + "step": 35 + }, + { + "epoch": 0.4630225080385852, + "grad_norm": 0.8507208824157715, + "learning_rate": 8.241206030150754e-05, + "loss": 1.2605, + "step": 36 + }, + { + "epoch": 0.4758842443729904, + "grad_norm": 0.9080713987350464, + "learning_rate": 8.190954773869348e-05, + "loss": 1.3304, + "step": 37 + }, + { + "epoch": 0.4887459807073955, + "grad_norm": 0.9472977519035339, + "learning_rate": 8.14070351758794e-05, + "loss": 1.3325, + "step": 38 + }, + { + "epoch": 0.5016077170418006, + "grad_norm": 0.9067745804786682, + "learning_rate": 8.090452261306533e-05, + "loss": 1.195, + "step": 39 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 0.7469335198402405, + "learning_rate": 8.040201005025126e-05, + "loss": 1.348, + "step": 40 + }, + { + "epoch": 0.5273311897106109, + "grad_norm": 0.915685772895813, + "learning_rate": 7.989949748743719e-05, + "loss": 1.2406, + "step": 41 + }, + { + "epoch": 0.5401929260450161, + "grad_norm": 0.6561890244483948, + "learning_rate": 7.939698492462313e-05, + "loss": 1.4992, + "step": 42 + }, + { + "epoch": 0.5530546623794212, + "grad_norm": 0.7920067310333252, + "learning_rate": 7.889447236180904e-05, + "loss": 1.1802, + "step": 43 + }, + { + "epoch": 0.5659163987138264, + "grad_norm": 0.7714186906814575, + "learning_rate": 7.839195979899498e-05, + "loss": 1.2086, + "step": 44 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 0.7048550844192505, + "learning_rate": 7.788944723618091e-05, + "loss": 1.166, + "step": 45 + }, + { + "epoch": 0.5916398713826366, + "grad_norm": 0.5653746128082275, + "learning_rate": 7.738693467336684e-05, + "loss": 1.3876, + "step": 46 + }, + { + "epoch": 0.6045016077170418, + "grad_norm": 0.6872064471244812, + "learning_rate": 7.688442211055277e-05, + "loss": 1.1946, + "step": 47 + }, + { + "epoch": 0.617363344051447, + "grad_norm": 0.6310857534408569, + "learning_rate": 7.638190954773869e-05, + "loss": 1.1509, + "step": 48 + }, + { + "epoch": 0.6302250803858521, + "grad_norm": 0.7433875203132629, + "learning_rate": 7.587939698492463e-05, + "loss": 1.1792, + "step": 49 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 0.7774860262870789, + "learning_rate": 7.537688442211056e-05, + "loss": 1.3531, + "step": 50 + }, + { + "epoch": 0.6559485530546624, + "grad_norm": 0.6565365195274353, + "learning_rate": 7.487437185929649e-05, + "loss": 1.2448, + "step": 51 + }, + { + "epoch": 0.6688102893890675, + "grad_norm": 0.6934945583343506, + "learning_rate": 7.437185929648241e-05, + "loss": 1.3109, + "step": 52 + }, + { + "epoch": 0.6816720257234726, + "grad_norm": 0.662295401096344, + "learning_rate": 7.386934673366834e-05, + "loss": 1.3463, + "step": 53 + }, + { + "epoch": 0.6945337620578779, + "grad_norm": 0.6406717300415039, + "learning_rate": 7.336683417085427e-05, + "loss": 1.0897, + "step": 54 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 0.7470242977142334, + "learning_rate": 7.28643216080402e-05, + "loss": 1.1834, + "step": 55 + }, + { + "epoch": 0.7202572347266881, + "grad_norm": 0.6413742899894714, + "learning_rate": 7.236180904522614e-05, + "loss": 1.313, + "step": 56 + }, + { + "epoch": 0.7331189710610932, + "grad_norm": 0.64141845703125, + "learning_rate": 7.185929648241206e-05, + "loss": 1.322, + "step": 57 + }, + { + "epoch": 0.7459807073954984, + "grad_norm": 0.6849397420883179, + "learning_rate": 7.135678391959799e-05, + "loss": 1.1466, + "step": 58 + }, + { + "epoch": 0.7588424437299035, + "grad_norm": 0.7390387058258057, + "learning_rate": 7.085427135678392e-05, + "loss": 1.1692, + "step": 59 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 0.7512691617012024, + "learning_rate": 7.035175879396985e-05, + "loss": 1.2447, + "step": 60 + }, + { + "epoch": 0.7845659163987139, + "grad_norm": 0.7736090421676636, + "learning_rate": 6.984924623115579e-05, + "loss": 1.1723, + "step": 61 + }, + { + "epoch": 0.797427652733119, + "grad_norm": 0.7576032280921936, + "learning_rate": 6.93467336683417e-05, + "loss": 1.2074, + "step": 62 + }, + { + "epoch": 0.8102893890675241, + "grad_norm": 0.7597335577011108, + "learning_rate": 6.884422110552764e-05, + "loss": 1.1425, + "step": 63 + }, + { + "epoch": 0.8231511254019293, + "grad_norm": 0.774594247341156, + "learning_rate": 6.834170854271357e-05, + "loss": 1.3168, + "step": 64 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 0.7835249900817871, + "learning_rate": 6.78391959798995e-05, + "loss": 0.9976, + "step": 65 + }, + { + "epoch": 0.8488745980707395, + "grad_norm": 0.915006160736084, + "learning_rate": 6.733668341708544e-05, + "loss": 1.2015, + "step": 66 + }, + { + "epoch": 0.8617363344051447, + "grad_norm": 0.7381256818771362, + "learning_rate": 6.683417085427135e-05, + "loss": 1.1394, + "step": 67 + }, + { + "epoch": 0.8745980707395499, + "grad_norm": 0.6990655064582825, + "learning_rate": 6.633165829145729e-05, + "loss": 1.1879, + "step": 68 + }, + { + "epoch": 0.887459807073955, + "grad_norm": 0.7526246309280396, + "learning_rate": 6.582914572864322e-05, + "loss": 1.2836, + "step": 69 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 0.6938768029212952, + "learning_rate": 6.532663316582915e-05, + "loss": 1.1957, + "step": 70 + }, + { + "epoch": 0.9131832797427653, + "grad_norm": 0.7444002032279968, + "learning_rate": 6.482412060301508e-05, + "loss": 1.2171, + "step": 71 + }, + { + "epoch": 0.9260450160771704, + "grad_norm": 0.7161276340484619, + "learning_rate": 6.4321608040201e-05, + "loss": 1.1213, + "step": 72 + }, + { + "epoch": 0.9389067524115756, + "grad_norm": 0.7644099593162537, + "learning_rate": 6.381909547738694e-05, + "loss": 1.0821, + "step": 73 + }, + { + "epoch": 0.9517684887459807, + "grad_norm": 0.6624590754508972, + "learning_rate": 6.331658291457287e-05, + "loss": 1.233, + "step": 74 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 0.7556670308113098, + "learning_rate": 6.28140703517588e-05, + "loss": 1.2724, + "step": 75 + }, + { + "epoch": 0.977491961414791, + "grad_norm": 0.7611079812049866, + "learning_rate": 6.231155778894473e-05, + "loss": 1.1659, + "step": 76 + }, + { + "epoch": 0.9903536977491961, + "grad_norm": 0.7112507224082947, + "learning_rate": 6.180904522613065e-05, + "loss": 1.1363, + "step": 77 + }, + { + "epoch": 1.0064308681672025, + "grad_norm": 1.1092808246612549, + "learning_rate": 6.130653266331658e-05, + "loss": 1.6764, + "step": 78 + }, + { + "epoch": 1.0192926045016077, + "grad_norm": 0.5934129357337952, + "learning_rate": 6.080402010050251e-05, + "loss": 1.0156, + "step": 79 + }, + { + "epoch": 1.0321543408360128, + "grad_norm": 0.6461953520774841, + "learning_rate": 6.030150753768844e-05, + "loss": 1.116, + "step": 80 + }, + { + "epoch": 1.045016077170418, + "grad_norm": 0.6718643307685852, + "learning_rate": 5.979899497487438e-05, + "loss": 1.1288, + "step": 81 + }, + { + "epoch": 1.0578778135048232, + "grad_norm": 0.6698076128959656, + "learning_rate": 5.929648241206031e-05, + "loss": 1.1071, + "step": 82 + }, + { + "epoch": 1.0707395498392283, + "grad_norm": 0.630225419998169, + "learning_rate": 5.879396984924623e-05, + "loss": 1.0615, + "step": 83 + }, + { + "epoch": 1.0836012861736335, + "grad_norm": 0.7957659959793091, + "learning_rate": 5.829145728643216e-05, + "loss": 1.1099, + "step": 84 + }, + { + "epoch": 1.0964630225080385, + "grad_norm": 0.7359949946403503, + "learning_rate": 5.778894472361809e-05, + "loss": 1.1107, + "step": 85 + }, + { + "epoch": 1.1093247588424437, + "grad_norm": 0.7295353412628174, + "learning_rate": 5.728643216080403e-05, + "loss": 1.093, + "step": 86 + }, + { + "epoch": 1.122186495176849, + "grad_norm": 0.6929362416267395, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.0855, + "step": 87 + }, + { + "epoch": 1.135048231511254, + "grad_norm": 0.6977247595787048, + "learning_rate": 5.628140703517588e-05, + "loss": 1.1056, + "step": 88 + }, + { + "epoch": 1.1479099678456592, + "grad_norm": 0.7273219227790833, + "learning_rate": 5.577889447236181e-05, + "loss": 1.1319, + "step": 89 + }, + { + "epoch": 1.1607717041800643, + "grad_norm": 0.7561770081520081, + "learning_rate": 5.527638190954774e-05, + "loss": 1.1406, + "step": 90 + }, + { + "epoch": 1.1736334405144695, + "grad_norm": 0.7354375720024109, + "learning_rate": 5.477386934673368e-05, + "loss": 1.8666, + "step": 91 + }, + { + "epoch": 1.1864951768488745, + "grad_norm": 0.6309502124786377, + "learning_rate": 5.4271356783919604e-05, + "loss": 0.9311, + "step": 92 + }, + { + "epoch": 1.1993569131832797, + "grad_norm": 0.6576774716377258, + "learning_rate": 5.376884422110553e-05, + "loss": 0.9488, + "step": 93 + }, + { + "epoch": 1.212218649517685, + "grad_norm": 0.8312212228775024, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.2609, + "step": 94 + }, + { + "epoch": 1.22508038585209, + "grad_norm": 0.6623194813728333, + "learning_rate": 5.276381909547739e-05, + "loss": 0.9066, + "step": 95 + }, + { + "epoch": 1.2379421221864952, + "grad_norm": 0.8220139741897583, + "learning_rate": 5.226130653266332e-05, + "loss": 1.2762, + "step": 96 + }, + { + "epoch": 1.2508038585209003, + "grad_norm": 0.8111358880996704, + "learning_rate": 5.175879396984925e-05, + "loss": 1.1121, + "step": 97 + }, + { + "epoch": 1.2636655948553055, + "grad_norm": 0.8110450506210327, + "learning_rate": 5.125628140703518e-05, + "loss": 1.1078, + "step": 98 + }, + { + "epoch": 1.2765273311897105, + "grad_norm": 0.7321817874908447, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.1252, + "step": 99 + }, + { + "epoch": 1.2893890675241158, + "grad_norm": 0.7316034436225891, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.0643, + "step": 100 + }, + { + "epoch": 1.302250803858521, + "grad_norm": 0.7192660570144653, + "learning_rate": 4.974874371859297e-05, + "loss": 1.1715, + "step": 101 + }, + { + "epoch": 1.315112540192926, + "grad_norm": 0.7240356206893921, + "learning_rate": 4.92462311557789e-05, + "loss": 1.0688, + "step": 102 + }, + { + "epoch": 1.3279742765273312, + "grad_norm": 0.6924866437911987, + "learning_rate": 4.874371859296483e-05, + "loss": 0.9077, + "step": 103 + }, + { + "epoch": 1.3408360128617363, + "grad_norm": 0.7501733303070068, + "learning_rate": 4.824120603015075e-05, + "loss": 1.5946, + "step": 104 + }, + { + "epoch": 1.3536977491961415, + "grad_norm": 0.807327926158905, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.1421, + "step": 105 + }, + { + "epoch": 1.3665594855305465, + "grad_norm": 0.7556862235069275, + "learning_rate": 4.723618090452262e-05, + "loss": 0.9923, + "step": 106 + }, + { + "epoch": 1.3794212218649518, + "grad_norm": 0.7407500743865967, + "learning_rate": 4.673366834170855e-05, + "loss": 0.9658, + "step": 107 + }, + { + "epoch": 1.392282958199357, + "grad_norm": 0.7646963596343994, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.0564, + "step": 108 + }, + { + "epoch": 1.405144694533762, + "grad_norm": 0.782166063785553, + "learning_rate": 4.57286432160804e-05, + "loss": 0.9601, + "step": 109 + }, + { + "epoch": 1.4180064308681672, + "grad_norm": 0.7942412495613098, + "learning_rate": 4.522613065326633e-05, + "loss": 1.0054, + "step": 110 + }, + { + "epoch": 1.4308681672025725, + "grad_norm": 0.7960532307624817, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.0791, + "step": 111 + }, + { + "epoch": 1.4437299035369775, + "grad_norm": 0.7725895643234253, + "learning_rate": 4.42211055276382e-05, + "loss": 1.0099, + "step": 112 + }, + { + "epoch": 1.4565916398713825, + "grad_norm": 0.8653061985969543, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.168, + "step": 113 + }, + { + "epoch": 1.4694533762057878, + "grad_norm": 0.8689178824424744, + "learning_rate": 4.321608040201005e-05, + "loss": 1.1026, + "step": 114 + }, + { + "epoch": 1.482315112540193, + "grad_norm": 0.823765754699707, + "learning_rate": 4.271356783919598e-05, + "loss": 1.3272, + "step": 115 + }, + { + "epoch": 1.495176848874598, + "grad_norm": 0.715006947517395, + "learning_rate": 4.2211055276381914e-05, + "loss": 0.9572, + "step": 116 + }, + { + "epoch": 1.5080385852090032, + "grad_norm": 0.9381377696990967, + "learning_rate": 4.170854271356784e-05, + "loss": 1.0988, + "step": 117 + }, + { + "epoch": 1.5209003215434085, + "grad_norm": 0.7912129759788513, + "learning_rate": 4.120603015075377e-05, + "loss": 1.0784, + "step": 118 + }, + { + "epoch": 1.5337620578778135, + "grad_norm": 0.8803995251655579, + "learning_rate": 4.07035175879397e-05, + "loss": 1.1845, + "step": 119 + }, + { + "epoch": 1.5466237942122185, + "grad_norm": 0.7034085988998413, + "learning_rate": 4.020100502512563e-05, + "loss": 1.016, + "step": 120 + }, + { + "epoch": 1.5594855305466238, + "grad_norm": 0.8203994035720825, + "learning_rate": 3.969849246231156e-05, + "loss": 1.063, + "step": 121 + }, + { + "epoch": 1.572347266881029, + "grad_norm": 0.712227463722229, + "learning_rate": 3.919597989949749e-05, + "loss": 1.033, + "step": 122 + }, + { + "epoch": 1.585209003215434, + "grad_norm": 0.7563191056251526, + "learning_rate": 3.869346733668342e-05, + "loss": 1.0089, + "step": 123 + }, + { + "epoch": 1.5980707395498392, + "grad_norm": 0.8773857355117798, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.4975, + "step": 124 + }, + { + "epoch": 1.6109324758842445, + "grad_norm": 0.6820680499076843, + "learning_rate": 3.768844221105528e-05, + "loss": 0.8996, + "step": 125 + }, + { + "epoch": 1.6237942122186495, + "grad_norm": 0.7820584774017334, + "learning_rate": 3.7185929648241204e-05, + "loss": 0.9505, + "step": 126 + }, + { + "epoch": 1.6366559485530545, + "grad_norm": 0.7456291913986206, + "learning_rate": 3.668341708542714e-05, + "loss": 0.9537, + "step": 127 + }, + { + "epoch": 1.6495176848874598, + "grad_norm": 0.9636368155479431, + "learning_rate": 3.618090452261307e-05, + "loss": 1.1609, + "step": 128 + }, + { + "epoch": 1.662379421221865, + "grad_norm": 0.83909672498703, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.0519, + "step": 129 + }, + { + "epoch": 1.67524115755627, + "grad_norm": 0.9138084053993225, + "learning_rate": 3.517587939698493e-05, + "loss": 0.977, + "step": 130 + }, + { + "epoch": 1.6881028938906752, + "grad_norm": 0.8310115337371826, + "learning_rate": 3.467336683417085e-05, + "loss": 1.0701, + "step": 131 + }, + { + "epoch": 1.7009646302250805, + "grad_norm": 0.8289808630943298, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.0607, + "step": 132 + }, + { + "epoch": 1.7138263665594855, + "grad_norm": 0.9004020094871521, + "learning_rate": 3.366834170854272e-05, + "loss": 1.3209, + "step": 133 + }, + { + "epoch": 1.7266881028938905, + "grad_norm": 0.7358340620994568, + "learning_rate": 3.3165829145728643e-05, + "loss": 0.9646, + "step": 134 + }, + { + "epoch": 1.739549839228296, + "grad_norm": 0.8724610805511475, + "learning_rate": 3.2663316582914576e-05, + "loss": 0.9834, + "step": 135 + }, + { + "epoch": 1.752411575562701, + "grad_norm": 0.9433433413505554, + "learning_rate": 3.21608040201005e-05, + "loss": 0.9332, + "step": 136 + }, + { + "epoch": 1.765273311897106, + "grad_norm": 0.9258175492286682, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.0992, + "step": 137 + }, + { + "epoch": 1.7781350482315113, + "grad_norm": 0.8666226267814636, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.0032, + "step": 138 + }, + { + "epoch": 1.7909967845659165, + "grad_norm": 0.9296693205833435, + "learning_rate": 3.065326633165829e-05, + "loss": 1.082, + "step": 139 + }, + { + "epoch": 1.8038585209003215, + "grad_norm": 0.8250362873077393, + "learning_rate": 3.015075376884422e-05, + "loss": 0.8665, + "step": 140 + }, + { + "epoch": 1.8167202572347267, + "grad_norm": 0.9356509447097778, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.1933, + "step": 141 + }, + { + "epoch": 1.829581993569132, + "grad_norm": 0.935892641544342, + "learning_rate": 2.914572864321608e-05, + "loss": 1.0981, + "step": 142 + }, + { + "epoch": 1.842443729903537, + "grad_norm": 0.8205680251121521, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.0501, + "step": 143 + }, + { + "epoch": 1.855305466237942, + "grad_norm": 0.9360036253929138, + "learning_rate": 2.814070351758794e-05, + "loss": 1.1749, + "step": 144 + }, + { + "epoch": 1.8681672025723473, + "grad_norm": 0.9020069241523743, + "learning_rate": 2.763819095477387e-05, + "loss": 1.0402, + "step": 145 + }, + { + "epoch": 1.8810289389067525, + "grad_norm": 0.7428980469703674, + "learning_rate": 2.7135678391959802e-05, + "loss": 0.9236, + "step": 146 + }, + { + "epoch": 1.8938906752411575, + "grad_norm": 0.8405928611755371, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.2336, + "step": 147 + }, + { + "epoch": 1.9067524115755627, + "grad_norm": 0.954319417476654, + "learning_rate": 2.613065326633166e-05, + "loss": 1.0338, + "step": 148 + }, + { + "epoch": 1.919614147909968, + "grad_norm": 0.836933970451355, + "learning_rate": 2.562814070351759e-05, + "loss": 0.9513, + "step": 149 + }, + { + "epoch": 1.932475884244373, + "grad_norm": 0.8751674294471741, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.1418, + "step": 150 + }, + { + "epoch": 1.945337620578778, + "grad_norm": 0.7700965404510498, + "learning_rate": 2.462311557788945e-05, + "loss": 1.0421, + "step": 151 + }, + { + "epoch": 1.9581993569131833, + "grad_norm": 0.8978580832481384, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.124, + "step": 152 + }, + { + "epoch": 1.9710610932475885, + "grad_norm": 0.8578283786773682, + "learning_rate": 2.361809045226131e-05, + "loss": 1.1622, + "step": 153 + }, + { + "epoch": 1.9839228295819935, + "grad_norm": 0.8631901741027832, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.0854, + "step": 154 + }, + { + "epoch": 1.9967845659163987, + "grad_norm": 1.506197452545166, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.5602, + "step": 155 + }, + { + "epoch": 2.012861736334405, + "grad_norm": 0.8579190969467163, + "learning_rate": 2.21105527638191e-05, + "loss": 1.0135, + "step": 156 + }, + { + "epoch": 2.0257234726688105, + "grad_norm": 0.791259229183197, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.063, + "step": 157 + }, + { + "epoch": 2.0385852090032155, + "grad_norm": 0.7824810743331909, + "learning_rate": 2.1105527638190957e-05, + "loss": 0.9797, + "step": 158 + }, + { + "epoch": 2.0514469453376205, + "grad_norm": 0.7896823287010193, + "learning_rate": 2.0603015075376886e-05, + "loss": 0.9882, + "step": 159 + }, + { + "epoch": 2.0643086816720255, + "grad_norm": 0.8175792098045349, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.9608, + "step": 160 + }, + { + "epoch": 2.077170418006431, + "grad_norm": 0.819749116897583, + "learning_rate": 1.9597989949748744e-05, + "loss": 1.0491, + "step": 161 + }, + { + "epoch": 2.090032154340836, + "grad_norm": 0.8855689764022827, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.0228, + "step": 162 + }, + { + "epoch": 2.102893890675241, + "grad_norm": 0.8208020925521851, + "learning_rate": 1.8592964824120602e-05, + "loss": 0.9609, + "step": 163 + }, + { + "epoch": 2.1157556270096465, + "grad_norm": 0.7887352108955383, + "learning_rate": 1.8090452261306535e-05, + "loss": 0.9935, + "step": 164 + }, + { + "epoch": 2.1286173633440515, + "grad_norm": 0.7535598874092102, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.0082, + "step": 165 + }, + { + "epoch": 2.1414790996784565, + "grad_norm": 0.8124526739120483, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.0349, + "step": 166 + }, + { + "epoch": 2.154340836012862, + "grad_norm": 0.8419150114059448, + "learning_rate": 1.6582914572864322e-05, + "loss": 0.9734, + "step": 167 + }, + { + "epoch": 2.167202572347267, + "grad_norm": 0.7929818034172058, + "learning_rate": 1.608040201005025e-05, + "loss": 0.9844, + "step": 168 + }, + { + "epoch": 2.180064308681672, + "grad_norm": 0.8568313121795654, + "learning_rate": 1.5577889447236183e-05, + "loss": 0.9512, + "step": 169 + }, + { + "epoch": 2.192926045016077, + "grad_norm": 0.7779914736747742, + "learning_rate": 1.507537688442211e-05, + "loss": 1.4218, + "step": 170 + }, + { + "epoch": 2.2057877813504825, + "grad_norm": 0.8760497570037842, + "learning_rate": 1.457286432160804e-05, + "loss": 0.9962, + "step": 171 + }, + { + "epoch": 2.2186495176848875, + "grad_norm": 0.9114797711372375, + "learning_rate": 1.407035175879397e-05, + "loss": 0.9851, + "step": 172 + }, + { + "epoch": 2.2315112540192925, + "grad_norm": 0.9769560694694519, + "learning_rate": 1.3567839195979901e-05, + "loss": 0.9286, + "step": 173 + }, + { + "epoch": 2.244372990353698, + "grad_norm": 0.8264016509056091, + "learning_rate": 1.306532663316583e-05, + "loss": 1.105, + "step": 174 + }, + { + "epoch": 2.257234726688103, + "grad_norm": 0.845151960849762, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.0246, + "step": 175 + }, + { + "epoch": 2.270096463022508, + "grad_norm": 0.8815337419509888, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.123, + "step": 176 + }, + { + "epoch": 2.282958199356913, + "grad_norm": 0.8236774802207947, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.0557, + "step": 177 + }, + { + "epoch": 2.2958199356913185, + "grad_norm": 0.9845472574234009, + "learning_rate": 1.105527638190955e-05, + "loss": 1.0297, + "step": 178 + }, + { + "epoch": 2.3086816720257235, + "grad_norm": 0.9996894001960754, + "learning_rate": 1.0552763819095479e-05, + "loss": 0.9584, + "step": 179 + }, + { + "epoch": 2.3215434083601285, + "grad_norm": 0.873965859413147, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.9794, + "step": 180 + }, + { + "epoch": 2.334405144694534, + "grad_norm": 0.8431399464607239, + "learning_rate": 9.547738693467337e-06, + "loss": 0.982, + "step": 181 + }, + { + "epoch": 2.347266881028939, + "grad_norm": 0.8434001207351685, + "learning_rate": 9.045226130653267e-06, + "loss": 0.9264, + "step": 182 + }, + { + "epoch": 2.360128617363344, + "grad_norm": 0.9110084176063538, + "learning_rate": 8.542713567839196e-06, + "loss": 0.9953, + "step": 183 + }, + { + "epoch": 2.372990353697749, + "grad_norm": 0.7942981719970703, + "learning_rate": 8.040201005025125e-06, + "loss": 1.0177, + "step": 184 + }, + { + "epoch": 2.3858520900321545, + "grad_norm": 0.8465241193771362, + "learning_rate": 7.537688442211055e-06, + "loss": 0.9383, + "step": 185 + }, + { + "epoch": 2.3987138263665595, + "grad_norm": 0.9114559888839722, + "learning_rate": 7.035175879396985e-06, + "loss": 1.0373, + "step": 186 + }, + { + "epoch": 2.4115755627009645, + "grad_norm": 0.9119040369987488, + "learning_rate": 6.532663316582915e-06, + "loss": 0.9853, + "step": 187 + }, + { + "epoch": 2.42443729903537, + "grad_norm": 0.8535617589950562, + "learning_rate": 6.030150753768844e-06, + "loss": 1.0043, + "step": 188 + }, + { + "epoch": 2.437299035369775, + "grad_norm": 0.8427352905273438, + "learning_rate": 5.527638190954775e-06, + "loss": 0.9835, + "step": 189 + }, + { + "epoch": 2.45016077170418, + "grad_norm": 0.8029307723045349, + "learning_rate": 5.025125628140704e-06, + "loss": 1.0006, + "step": 190 + }, + { + "epoch": 2.463022508038585, + "grad_norm": 0.7396455407142639, + "learning_rate": 4.522613065326634e-06, + "loss": 1.2503, + "step": 191 + }, + { + "epoch": 2.4758842443729905, + "grad_norm": 0.9325175881385803, + "learning_rate": 4.020100502512563e-06, + "loss": 1.014, + "step": 192 + }, + { + "epoch": 2.4887459807073955, + "grad_norm": 0.8711210489273071, + "learning_rate": 3.5175879396984926e-06, + "loss": 0.8779, + "step": 193 + }, + { + "epoch": 2.5016077170418005, + "grad_norm": 0.9022113680839539, + "learning_rate": 3.015075376884422e-06, + "loss": 1.0297, + "step": 194 + }, + { + "epoch": 2.514469453376206, + "grad_norm": 0.8506854176521301, + "learning_rate": 2.512562814070352e-06, + "loss": 0.8196, + "step": 195 + }, + { + "epoch": 2.527331189710611, + "grad_norm": 0.9505279064178467, + "learning_rate": 2.0100502512562813e-06, + "loss": 0.9124, + "step": 196 + }, + { + "epoch": 2.540192926045016, + "grad_norm": 0.8015897870063782, + "learning_rate": 1.507537688442211e-06, + "loss": 0.9492, + "step": 197 + }, + { + "epoch": 2.553054662379421, + "grad_norm": 0.8406469821929932, + "learning_rate": 1.0050251256281407e-06, + "loss": 0.9474, + "step": 198 + }, + { + "epoch": 2.5659163987138265, + "grad_norm": 0.8296630382537842, + "learning_rate": 5.025125628140703e-07, + "loss": 0.9759, + "step": 199 + }, + { + "epoch": 2.5787781350482315, + "grad_norm": 0.8576996922492981, + "learning_rate": 0.0, + "loss": 0.849, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.879413794353971e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_Chinese_English/checkpoint-200/training_args.bin b/llama_Chinese_English/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..58fdae2d7dd3ece27f1be381e144d8c869fa79f0 --- /dev/null +++ b/llama_Chinese_English/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c610e47fa336614ddba2c1c07877834b0eb0ae724aee37fded72708f837dbe5c +size 5624 diff --git a/llama_Chinese_French/checkpoint-200/README.md b/llama_Chinese_French/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_Chinese_French/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_Chinese_French/checkpoint-200/adapter_config.json b/llama_Chinese_French/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d3c22806259f918d05b412a389889183038bd --- /dev/null +++ b/llama_Chinese_French/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "o_proj", + "up_proj", + "down_proj", + "gate_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_Chinese_French/checkpoint-200/adapter_model.safetensors b/llama_Chinese_French/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f3c1f390fdaf244282303d60e2b626cef1636e2 --- /dev/null +++ b/llama_Chinese_French/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0470e28ac90ca18f9ba953cb2e4efed8f89a146bdd92c694ed638b7e2b1286fb +size 167832240 diff --git a/llama_Chinese_French/checkpoint-200/optimizer.pt b/llama_Chinese_French/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cabb3137ae49891f4330282be33c195a27715907 --- /dev/null +++ b/llama_Chinese_French/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c2e9778a0e3b2b1be33729c2005ec77831b1c7de587c3c1cbf8c593f1d57db +size 85723284 diff --git a/llama_Chinese_French/checkpoint-200/rng_state.pth b/llama_Chinese_French/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21f2ff1c1a816463781d51760f8156e041f5979 --- /dev/null +++ b/llama_Chinese_French/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4981123ff3cf7bd5b7f76839e90e4776f747ca4c38dcb41876fa010c0dea8b23 +size 14244 diff --git a/llama_Chinese_French/checkpoint-200/scheduler.pt b/llama_Chinese_French/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_Chinese_French/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_Chinese_French/checkpoint-200/special_tokens_map.json b/llama_Chinese_French/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_Chinese_French/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_Chinese_French/checkpoint-200/tokenizer.json b/llama_Chinese_French/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_Chinese_French/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_Chinese_French/checkpoint-200/tokenizer_config.json b/llama_Chinese_French/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_Chinese_French/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_Chinese_French/checkpoint-200/trainer_state.json b/llama_Chinese_French/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3d06a2175cf2b01edfcdf1b04ce97d7c13df58be --- /dev/null +++ b/llama_Chinese_French/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.5787781350482315, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012861736334405145, + "grad_norm": 0.9951606392860413, + "learning_rate": 0.0001, + "loss": 2.4564, + "step": 1 + }, + { + "epoch": 0.02572347266881029, + "grad_norm": 0.9671347141265869, + "learning_rate": 9.949748743718594e-05, + "loss": 2.5442, + "step": 2 + }, + { + "epoch": 0.03858520900321544, + "grad_norm": 0.8948544263839722, + "learning_rate": 9.899497487437186e-05, + "loss": 2.5636, + "step": 3 + }, + { + "epoch": 0.05144694533762058, + "grad_norm": 0.9088455438613892, + "learning_rate": 9.84924623115578e-05, + "loss": 2.3504, + "step": 4 + }, + { + "epoch": 0.06430868167202572, + "grad_norm": 0.975287139415741, + "learning_rate": 9.798994974874372e-05, + "loss": 2.2434, + "step": 5 + }, + { + "epoch": 0.07717041800643087, + "grad_norm": 0.9808040261268616, + "learning_rate": 9.748743718592965e-05, + "loss": 2.0649, + "step": 6 + }, + { + "epoch": 0.09003215434083602, + "grad_norm": 0.9832410216331482, + "learning_rate": 9.698492462311559e-05, + "loss": 1.965, + "step": 7 + }, + { + "epoch": 0.10289389067524116, + "grad_norm": 1.0465540885925293, + "learning_rate": 9.64824120603015e-05, + "loss": 1.8993, + "step": 8 + }, + { + "epoch": 0.1157556270096463, + "grad_norm": 2.0614616870880127, + "learning_rate": 9.597989949748745e-05, + "loss": 1.6755, + "step": 9 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 1.052133560180664, + "learning_rate": 9.547738693467337e-05, + "loss": 1.6498, + "step": 10 + }, + { + "epoch": 0.1414790996784566, + "grad_norm": 0.650432825088501, + "learning_rate": 9.49748743718593e-05, + "loss": 1.9211, + "step": 11 + }, + { + "epoch": 0.15434083601286175, + "grad_norm": 0.8694685697555542, + "learning_rate": 9.447236180904523e-05, + "loss": 1.4525, + "step": 12 + }, + { + "epoch": 0.16720257234726688, + "grad_norm": 0.6451851725578308, + "learning_rate": 9.396984924623115e-05, + "loss": 1.414, + "step": 13 + }, + { + "epoch": 0.18006430868167203, + "grad_norm": 0.6098871827125549, + "learning_rate": 9.34673366834171e-05, + "loss": 1.3398, + "step": 14 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 0.7779257893562317, + "learning_rate": 9.296482412060302e-05, + "loss": 1.4537, + "step": 15 + }, + { + "epoch": 0.2057877813504823, + "grad_norm": 0.6430632472038269, + "learning_rate": 9.246231155778895e-05, + "loss": 1.3543, + "step": 16 + }, + { + "epoch": 0.21864951768488747, + "grad_norm": 0.6381723880767822, + "learning_rate": 9.195979899497488e-05, + "loss": 1.4254, + "step": 17 + }, + { + "epoch": 0.2315112540192926, + "grad_norm": 0.7117458581924438, + "learning_rate": 9.14572864321608e-05, + "loss": 1.3229, + "step": 18 + }, + { + "epoch": 0.24437299035369775, + "grad_norm": 0.5543938875198364, + "learning_rate": 9.095477386934675e-05, + "loss": 1.3437, + "step": 19 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 0.7158637642860413, + "learning_rate": 9.045226130653267e-05, + "loss": 1.2772, + "step": 20 + }, + { + "epoch": 0.27009646302250806, + "grad_norm": 0.6783093810081482, + "learning_rate": 8.99497487437186e-05, + "loss": 1.2783, + "step": 21 + }, + { + "epoch": 0.2829581993569132, + "grad_norm": 0.6671101450920105, + "learning_rate": 8.944723618090453e-05, + "loss": 1.3316, + "step": 22 + }, + { + "epoch": 0.2958199356913183, + "grad_norm": 0.71722012758255, + "learning_rate": 8.894472361809045e-05, + "loss": 1.3419, + "step": 23 + }, + { + "epoch": 0.3086816720257235, + "grad_norm": 0.6375122666358948, + "learning_rate": 8.84422110552764e-05, + "loss": 1.2423, + "step": 24 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 0.7293832898139954, + "learning_rate": 8.793969849246232e-05, + "loss": 1.3095, + "step": 25 + }, + { + "epoch": 0.33440514469453375, + "grad_norm": 0.6099777817726135, + "learning_rate": 8.743718592964825e-05, + "loss": 1.287, + "step": 26 + }, + { + "epoch": 0.34726688102893893, + "grad_norm": 0.7195896506309509, + "learning_rate": 8.693467336683418e-05, + "loss": 1.2524, + "step": 27 + }, + { + "epoch": 0.36012861736334406, + "grad_norm": 0.6922698020935059, + "learning_rate": 8.64321608040201e-05, + "loss": 1.2027, + "step": 28 + }, + { + "epoch": 0.3729903536977492, + "grad_norm": 0.7022033929824829, + "learning_rate": 8.592964824120603e-05, + "loss": 1.3402, + "step": 29 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 0.7927314639091492, + "learning_rate": 8.542713567839196e-05, + "loss": 1.2233, + "step": 30 + }, + { + "epoch": 0.3987138263665595, + "grad_norm": 0.7387583255767822, + "learning_rate": 8.49246231155779e-05, + "loss": 1.2487, + "step": 31 + }, + { + "epoch": 0.4115755627009646, + "grad_norm": 0.7108672261238098, + "learning_rate": 8.442211055276383e-05, + "loss": 1.2038, + "step": 32 + }, + { + "epoch": 0.42443729903536975, + "grad_norm": 0.8682803511619568, + "learning_rate": 8.391959798994975e-05, + "loss": 1.1447, + "step": 33 + }, + { + "epoch": 0.43729903536977494, + "grad_norm": 0.7687588930130005, + "learning_rate": 8.341708542713568e-05, + "loss": 1.3467, + "step": 34 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 0.7896663546562195, + "learning_rate": 8.291457286432161e-05, + "loss": 1.1263, + "step": 35 + }, + { + "epoch": 0.4630225080385852, + "grad_norm": 0.8078703880310059, + "learning_rate": 8.241206030150754e-05, + "loss": 1.2355, + "step": 36 + }, + { + "epoch": 0.4758842443729904, + "grad_norm": 0.7795388102531433, + "learning_rate": 8.190954773869348e-05, + "loss": 1.2717, + "step": 37 + }, + { + "epoch": 0.4887459807073955, + "grad_norm": 0.8915436267852783, + "learning_rate": 8.14070351758794e-05, + "loss": 1.2856, + "step": 38 + }, + { + "epoch": 0.5016077170418006, + "grad_norm": 0.8009978532791138, + "learning_rate": 8.090452261306533e-05, + "loss": 1.1603, + "step": 39 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 0.7390609383583069, + "learning_rate": 8.040201005025126e-05, + "loss": 1.2806, + "step": 40 + }, + { + "epoch": 0.5273311897106109, + "grad_norm": 0.9151616096496582, + "learning_rate": 7.989949748743719e-05, + "loss": 1.1964, + "step": 41 + }, + { + "epoch": 0.5401929260450161, + "grad_norm": 0.635924220085144, + "learning_rate": 7.939698492462313e-05, + "loss": 1.4545, + "step": 42 + }, + { + "epoch": 0.5530546623794212, + "grad_norm": 0.829750657081604, + "learning_rate": 7.889447236180904e-05, + "loss": 1.1377, + "step": 43 + }, + { + "epoch": 0.5659163987138264, + "grad_norm": 0.7639206051826477, + "learning_rate": 7.839195979899498e-05, + "loss": 1.1665, + "step": 44 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 0.6754475831985474, + "learning_rate": 7.788944723618091e-05, + "loss": 1.1466, + "step": 45 + }, + { + "epoch": 0.5916398713826366, + "grad_norm": 0.5332170128822327, + "learning_rate": 7.738693467336684e-05, + "loss": 1.32, + "step": 46 + }, + { + "epoch": 0.6045016077170418, + "grad_norm": 0.6341368556022644, + "learning_rate": 7.688442211055277e-05, + "loss": 1.1433, + "step": 47 + }, + { + "epoch": 0.617363344051447, + "grad_norm": 0.6452201008796692, + "learning_rate": 7.638190954773869e-05, + "loss": 1.1301, + "step": 48 + }, + { + "epoch": 0.6302250803858521, + "grad_norm": 0.7142246961593628, + "learning_rate": 7.587939698492463e-05, + "loss": 1.1357, + "step": 49 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 0.6682009696960449, + "learning_rate": 7.537688442211056e-05, + "loss": 1.2959, + "step": 50 + }, + { + "epoch": 0.6559485530546624, + "grad_norm": 0.5942954421043396, + "learning_rate": 7.487437185929649e-05, + "loss": 1.2229, + "step": 51 + }, + { + "epoch": 0.6688102893890675, + "grad_norm": 0.5940057635307312, + "learning_rate": 7.437185929648241e-05, + "loss": 1.2565, + "step": 52 + }, + { + "epoch": 0.6816720257234726, + "grad_norm": 0.6409434676170349, + "learning_rate": 7.386934673366834e-05, + "loss": 1.273, + "step": 53 + }, + { + "epoch": 0.6945337620578779, + "grad_norm": 0.5849536061286926, + "learning_rate": 7.336683417085427e-05, + "loss": 1.0188, + "step": 54 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 0.677545964717865, + "learning_rate": 7.28643216080402e-05, + "loss": 1.1269, + "step": 55 + }, + { + "epoch": 0.7202572347266881, + "grad_norm": 0.6222040057182312, + "learning_rate": 7.236180904522614e-05, + "loss": 1.2673, + "step": 56 + }, + { + "epoch": 0.7331189710610932, + "grad_norm": 0.5852852463722229, + "learning_rate": 7.185929648241206e-05, + "loss": 1.2633, + "step": 57 + }, + { + "epoch": 0.7459807073954984, + "grad_norm": 0.6711986064910889, + "learning_rate": 7.135678391959799e-05, + "loss": 1.1057, + "step": 58 + }, + { + "epoch": 0.7588424437299035, + "grad_norm": 0.6572586894035339, + "learning_rate": 7.085427135678392e-05, + "loss": 1.1354, + "step": 59 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 0.6903595924377441, + "learning_rate": 7.035175879396985e-05, + "loss": 1.1718, + "step": 60 + }, + { + "epoch": 0.7845659163987139, + "grad_norm": 0.65706467628479, + "learning_rate": 6.984924623115579e-05, + "loss": 1.128, + "step": 61 + }, + { + "epoch": 0.797427652733119, + "grad_norm": 0.7002871632575989, + "learning_rate": 6.93467336683417e-05, + "loss": 1.154, + "step": 62 + }, + { + "epoch": 0.8102893890675241, + "grad_norm": 0.6884977221488953, + "learning_rate": 6.884422110552764e-05, + "loss": 1.1083, + "step": 63 + }, + { + "epoch": 0.8231511254019293, + "grad_norm": 0.7252687215805054, + "learning_rate": 6.834170854271357e-05, + "loss": 1.2786, + "step": 64 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 0.692872166633606, + "learning_rate": 6.78391959798995e-05, + "loss": 0.9494, + "step": 65 + }, + { + "epoch": 0.8488745980707395, + "grad_norm": 0.836986243724823, + "learning_rate": 6.733668341708544e-05, + "loss": 1.1501, + "step": 66 + }, + { + "epoch": 0.8617363344051447, + "grad_norm": 0.7169510126113892, + "learning_rate": 6.683417085427135e-05, + "loss": 1.0943, + "step": 67 + }, + { + "epoch": 0.8745980707395499, + "grad_norm": 0.6880966424942017, + "learning_rate": 6.633165829145729e-05, + "loss": 1.1755, + "step": 68 + }, + { + "epoch": 0.887459807073955, + "grad_norm": 0.6764377355575562, + "learning_rate": 6.582914572864322e-05, + "loss": 1.2418, + "step": 69 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 0.6112034320831299, + "learning_rate": 6.532663316582915e-05, + "loss": 1.1294, + "step": 70 + }, + { + "epoch": 0.9131832797427653, + "grad_norm": 0.7059026956558228, + "learning_rate": 6.482412060301508e-05, + "loss": 1.1561, + "step": 71 + }, + { + "epoch": 0.9260450160771704, + "grad_norm": 0.7074644565582275, + "learning_rate": 6.4321608040201e-05, + "loss": 1.0726, + "step": 72 + }, + { + "epoch": 0.9389067524115756, + "grad_norm": 0.8158409595489502, + "learning_rate": 6.381909547738694e-05, + "loss": 1.0443, + "step": 73 + }, + { + "epoch": 0.9517684887459807, + "grad_norm": 0.6347314119338989, + "learning_rate": 6.331658291457287e-05, + "loss": 1.1715, + "step": 74 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 0.7131938338279724, + "learning_rate": 6.28140703517588e-05, + "loss": 1.2283, + "step": 75 + }, + { + "epoch": 0.977491961414791, + "grad_norm": 0.7753063440322876, + "learning_rate": 6.231155778894473e-05, + "loss": 1.1375, + "step": 76 + }, + { + "epoch": 0.9903536977491961, + "grad_norm": 0.6545727252960205, + "learning_rate": 6.180904522613065e-05, + "loss": 1.0889, + "step": 77 + }, + { + "epoch": 1.0064308681672025, + "grad_norm": 1.0661736726760864, + "learning_rate": 6.130653266331658e-05, + "loss": 1.6334, + "step": 78 + }, + { + "epoch": 1.0192926045016077, + "grad_norm": 0.5409541726112366, + "learning_rate": 6.080402010050251e-05, + "loss": 0.9748, + "step": 79 + }, + { + "epoch": 1.0321543408360128, + "grad_norm": 0.6088462471961975, + "learning_rate": 6.030150753768844e-05, + "loss": 1.0846, + "step": 80 + }, + { + "epoch": 1.045016077170418, + "grad_norm": 0.6366555094718933, + "learning_rate": 5.979899497487438e-05, + "loss": 1.0867, + "step": 81 + }, + { + "epoch": 1.0578778135048232, + "grad_norm": 0.677960991859436, + "learning_rate": 5.929648241206031e-05, + "loss": 1.0855, + "step": 82 + }, + { + "epoch": 1.0707395498392283, + "grad_norm": 0.5949239134788513, + "learning_rate": 5.879396984924623e-05, + "loss": 1.0251, + "step": 83 + }, + { + "epoch": 1.0836012861736335, + "grad_norm": 0.7160579562187195, + "learning_rate": 5.829145728643216e-05, + "loss": 1.0727, + "step": 84 + }, + { + "epoch": 1.0964630225080385, + "grad_norm": 0.7298890352249146, + "learning_rate": 5.778894472361809e-05, + "loss": 1.093, + "step": 85 + }, + { + "epoch": 1.1093247588424437, + "grad_norm": 0.6917928457260132, + "learning_rate": 5.728643216080403e-05, + "loss": 1.0503, + "step": 86 + }, + { + "epoch": 1.122186495176849, + "grad_norm": 0.6670376658439636, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.0448, + "step": 87 + }, + { + "epoch": 1.135048231511254, + "grad_norm": 0.6643639206886292, + "learning_rate": 5.628140703517588e-05, + "loss": 1.0596, + "step": 88 + }, + { + "epoch": 1.1479099678456592, + "grad_norm": 0.6676069498062134, + "learning_rate": 5.577889447236181e-05, + "loss": 1.081, + "step": 89 + }, + { + "epoch": 1.1607717041800643, + "grad_norm": 0.6921442747116089, + "learning_rate": 5.527638190954774e-05, + "loss": 1.0948, + "step": 90 + }, + { + "epoch": 1.1736334405144695, + "grad_norm": 0.7262521386146545, + "learning_rate": 5.477386934673368e-05, + "loss": 1.7814, + "step": 91 + }, + { + "epoch": 1.1864951768488745, + "grad_norm": 0.5920513868331909, + "learning_rate": 5.4271356783919604e-05, + "loss": 0.892, + "step": 92 + }, + { + "epoch": 1.1993569131832797, + "grad_norm": 0.6159147024154663, + "learning_rate": 5.376884422110553e-05, + "loss": 0.902, + "step": 93 + }, + { + "epoch": 1.212218649517685, + "grad_norm": 0.7935417890548706, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.2126, + "step": 94 + }, + { + "epoch": 1.22508038585209, + "grad_norm": 0.6167990565299988, + "learning_rate": 5.276381909547739e-05, + "loss": 0.8746, + "step": 95 + }, + { + "epoch": 1.2379421221864952, + "grad_norm": 0.7907259464263916, + "learning_rate": 5.226130653266332e-05, + "loss": 1.2391, + "step": 96 + }, + { + "epoch": 1.2508038585209003, + "grad_norm": 0.8164305090904236, + "learning_rate": 5.175879396984925e-05, + "loss": 1.074, + "step": 97 + }, + { + "epoch": 1.2636655948553055, + "grad_norm": 0.8410254120826721, + "learning_rate": 5.125628140703518e-05, + "loss": 1.0546, + "step": 98 + }, + { + "epoch": 1.2765273311897105, + "grad_norm": 0.6917083859443665, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.0823, + "step": 99 + }, + { + "epoch": 1.2893890675241158, + "grad_norm": 0.6826218366622925, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.0135, + "step": 100 + }, + { + "epoch": 1.302250803858521, + "grad_norm": 0.6940116286277771, + "learning_rate": 4.974874371859297e-05, + "loss": 1.1325, + "step": 101 + }, + { + "epoch": 1.315112540192926, + "grad_norm": 0.6695630550384521, + "learning_rate": 4.92462311557789e-05, + "loss": 1.0213, + "step": 102 + }, + { + "epoch": 1.3279742765273312, + "grad_norm": 0.5696994066238403, + "learning_rate": 4.874371859296483e-05, + "loss": 0.8817, + "step": 103 + }, + { + "epoch": 1.3408360128617363, + "grad_norm": 0.7198184132575989, + "learning_rate": 4.824120603015075e-05, + "loss": 1.5291, + "step": 104 + }, + { + "epoch": 1.3536977491961415, + "grad_norm": 0.7395120859146118, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.0921, + "step": 105 + }, + { + "epoch": 1.3665594855305465, + "grad_norm": 0.7207233905792236, + "learning_rate": 4.723618090452262e-05, + "loss": 0.9703, + "step": 106 + }, + { + "epoch": 1.3794212218649518, + "grad_norm": 0.6898490786552429, + "learning_rate": 4.673366834170855e-05, + "loss": 0.9412, + "step": 107 + }, + { + "epoch": 1.392282958199357, + "grad_norm": 0.6867828965187073, + "learning_rate": 4.6231155778894475e-05, + "loss": 0.9868, + "step": 108 + }, + { + "epoch": 1.405144694533762, + "grad_norm": 0.7548035383224487, + "learning_rate": 4.57286432160804e-05, + "loss": 0.9273, + "step": 109 + }, + { + "epoch": 1.4180064308681672, + "grad_norm": 0.7568983435630798, + "learning_rate": 4.522613065326633e-05, + "loss": 0.9694, + "step": 110 + }, + { + "epoch": 1.4308681672025725, + "grad_norm": 0.7726122736930847, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.0544, + "step": 111 + }, + { + "epoch": 1.4437299035369775, + "grad_norm": 0.7325587272644043, + "learning_rate": 4.42211055276382e-05, + "loss": 0.9753, + "step": 112 + }, + { + "epoch": 1.4565916398713825, + "grad_norm": 0.8253093361854553, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.1213, + "step": 113 + }, + { + "epoch": 1.4694533762057878, + "grad_norm": 0.7820125818252563, + "learning_rate": 4.321608040201005e-05, + "loss": 1.0241, + "step": 114 + }, + { + "epoch": 1.482315112540193, + "grad_norm": 0.793615460395813, + "learning_rate": 4.271356783919598e-05, + "loss": 1.2663, + "step": 115 + }, + { + "epoch": 1.495176848874598, + "grad_norm": 0.7086387872695923, + "learning_rate": 4.2211055276381914e-05, + "loss": 0.9192, + "step": 116 + }, + { + "epoch": 1.5080385852090032, + "grad_norm": 0.8336376547813416, + "learning_rate": 4.170854271356784e-05, + "loss": 1.0549, + "step": 117 + }, + { + "epoch": 1.5209003215434085, + "grad_norm": 0.7523891925811768, + "learning_rate": 4.120603015075377e-05, + "loss": 1.025, + "step": 118 + }, + { + "epoch": 1.5337620578778135, + "grad_norm": 0.8176757097244263, + "learning_rate": 4.07035175879397e-05, + "loss": 1.1298, + "step": 119 + }, + { + "epoch": 1.5466237942122185, + "grad_norm": 0.6839028000831604, + "learning_rate": 4.020100502512563e-05, + "loss": 0.9771, + "step": 120 + }, + { + "epoch": 1.5594855305466238, + "grad_norm": 0.8084897398948669, + "learning_rate": 3.969849246231156e-05, + "loss": 1.0422, + "step": 121 + }, + { + "epoch": 1.572347266881029, + "grad_norm": 0.6881428360939026, + "learning_rate": 3.919597989949749e-05, + "loss": 0.9939, + "step": 122 + }, + { + "epoch": 1.585209003215434, + "grad_norm": 0.7132687568664551, + "learning_rate": 3.869346733668342e-05, + "loss": 0.9658, + "step": 123 + }, + { + "epoch": 1.5980707395498392, + "grad_norm": 0.8140119314193726, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.4144, + "step": 124 + }, + { + "epoch": 1.6109324758842445, + "grad_norm": 0.6113239526748657, + "learning_rate": 3.768844221105528e-05, + "loss": 0.8371, + "step": 125 + }, + { + "epoch": 1.6237942122186495, + "grad_norm": 0.7522210478782654, + "learning_rate": 3.7185929648241204e-05, + "loss": 0.9428, + "step": 126 + }, + { + "epoch": 1.6366559485530545, + "grad_norm": 0.7036418318748474, + "learning_rate": 3.668341708542714e-05, + "loss": 0.9228, + "step": 127 + }, + { + "epoch": 1.6495176848874598, + "grad_norm": 0.8932844996452332, + "learning_rate": 3.618090452261307e-05, + "loss": 1.1151, + "step": 128 + }, + { + "epoch": 1.662379421221865, + "grad_norm": 0.7899701595306396, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.0189, + "step": 129 + }, + { + "epoch": 1.67524115755627, + "grad_norm": 0.86736661195755, + "learning_rate": 3.517587939698493e-05, + "loss": 0.9549, + "step": 130 + }, + { + "epoch": 1.6881028938906752, + "grad_norm": 0.7810158729553223, + "learning_rate": 3.467336683417085e-05, + "loss": 1.0268, + "step": 131 + }, + { + "epoch": 1.7009646302250805, + "grad_norm": 0.7750527858734131, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.016, + "step": 132 + }, + { + "epoch": 1.7138263665594855, + "grad_norm": 0.8790460824966431, + "learning_rate": 3.366834170854272e-05, + "loss": 1.2774, + "step": 133 + }, + { + "epoch": 1.7266881028938905, + "grad_norm": 0.6732961535453796, + "learning_rate": 3.3165829145728643e-05, + "loss": 0.901, + "step": 134 + }, + { + "epoch": 1.739549839228296, + "grad_norm": 0.7882741093635559, + "learning_rate": 3.2663316582914576e-05, + "loss": 0.9445, + "step": 135 + }, + { + "epoch": 1.752411575562701, + "grad_norm": 0.8920542001724243, + "learning_rate": 3.21608040201005e-05, + "loss": 0.8925, + "step": 136 + }, + { + "epoch": 1.765273311897106, + "grad_norm": 0.8697175979614258, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.041, + "step": 137 + }, + { + "epoch": 1.7781350482315113, + "grad_norm": 0.7766188979148865, + "learning_rate": 3.1155778894472366e-05, + "loss": 0.9612, + "step": 138 + }, + { + "epoch": 1.7909967845659165, + "grad_norm": 0.8278536796569824, + "learning_rate": 3.065326633165829e-05, + "loss": 1.0559, + "step": 139 + }, + { + "epoch": 1.8038585209003215, + "grad_norm": 0.7597684264183044, + "learning_rate": 3.015075376884422e-05, + "loss": 0.8443, + "step": 140 + }, + { + "epoch": 1.8167202572347267, + "grad_norm": 0.9739409685134888, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.1462, + "step": 141 + }, + { + "epoch": 1.829581993569132, + "grad_norm": 0.8837987184524536, + "learning_rate": 2.914572864321608e-05, + "loss": 1.0439, + "step": 142 + }, + { + "epoch": 1.842443729903537, + "grad_norm": 0.7506547570228577, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.022, + "step": 143 + }, + { + "epoch": 1.855305466237942, + "grad_norm": 0.8791114687919617, + "learning_rate": 2.814070351758794e-05, + "loss": 1.1417, + "step": 144 + }, + { + "epoch": 1.8681672025723473, + "grad_norm": 0.8518108129501343, + "learning_rate": 2.763819095477387e-05, + "loss": 1.0243, + "step": 145 + }, + { + "epoch": 1.8810289389067525, + "grad_norm": 0.7354499101638794, + "learning_rate": 2.7135678391959802e-05, + "loss": 0.9033, + "step": 146 + }, + { + "epoch": 1.8938906752411575, + "grad_norm": 0.8178240656852722, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.165, + "step": 147 + }, + { + "epoch": 1.9067524115755627, + "grad_norm": 0.9056537747383118, + "learning_rate": 2.613065326633166e-05, + "loss": 0.9944, + "step": 148 + }, + { + "epoch": 1.919614147909968, + "grad_norm": 0.7785031199455261, + "learning_rate": 2.562814070351759e-05, + "loss": 0.9118, + "step": 149 + }, + { + "epoch": 1.932475884244373, + "grad_norm": 0.815324068069458, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.09, + "step": 150 + }, + { + "epoch": 1.945337620578778, + "grad_norm": 0.7084099054336548, + "learning_rate": 2.462311557788945e-05, + "loss": 0.9843, + "step": 151 + }, + { + "epoch": 1.9581993569131833, + "grad_norm": 0.832200825214386, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.0607, + "step": 152 + }, + { + "epoch": 1.9710610932475885, + "grad_norm": 0.8333700299263, + "learning_rate": 2.361809045226131e-05, + "loss": 1.1185, + "step": 153 + }, + { + "epoch": 1.9839228295819935, + "grad_norm": 0.8199861645698547, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.0445, + "step": 154 + }, + { + "epoch": 1.9967845659163987, + "grad_norm": 1.3603229522705078, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.4961, + "step": 155 + }, + { + "epoch": 2.012861736334405, + "grad_norm": 0.8145521283149719, + "learning_rate": 2.21105527638191e-05, + "loss": 0.9617, + "step": 156 + }, + { + "epoch": 2.0257234726688105, + "grad_norm": 0.741971492767334, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.0142, + "step": 157 + }, + { + "epoch": 2.0385852090032155, + "grad_norm": 0.7437772750854492, + "learning_rate": 2.1105527638190957e-05, + "loss": 0.9444, + "step": 158 + }, + { + "epoch": 2.0514469453376205, + "grad_norm": 0.7695031762123108, + "learning_rate": 2.0603015075376886e-05, + "loss": 0.9844, + "step": 159 + }, + { + "epoch": 2.0643086816720255, + "grad_norm": 0.795947253704071, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.9224, + "step": 160 + }, + { + "epoch": 2.077170418006431, + "grad_norm": 0.7487471699714661, + "learning_rate": 1.9597989949748744e-05, + "loss": 0.9984, + "step": 161 + }, + { + "epoch": 2.090032154340836, + "grad_norm": 0.8159248232841492, + "learning_rate": 1.9095477386934673e-05, + "loss": 0.9995, + "step": 162 + }, + { + "epoch": 2.102893890675241, + "grad_norm": 0.7632979154586792, + "learning_rate": 1.8592964824120602e-05, + "loss": 0.9259, + "step": 163 + }, + { + "epoch": 2.1157556270096465, + "grad_norm": 0.7518342733383179, + "learning_rate": 1.8090452261306535e-05, + "loss": 0.9617, + "step": 164 + }, + { + "epoch": 2.1286173633440515, + "grad_norm": 0.744655430316925, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.9578, + "step": 165 + }, + { + "epoch": 2.1414790996784565, + "grad_norm": 0.7938129305839539, + "learning_rate": 1.7085427135678393e-05, + "loss": 0.9698, + "step": 166 + }, + { + "epoch": 2.154340836012862, + "grad_norm": 0.7613552212715149, + "learning_rate": 1.6582914572864322e-05, + "loss": 0.9379, + "step": 167 + }, + { + "epoch": 2.167202572347267, + "grad_norm": 0.7046363353729248, + "learning_rate": 1.608040201005025e-05, + "loss": 0.9297, + "step": 168 + }, + { + "epoch": 2.180064308681672, + "grad_norm": 0.8412237763404846, + "learning_rate": 1.5577889447236183e-05, + "loss": 0.9214, + "step": 169 + }, + { + "epoch": 2.192926045016077, + "grad_norm": 0.7263858318328857, + "learning_rate": 1.507537688442211e-05, + "loss": 1.3853, + "step": 170 + }, + { + "epoch": 2.2057877813504825, + "grad_norm": 0.8270084261894226, + "learning_rate": 1.457286432160804e-05, + "loss": 0.9484, + "step": 171 + }, + { + "epoch": 2.2186495176848875, + "grad_norm": 0.8701935410499573, + "learning_rate": 1.407035175879397e-05, + "loss": 0.9546, + "step": 172 + }, + { + "epoch": 2.2315112540192925, + "grad_norm": 0.887917697429657, + "learning_rate": 1.3567839195979901e-05, + "loss": 0.9008, + "step": 173 + }, + { + "epoch": 2.244372990353698, + "grad_norm": 0.7514311075210571, + "learning_rate": 1.306532663316583e-05, + "loss": 1.0446, + "step": 174 + }, + { + "epoch": 2.257234726688103, + "grad_norm": 0.8387101292610168, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.9765, + "step": 175 + }, + { + "epoch": 2.270096463022508, + "grad_norm": 0.8040310144424438, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.0726, + "step": 176 + }, + { + "epoch": 2.282958199356913, + "grad_norm": 0.7957134246826172, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.0003, + "step": 177 + }, + { + "epoch": 2.2958199356913185, + "grad_norm": 0.9072922468185425, + "learning_rate": 1.105527638190955e-05, + "loss": 1.0043, + "step": 178 + }, + { + "epoch": 2.3086816720257235, + "grad_norm": 0.9117444157600403, + "learning_rate": 1.0552763819095479e-05, + "loss": 0.9208, + "step": 179 + }, + { + "epoch": 2.3215434083601285, + "grad_norm": 0.8038241863250732, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.9437, + "step": 180 + }, + { + "epoch": 2.334405144694534, + "grad_norm": 0.7639140486717224, + "learning_rate": 9.547738693467337e-06, + "loss": 0.9291, + "step": 181 + }, + { + "epoch": 2.347266881028939, + "grad_norm": 0.7883135080337524, + "learning_rate": 9.045226130653267e-06, + "loss": 0.8877, + "step": 182 + }, + { + "epoch": 2.360128617363344, + "grad_norm": 0.8600010275840759, + "learning_rate": 8.542713567839196e-06, + "loss": 0.9616, + "step": 183 + }, + { + "epoch": 2.372990353697749, + "grad_norm": 0.7619961500167847, + "learning_rate": 8.040201005025125e-06, + "loss": 0.9905, + "step": 184 + }, + { + "epoch": 2.3858520900321545, + "grad_norm": 0.7982620596885681, + "learning_rate": 7.537688442211055e-06, + "loss": 0.8733, + "step": 185 + }, + { + "epoch": 2.3987138263665595, + "grad_norm": 0.839229941368103, + "learning_rate": 7.035175879396985e-06, + "loss": 1.0003, + "step": 186 + }, + { + "epoch": 2.4115755627009645, + "grad_norm": 0.8543558716773987, + "learning_rate": 6.532663316582915e-06, + "loss": 0.9495, + "step": 187 + }, + { + "epoch": 2.42443729903537, + "grad_norm": 0.8323649764060974, + "learning_rate": 6.030150753768844e-06, + "loss": 0.9723, + "step": 188 + }, + { + "epoch": 2.437299035369775, + "grad_norm": 0.8163830637931824, + "learning_rate": 5.527638190954775e-06, + "loss": 0.9558, + "step": 189 + }, + { + "epoch": 2.45016077170418, + "grad_norm": 0.7704145908355713, + "learning_rate": 5.025125628140704e-06, + "loss": 0.9669, + "step": 190 + }, + { + "epoch": 2.463022508038585, + "grad_norm": 0.677905261516571, + "learning_rate": 4.522613065326634e-06, + "loss": 1.202, + "step": 191 + }, + { + "epoch": 2.4758842443729905, + "grad_norm": 0.8544199466705322, + "learning_rate": 4.020100502512563e-06, + "loss": 0.9866, + "step": 192 + }, + { + "epoch": 2.4887459807073955, + "grad_norm": 0.7817060351371765, + "learning_rate": 3.5175879396984926e-06, + "loss": 0.8434, + "step": 193 + }, + { + "epoch": 2.5016077170418005, + "grad_norm": 0.8981476426124573, + "learning_rate": 3.015075376884422e-06, + "loss": 1.002, + "step": 194 + }, + { + "epoch": 2.514469453376206, + "grad_norm": 0.7981364130973816, + "learning_rate": 2.512562814070352e-06, + "loss": 0.7909, + "step": 195 + }, + { + "epoch": 2.527331189710611, + "grad_norm": 0.937846302986145, + "learning_rate": 2.0100502512562813e-06, + "loss": 0.8897, + "step": 196 + }, + { + "epoch": 2.540192926045016, + "grad_norm": 0.7505702376365662, + "learning_rate": 1.507537688442211e-06, + "loss": 0.9036, + "step": 197 + }, + { + "epoch": 2.553054662379421, + "grad_norm": 0.7898029685020447, + "learning_rate": 1.0050251256281407e-06, + "loss": 0.8951, + "step": 198 + }, + { + "epoch": 2.5659163987138265, + "grad_norm": 0.7744051814079285, + "learning_rate": 5.025125628140703e-07, + "loss": 0.9436, + "step": 199 + }, + { + "epoch": 2.5787781350482315, + "grad_norm": 0.8044281601905823, + "learning_rate": 0.0, + "loss": 0.7958, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.204351730561843e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_Chinese_French/checkpoint-200/training_args.bin b/llama_Chinese_French/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8439b90d8b46f41d35e89ea805bf2ce97f2a591b --- /dev/null +++ b/llama_Chinese_French/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c3eb1a0219f7689fe3e6d4bb5144169d337c6b1674b6e1aa739775179ecaa20 +size 5624 diff --git a/llama_Chinese_German/checkpoint-200/README.md b/llama_Chinese_German/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_Chinese_German/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_Chinese_German/checkpoint-200/adapter_config.json b/llama_Chinese_German/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7dc201845eb2a89cce47b700c8aefbb92f9e53b0 --- /dev/null +++ b/llama_Chinese_German/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "v_proj", + "gate_proj", + "o_proj", + "q_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_Chinese_German/checkpoint-200/adapter_model.safetensors b/llama_Chinese_German/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5730ffb6983f643e1ba09880461e2830b78b41cf --- /dev/null +++ b/llama_Chinese_German/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb3f8fc39a8f1c1ec50c5ccdeaa359b58deebf857edbdabdfed839fff995c9d +size 167832240 diff --git a/llama_Chinese_German/checkpoint-200/optimizer.pt b/llama_Chinese_German/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..632ee3236d1517d1a19310bcda5d5356683af373 --- /dev/null +++ b/llama_Chinese_German/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b19da44cca7f6b6a11e09a3ade136235f06ce64be4c2d03eb0e50ae8a2082af8 +size 85723284 diff --git a/llama_Chinese_German/checkpoint-200/rng_state.pth b/llama_Chinese_German/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21f2ff1c1a816463781d51760f8156e041f5979 --- /dev/null +++ b/llama_Chinese_German/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4981123ff3cf7bd5b7f76839e90e4776f747ca4c38dcb41876fa010c0dea8b23 +size 14244 diff --git a/llama_Chinese_German/checkpoint-200/scheduler.pt b/llama_Chinese_German/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_Chinese_German/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_Chinese_German/checkpoint-200/special_tokens_map.json b/llama_Chinese_German/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_Chinese_German/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_Chinese_German/checkpoint-200/tokenizer.json b/llama_Chinese_German/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_Chinese_German/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_Chinese_German/checkpoint-200/tokenizer_config.json b/llama_Chinese_German/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_Chinese_German/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_Chinese_German/checkpoint-200/trainer_state.json b/llama_Chinese_German/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..411a84745bb845f358251445c8853138a56ffbb5 --- /dev/null +++ b/llama_Chinese_German/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.5787781350482315, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012861736334405145, + "grad_norm": 1.01576566696167, + "learning_rate": 0.0001, + "loss": 2.5387, + "step": 1 + }, + { + "epoch": 0.02572347266881029, + "grad_norm": 0.9670435190200806, + "learning_rate": 9.949748743718594e-05, + "loss": 2.5987, + "step": 2 + }, + { + "epoch": 0.03858520900321544, + "grad_norm": 0.9164174795150757, + "learning_rate": 9.899497487437186e-05, + "loss": 2.6476, + "step": 3 + }, + { + "epoch": 0.05144694533762058, + "grad_norm": 0.9619408845901489, + "learning_rate": 9.84924623115578e-05, + "loss": 2.4606, + "step": 4 + }, + { + "epoch": 0.06430868167202572, + "grad_norm": 1.0056397914886475, + "learning_rate": 9.798994974874372e-05, + "loss": 2.2964, + "step": 5 + }, + { + "epoch": 0.07717041800643087, + "grad_norm": 1.057576298713684, + "learning_rate": 9.748743718592965e-05, + "loss": 2.1459, + "step": 6 + }, + { + "epoch": 0.09003215434083602, + "grad_norm": 1.0946511030197144, + "learning_rate": 9.698492462311559e-05, + "loss": 2.0283, + "step": 7 + }, + { + "epoch": 0.10289389067524116, + "grad_norm": 1.3670672178268433, + "learning_rate": 9.64824120603015e-05, + "loss": 1.9742, + "step": 8 + }, + { + "epoch": 0.1157556270096463, + "grad_norm": 1.074001431465149, + "learning_rate": 9.597989949748745e-05, + "loss": 1.7042, + "step": 9 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 0.9767855405807495, + "learning_rate": 9.547738693467337e-05, + "loss": 1.7204, + "step": 10 + }, + { + "epoch": 0.1414790996784566, + "grad_norm": 0.7178863286972046, + "learning_rate": 9.49748743718593e-05, + "loss": 1.9529, + "step": 11 + }, + { + "epoch": 0.15434083601286175, + "grad_norm": 0.8454244136810303, + "learning_rate": 9.447236180904523e-05, + "loss": 1.4926, + "step": 12 + }, + { + "epoch": 0.16720257234726688, + "grad_norm": 0.7085447907447815, + "learning_rate": 9.396984924623115e-05, + "loss": 1.4696, + "step": 13 + }, + { + "epoch": 0.18006430868167203, + "grad_norm": 0.643786609172821, + "learning_rate": 9.34673366834171e-05, + "loss": 1.4026, + "step": 14 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 0.6433325409889221, + "learning_rate": 9.296482412060302e-05, + "loss": 1.467, + "step": 15 + }, + { + "epoch": 0.2057877813504823, + "grad_norm": 0.6449655294418335, + "learning_rate": 9.246231155778895e-05, + "loss": 1.3711, + "step": 16 + }, + { + "epoch": 0.21864951768488747, + "grad_norm": 0.6271617412567139, + "learning_rate": 9.195979899497488e-05, + "loss": 1.4382, + "step": 17 + }, + { + "epoch": 0.2315112540192926, + "grad_norm": 0.7128543257713318, + "learning_rate": 9.14572864321608e-05, + "loss": 1.3602, + "step": 18 + }, + { + "epoch": 0.24437299035369775, + "grad_norm": 0.5839459300041199, + "learning_rate": 9.095477386934675e-05, + "loss": 1.3542, + "step": 19 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 0.7175548672676086, + "learning_rate": 9.045226130653267e-05, + "loss": 1.3507, + "step": 20 + }, + { + "epoch": 0.27009646302250806, + "grad_norm": 0.6383489370346069, + "learning_rate": 8.99497487437186e-05, + "loss": 1.3026, + "step": 21 + }, + { + "epoch": 0.2829581993569132, + "grad_norm": 0.6702913045883179, + "learning_rate": 8.944723618090453e-05, + "loss": 1.3607, + "step": 22 + }, + { + "epoch": 0.2958199356913183, + "grad_norm": 0.6579256653785706, + "learning_rate": 8.894472361809045e-05, + "loss": 1.3742, + "step": 23 + }, + { + "epoch": 0.3086816720257235, + "grad_norm": 0.6812928318977356, + "learning_rate": 8.84422110552764e-05, + "loss": 1.2819, + "step": 24 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 0.7199240326881409, + "learning_rate": 8.793969849246232e-05, + "loss": 1.3359, + "step": 25 + }, + { + "epoch": 0.33440514469453375, + "grad_norm": 0.6149807572364807, + "learning_rate": 8.743718592964825e-05, + "loss": 1.3266, + "step": 26 + }, + { + "epoch": 0.34726688102893893, + "grad_norm": 0.7447224855422974, + "learning_rate": 8.693467336683418e-05, + "loss": 1.2672, + "step": 27 + }, + { + "epoch": 0.36012861736334406, + "grad_norm": 0.7494439482688904, + "learning_rate": 8.64321608040201e-05, + "loss": 1.2761, + "step": 28 + }, + { + "epoch": 0.3729903536977492, + "grad_norm": 0.6944273710250854, + "learning_rate": 8.592964824120603e-05, + "loss": 1.372, + "step": 29 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 0.7790990471839905, + "learning_rate": 8.542713567839196e-05, + "loss": 1.2512, + "step": 30 + }, + { + "epoch": 0.3987138263665595, + "grad_norm": 0.7430320978164673, + "learning_rate": 8.49246231155779e-05, + "loss": 1.2877, + "step": 31 + }, + { + "epoch": 0.4115755627009646, + "grad_norm": 0.7314161658287048, + "learning_rate": 8.442211055276383e-05, + "loss": 1.2391, + "step": 32 + }, + { + "epoch": 0.42443729903536975, + "grad_norm": 0.9817017912864685, + "learning_rate": 8.391959798994975e-05, + "loss": 1.2111, + "step": 33 + }, + { + "epoch": 0.43729903536977494, + "grad_norm": 0.7930232286453247, + "learning_rate": 8.341708542713568e-05, + "loss": 1.3572, + "step": 34 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 0.8271546959877014, + "learning_rate": 8.291457286432161e-05, + "loss": 1.1759, + "step": 35 + }, + { + "epoch": 0.4630225080385852, + "grad_norm": 0.8044170141220093, + "learning_rate": 8.241206030150754e-05, + "loss": 1.2679, + "step": 36 + }, + { + "epoch": 0.4758842443729904, + "grad_norm": 0.838305652141571, + "learning_rate": 8.190954773869348e-05, + "loss": 1.3342, + "step": 37 + }, + { + "epoch": 0.4887459807073955, + "grad_norm": 0.8596311211585999, + "learning_rate": 8.14070351758794e-05, + "loss": 1.3192, + "step": 38 + }, + { + "epoch": 0.5016077170418006, + "grad_norm": 0.8743759393692017, + "learning_rate": 8.090452261306533e-05, + "loss": 1.1979, + "step": 39 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 0.7089235186576843, + "learning_rate": 8.040201005025126e-05, + "loss": 1.3027, + "step": 40 + }, + { + "epoch": 0.5273311897106109, + "grad_norm": 1.0054506063461304, + "learning_rate": 7.989949748743719e-05, + "loss": 1.2378, + "step": 41 + }, + { + "epoch": 0.5401929260450161, + "grad_norm": 0.6515071392059326, + "learning_rate": 7.939698492462313e-05, + "loss": 1.4929, + "step": 42 + }, + { + "epoch": 0.5530546623794212, + "grad_norm": 0.7868378758430481, + "learning_rate": 7.889447236180904e-05, + "loss": 1.178, + "step": 43 + }, + { + "epoch": 0.5659163987138264, + "grad_norm": 0.7902660369873047, + "learning_rate": 7.839195979899498e-05, + "loss": 1.1859, + "step": 44 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 0.6925104260444641, + "learning_rate": 7.788944723618091e-05, + "loss": 1.1702, + "step": 45 + }, + { + "epoch": 0.5916398713826366, + "grad_norm": 0.5484269857406616, + "learning_rate": 7.738693467336684e-05, + "loss": 1.3692, + "step": 46 + }, + { + "epoch": 0.6045016077170418, + "grad_norm": 0.6749467253684998, + "learning_rate": 7.688442211055277e-05, + "loss": 1.2008, + "step": 47 + }, + { + "epoch": 0.617363344051447, + "grad_norm": 0.6152440905570984, + "learning_rate": 7.638190954773869e-05, + "loss": 1.1451, + "step": 48 + }, + { + "epoch": 0.6302250803858521, + "grad_norm": 0.7041923999786377, + "learning_rate": 7.587939698492463e-05, + "loss": 1.1663, + "step": 49 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 0.6972315311431885, + "learning_rate": 7.537688442211056e-05, + "loss": 1.3466, + "step": 50 + }, + { + "epoch": 0.6559485530546624, + "grad_norm": 0.6475012302398682, + "learning_rate": 7.487437185929649e-05, + "loss": 1.2642, + "step": 51 + }, + { + "epoch": 0.6688102893890675, + "grad_norm": 0.6452779173851013, + "learning_rate": 7.437185929648241e-05, + "loss": 1.2998, + "step": 52 + }, + { + "epoch": 0.6816720257234726, + "grad_norm": 0.6301996111869812, + "learning_rate": 7.386934673366834e-05, + "loss": 1.3006, + "step": 53 + }, + { + "epoch": 0.6945337620578779, + "grad_norm": 0.5954070687294006, + "learning_rate": 7.336683417085427e-05, + "loss": 1.0755, + "step": 54 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 0.6645363569259644, + "learning_rate": 7.28643216080402e-05, + "loss": 1.1456, + "step": 55 + }, + { + "epoch": 0.7202572347266881, + "grad_norm": 0.6070973873138428, + "learning_rate": 7.236180904522614e-05, + "loss": 1.2833, + "step": 56 + }, + { + "epoch": 0.7331189710610932, + "grad_norm": 0.6045801639556885, + "learning_rate": 7.185929648241206e-05, + "loss": 1.3244, + "step": 57 + }, + { + "epoch": 0.7459807073954984, + "grad_norm": 0.6441887021064758, + "learning_rate": 7.135678391959799e-05, + "loss": 1.1441, + "step": 58 + }, + { + "epoch": 0.7588424437299035, + "grad_norm": 0.6649569272994995, + "learning_rate": 7.085427135678392e-05, + "loss": 1.1341, + "step": 59 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 0.6963558793067932, + "learning_rate": 7.035175879396985e-05, + "loss": 1.2546, + "step": 60 + }, + { + "epoch": 0.7845659163987139, + "grad_norm": 0.6504542231559753, + "learning_rate": 6.984924623115579e-05, + "loss": 1.15, + "step": 61 + }, + { + "epoch": 0.797427652733119, + "grad_norm": 0.7124640941619873, + "learning_rate": 6.93467336683417e-05, + "loss": 1.2119, + "step": 62 + }, + { + "epoch": 0.8102893890675241, + "grad_norm": 0.7108010649681091, + "learning_rate": 6.884422110552764e-05, + "loss": 1.1421, + "step": 63 + }, + { + "epoch": 0.8231511254019293, + "grad_norm": 0.7506201267242432, + "learning_rate": 6.834170854271357e-05, + "loss": 1.2839, + "step": 64 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 0.7054311037063599, + "learning_rate": 6.78391959798995e-05, + "loss": 0.9867, + "step": 65 + }, + { + "epoch": 0.8488745980707395, + "grad_norm": 0.822084903717041, + "learning_rate": 6.733668341708544e-05, + "loss": 1.1776, + "step": 66 + }, + { + "epoch": 0.8617363344051447, + "grad_norm": 0.6928122043609619, + "learning_rate": 6.683417085427135e-05, + "loss": 1.1274, + "step": 67 + }, + { + "epoch": 0.8745980707395499, + "grad_norm": 0.6704013347625732, + "learning_rate": 6.633165829145729e-05, + "loss": 1.2038, + "step": 68 + }, + { + "epoch": 0.887459807073955, + "grad_norm": 0.6860978007316589, + "learning_rate": 6.582914572864322e-05, + "loss": 1.2733, + "step": 69 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 0.6388479471206665, + "learning_rate": 6.532663316582915e-05, + "loss": 1.1757, + "step": 70 + }, + { + "epoch": 0.9131832797427653, + "grad_norm": 0.69051593542099, + "learning_rate": 6.482412060301508e-05, + "loss": 1.2217, + "step": 71 + }, + { + "epoch": 0.9260450160771704, + "grad_norm": 0.6623289585113525, + "learning_rate": 6.4321608040201e-05, + "loss": 1.104, + "step": 72 + }, + { + "epoch": 0.9389067524115756, + "grad_norm": 0.7594994902610779, + "learning_rate": 6.381909547738694e-05, + "loss": 1.0978, + "step": 73 + }, + { + "epoch": 0.9517684887459807, + "grad_norm": 0.6275720596313477, + "learning_rate": 6.331658291457287e-05, + "loss": 1.2087, + "step": 74 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 0.7241957187652588, + "learning_rate": 6.28140703517588e-05, + "loss": 1.2698, + "step": 75 + }, + { + "epoch": 0.977491961414791, + "grad_norm": 0.7378215193748474, + "learning_rate": 6.231155778894473e-05, + "loss": 1.1645, + "step": 76 + }, + { + "epoch": 0.9903536977491961, + "grad_norm": 0.6214041709899902, + "learning_rate": 6.180904522613065e-05, + "loss": 1.1051, + "step": 77 + }, + { + "epoch": 1.0064308681672025, + "grad_norm": 1.040342926979065, + "learning_rate": 6.130653266331658e-05, + "loss": 1.7028, + "step": 78 + }, + { + "epoch": 1.0192926045016077, + "grad_norm": 0.5702113509178162, + "learning_rate": 6.080402010050251e-05, + "loss": 1.018, + "step": 79 + }, + { + "epoch": 1.0321543408360128, + "grad_norm": 0.6255866289138794, + "learning_rate": 6.030150753768844e-05, + "loss": 1.1143, + "step": 80 + }, + { + "epoch": 1.045016077170418, + "grad_norm": 0.6321098208427429, + "learning_rate": 5.979899497487438e-05, + "loss": 1.1178, + "step": 81 + }, + { + "epoch": 1.0578778135048232, + "grad_norm": 0.6427117586135864, + "learning_rate": 5.929648241206031e-05, + "loss": 1.0878, + "step": 82 + }, + { + "epoch": 1.0707395498392283, + "grad_norm": 0.6140854358673096, + "learning_rate": 5.879396984924623e-05, + "loss": 1.0582, + "step": 83 + }, + { + "epoch": 1.0836012861736335, + "grad_norm": 0.7560706734657288, + "learning_rate": 5.829145728643216e-05, + "loss": 1.1167, + "step": 84 + }, + { + "epoch": 1.0964630225080385, + "grad_norm": 0.7023528218269348, + "learning_rate": 5.778894472361809e-05, + "loss": 1.1229, + "step": 85 + }, + { + "epoch": 1.1093247588424437, + "grad_norm": 0.7082664966583252, + "learning_rate": 5.728643216080403e-05, + "loss": 1.0817, + "step": 86 + }, + { + "epoch": 1.122186495176849, + "grad_norm": 0.6439070701599121, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.0582, + "step": 87 + }, + { + "epoch": 1.135048231511254, + "grad_norm": 0.6616405248641968, + "learning_rate": 5.628140703517588e-05, + "loss": 1.0984, + "step": 88 + }, + { + "epoch": 1.1479099678456592, + "grad_norm": 0.6637163758277893, + "learning_rate": 5.577889447236181e-05, + "loss": 1.1048, + "step": 89 + }, + { + "epoch": 1.1607717041800643, + "grad_norm": 0.7235276103019714, + "learning_rate": 5.527638190954774e-05, + "loss": 1.1275, + "step": 90 + }, + { + "epoch": 1.1736334405144695, + "grad_norm": 0.7490517497062683, + "learning_rate": 5.477386934673368e-05, + "loss": 1.84, + "step": 91 + }, + { + "epoch": 1.1864951768488745, + "grad_norm": 0.5786312818527222, + "learning_rate": 5.4271356783919604e-05, + "loss": 0.9336, + "step": 92 + }, + { + "epoch": 1.1993569131832797, + "grad_norm": 0.6154367327690125, + "learning_rate": 5.376884422110553e-05, + "loss": 0.9234, + "step": 93 + }, + { + "epoch": 1.212218649517685, + "grad_norm": 0.8267420530319214, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.248, + "step": 94 + }, + { + "epoch": 1.22508038585209, + "grad_norm": 0.6347982287406921, + "learning_rate": 5.276381909547739e-05, + "loss": 0.9043, + "step": 95 + }, + { + "epoch": 1.2379421221864952, + "grad_norm": 0.8247315287590027, + "learning_rate": 5.226130653266332e-05, + "loss": 1.264, + "step": 96 + }, + { + "epoch": 1.2508038585209003, + "grad_norm": 0.7495781183242798, + "learning_rate": 5.175879396984925e-05, + "loss": 1.0917, + "step": 97 + }, + { + "epoch": 1.2636655948553055, + "grad_norm": 0.7721897959709167, + "learning_rate": 5.125628140703518e-05, + "loss": 1.094, + "step": 98 + }, + { + "epoch": 1.2765273311897105, + "grad_norm": 0.7058795094490051, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.1089, + "step": 99 + }, + { + "epoch": 1.2893890675241158, + "grad_norm": 0.7193276286125183, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.0483, + "step": 100 + }, + { + "epoch": 1.302250803858521, + "grad_norm": 0.6918763518333435, + "learning_rate": 4.974874371859297e-05, + "loss": 1.1502, + "step": 101 + }, + { + "epoch": 1.315112540192926, + "grad_norm": 0.660120964050293, + "learning_rate": 4.92462311557789e-05, + "loss": 1.0676, + "step": 102 + }, + { + "epoch": 1.3279742765273312, + "grad_norm": 0.6186777949333191, + "learning_rate": 4.874371859296483e-05, + "loss": 0.9034, + "step": 103 + }, + { + "epoch": 1.3408360128617363, + "grad_norm": 0.6991158723831177, + "learning_rate": 4.824120603015075e-05, + "loss": 1.5727, + "step": 104 + }, + { + "epoch": 1.3536977491961415, + "grad_norm": 0.761214017868042, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.1251, + "step": 105 + }, + { + "epoch": 1.3665594855305465, + "grad_norm": 0.7299149036407471, + "learning_rate": 4.723618090452262e-05, + "loss": 1.0284, + "step": 106 + }, + { + "epoch": 1.3794212218649518, + "grad_norm": 0.7297985553741455, + "learning_rate": 4.673366834170855e-05, + "loss": 0.9865, + "step": 107 + }, + { + "epoch": 1.392282958199357, + "grad_norm": 0.7173044085502625, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.0298, + "step": 108 + }, + { + "epoch": 1.405144694533762, + "grad_norm": 0.7294162511825562, + "learning_rate": 4.57286432160804e-05, + "loss": 0.9537, + "step": 109 + }, + { + "epoch": 1.4180064308681672, + "grad_norm": 0.7480007410049438, + "learning_rate": 4.522613065326633e-05, + "loss": 1.0083, + "step": 110 + }, + { + "epoch": 1.4308681672025725, + "grad_norm": 0.759207010269165, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.0797, + "step": 111 + }, + { + "epoch": 1.4437299035369775, + "grad_norm": 0.7591779828071594, + "learning_rate": 4.42211055276382e-05, + "loss": 1.0126, + "step": 112 + }, + { + "epoch": 1.4565916398713825, + "grad_norm": 0.8304324746131897, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.177, + "step": 113 + }, + { + "epoch": 1.4694533762057878, + "grad_norm": 0.8174604773521423, + "learning_rate": 4.321608040201005e-05, + "loss": 1.0825, + "step": 114 + }, + { + "epoch": 1.482315112540193, + "grad_norm": 0.7986916303634644, + "learning_rate": 4.271356783919598e-05, + "loss": 1.3034, + "step": 115 + }, + { + "epoch": 1.495176848874598, + "grad_norm": 0.6727350354194641, + "learning_rate": 4.2211055276381914e-05, + "loss": 0.9362, + "step": 116 + }, + { + "epoch": 1.5080385852090032, + "grad_norm": 0.8454386591911316, + "learning_rate": 4.170854271356784e-05, + "loss": 1.0968, + "step": 117 + }, + { + "epoch": 1.5209003215434085, + "grad_norm": 0.7886800169944763, + "learning_rate": 4.120603015075377e-05, + "loss": 1.079, + "step": 118 + }, + { + "epoch": 1.5337620578778135, + "grad_norm": 0.8228936195373535, + "learning_rate": 4.07035175879397e-05, + "loss": 1.1753, + "step": 119 + }, + { + "epoch": 1.5466237942122185, + "grad_norm": 0.7093372344970703, + "learning_rate": 4.020100502512563e-05, + "loss": 1.0219, + "step": 120 + }, + { + "epoch": 1.5594855305466238, + "grad_norm": 0.7544362545013428, + "learning_rate": 3.969849246231156e-05, + "loss": 1.0304, + "step": 121 + }, + { + "epoch": 1.572347266881029, + "grad_norm": 0.6867285370826721, + "learning_rate": 3.919597989949749e-05, + "loss": 1.0124, + "step": 122 + }, + { + "epoch": 1.585209003215434, + "grad_norm": 0.71519935131073, + "learning_rate": 3.869346733668342e-05, + "loss": 0.9987, + "step": 123 + }, + { + "epoch": 1.5980707395498392, + "grad_norm": 0.8166177868843079, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.4594, + "step": 124 + }, + { + "epoch": 1.6109324758842445, + "grad_norm": 0.6581661105155945, + "learning_rate": 3.768844221105528e-05, + "loss": 0.879, + "step": 125 + }, + { + "epoch": 1.6237942122186495, + "grad_norm": 0.7463251948356628, + "learning_rate": 3.7185929648241204e-05, + "loss": 0.9512, + "step": 126 + }, + { + "epoch": 1.6366559485530545, + "grad_norm": 0.7250362634658813, + "learning_rate": 3.668341708542714e-05, + "loss": 0.941, + "step": 127 + }, + { + "epoch": 1.6495176848874598, + "grad_norm": 0.921393871307373, + "learning_rate": 3.618090452261307e-05, + "loss": 1.1333, + "step": 128 + }, + { + "epoch": 1.662379421221865, + "grad_norm": 0.7928050756454468, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.0495, + "step": 129 + }, + { + "epoch": 1.67524115755627, + "grad_norm": 0.8830085396766663, + "learning_rate": 3.517587939698493e-05, + "loss": 0.9941, + "step": 130 + }, + { + "epoch": 1.6881028938906752, + "grad_norm": 0.8071272969245911, + "learning_rate": 3.467336683417085e-05, + "loss": 1.0694, + "step": 131 + }, + { + "epoch": 1.7009646302250805, + "grad_norm": 0.7889872789382935, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.0608, + "step": 132 + }, + { + "epoch": 1.7138263665594855, + "grad_norm": 0.8442770838737488, + "learning_rate": 3.366834170854272e-05, + "loss": 1.3121, + "step": 133 + }, + { + "epoch": 1.7266881028938905, + "grad_norm": 0.6982027292251587, + "learning_rate": 3.3165829145728643e-05, + "loss": 0.9552, + "step": 134 + }, + { + "epoch": 1.739549839228296, + "grad_norm": 0.8337914347648621, + "learning_rate": 3.2663316582914576e-05, + "loss": 0.9776, + "step": 135 + }, + { + "epoch": 1.752411575562701, + "grad_norm": 0.8767706751823425, + "learning_rate": 3.21608040201005e-05, + "loss": 0.9149, + "step": 136 + }, + { + "epoch": 1.765273311897106, + "grad_norm": 0.9060734510421753, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.1089, + "step": 137 + }, + { + "epoch": 1.7781350482315113, + "grad_norm": 0.8203456401824951, + "learning_rate": 3.1155778894472366e-05, + "loss": 0.9947, + "step": 138 + }, + { + "epoch": 1.7909967845659165, + "grad_norm": 0.8340582847595215, + "learning_rate": 3.065326633165829e-05, + "loss": 1.0813, + "step": 139 + }, + { + "epoch": 1.8038585209003215, + "grad_norm": 0.7712675929069519, + "learning_rate": 3.015075376884422e-05, + "loss": 0.8633, + "step": 140 + }, + { + "epoch": 1.8167202572347267, + "grad_norm": 0.9218320846557617, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.1976, + "step": 141 + }, + { + "epoch": 1.829581993569132, + "grad_norm": 0.8755046725273132, + "learning_rate": 2.914572864321608e-05, + "loss": 1.0908, + "step": 142 + }, + { + "epoch": 1.842443729903537, + "grad_norm": 0.783729076385498, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.0551, + "step": 143 + }, + { + "epoch": 1.855305466237942, + "grad_norm": 0.8805728554725647, + "learning_rate": 2.814070351758794e-05, + "loss": 1.1579, + "step": 144 + }, + { + "epoch": 1.8681672025723473, + "grad_norm": 0.9049750566482544, + "learning_rate": 2.763819095477387e-05, + "loss": 1.042, + "step": 145 + }, + { + "epoch": 1.8810289389067525, + "grad_norm": 0.7460048794746399, + "learning_rate": 2.7135678391959802e-05, + "loss": 0.9356, + "step": 146 + }, + { + "epoch": 1.8938906752411575, + "grad_norm": 0.8165715932846069, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.2145, + "step": 147 + }, + { + "epoch": 1.9067524115755627, + "grad_norm": 0.8942787051200867, + "learning_rate": 2.613065326633166e-05, + "loss": 1.0566, + "step": 148 + }, + { + "epoch": 1.919614147909968, + "grad_norm": 0.8007097244262695, + "learning_rate": 2.562814070351759e-05, + "loss": 0.9359, + "step": 149 + }, + { + "epoch": 1.932475884244373, + "grad_norm": 0.81815105676651, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.1235, + "step": 150 + }, + { + "epoch": 1.945337620578778, + "grad_norm": 0.7318486571311951, + "learning_rate": 2.462311557788945e-05, + "loss": 0.9921, + "step": 151 + }, + { + "epoch": 1.9581993569131833, + "grad_norm": 0.8647810220718384, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.0954, + "step": 152 + }, + { + "epoch": 1.9710610932475885, + "grad_norm": 0.8506655097007751, + "learning_rate": 2.361809045226131e-05, + "loss": 1.188, + "step": 153 + }, + { + "epoch": 1.9839228295819935, + "grad_norm": 0.8003736138343811, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.0691, + "step": 154 + }, + { + "epoch": 1.9967845659163987, + "grad_norm": 1.349818229675293, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.5416, + "step": 155 + }, + { + "epoch": 2.012861736334405, + "grad_norm": 0.828117847442627, + "learning_rate": 2.21105527638191e-05, + "loss": 1.002, + "step": 156 + }, + { + "epoch": 2.0257234726688105, + "grad_norm": 0.7363523840904236, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.0205, + "step": 157 + }, + { + "epoch": 2.0385852090032155, + "grad_norm": 0.7621555328369141, + "learning_rate": 2.1105527638190957e-05, + "loss": 0.9794, + "step": 158 + }, + { + "epoch": 2.0514469453376205, + "grad_norm": 0.7734480500221252, + "learning_rate": 2.0603015075376886e-05, + "loss": 0.9914, + "step": 159 + }, + { + "epoch": 2.0643086816720255, + "grad_norm": 0.7937270402908325, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.9798, + "step": 160 + }, + { + "epoch": 2.077170418006431, + "grad_norm": 0.7764402627944946, + "learning_rate": 1.9597989949748744e-05, + "loss": 1.053, + "step": 161 + }, + { + "epoch": 2.090032154340836, + "grad_norm": 0.79380202293396, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.0097, + "step": 162 + }, + { + "epoch": 2.102893890675241, + "grad_norm": 0.7844691276550293, + "learning_rate": 1.8592964824120602e-05, + "loss": 0.9642, + "step": 163 + }, + { + "epoch": 2.1157556270096465, + "grad_norm": 0.7374359965324402, + "learning_rate": 1.8090452261306535e-05, + "loss": 0.9937, + "step": 164 + }, + { + "epoch": 2.1286173633440515, + "grad_norm": 0.7238391041755676, + "learning_rate": 1.7587939698492464e-05, + "loss": 0.9851, + "step": 165 + }, + { + "epoch": 2.1414790996784565, + "grad_norm": 0.7812396883964539, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.0145, + "step": 166 + }, + { + "epoch": 2.154340836012862, + "grad_norm": 0.7844581007957458, + "learning_rate": 1.6582914572864322e-05, + "loss": 0.9512, + "step": 167 + }, + { + "epoch": 2.167202572347267, + "grad_norm": 0.755712628364563, + "learning_rate": 1.608040201005025e-05, + "loss": 0.9955, + "step": 168 + }, + { + "epoch": 2.180064308681672, + "grad_norm": 0.8489721417427063, + "learning_rate": 1.5577889447236183e-05, + "loss": 0.9573, + "step": 169 + }, + { + "epoch": 2.192926045016077, + "grad_norm": 0.7579408884048462, + "learning_rate": 1.507537688442211e-05, + "loss": 1.4051, + "step": 170 + }, + { + "epoch": 2.2057877813504825, + "grad_norm": 0.8217175602912903, + "learning_rate": 1.457286432160804e-05, + "loss": 0.9797, + "step": 171 + }, + { + "epoch": 2.2186495176848875, + "grad_norm": 0.7924138903617859, + "learning_rate": 1.407035175879397e-05, + "loss": 0.9756, + "step": 172 + }, + { + "epoch": 2.2315112540192925, + "grad_norm": 0.880486786365509, + "learning_rate": 1.3567839195979901e-05, + "loss": 0.9023, + "step": 173 + }, + { + "epoch": 2.244372990353698, + "grad_norm": 0.7890939712524414, + "learning_rate": 1.306532663316583e-05, + "loss": 1.0993, + "step": 174 + }, + { + "epoch": 2.257234726688103, + "grad_norm": 0.8257628083229065, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.0258, + "step": 175 + }, + { + "epoch": 2.270096463022508, + "grad_norm": 0.8197484016418457, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.1068, + "step": 176 + }, + { + "epoch": 2.282958199356913, + "grad_norm": 0.7964540719985962, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.0245, + "step": 177 + }, + { + "epoch": 2.2958199356913185, + "grad_norm": 0.9317320585250854, + "learning_rate": 1.105527638190955e-05, + "loss": 1.033, + "step": 178 + }, + { + "epoch": 2.3086816720257235, + "grad_norm": 0.9429613351821899, + "learning_rate": 1.0552763819095479e-05, + "loss": 0.9338, + "step": 179 + }, + { + "epoch": 2.3215434083601285, + "grad_norm": 0.8410852551460266, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.9694, + "step": 180 + }, + { + "epoch": 2.334405144694534, + "grad_norm": 0.8171876072883606, + "learning_rate": 9.547738693467337e-06, + "loss": 0.9673, + "step": 181 + }, + { + "epoch": 2.347266881028939, + "grad_norm": 0.8042727112770081, + "learning_rate": 9.045226130653267e-06, + "loss": 0.9262, + "step": 182 + }, + { + "epoch": 2.360128617363344, + "grad_norm": 0.878738284111023, + "learning_rate": 8.542713567839196e-06, + "loss": 0.9727, + "step": 183 + }, + { + "epoch": 2.372990353697749, + "grad_norm": 0.7750037908554077, + "learning_rate": 8.040201005025125e-06, + "loss": 1.0057, + "step": 184 + }, + { + "epoch": 2.3858520900321545, + "grad_norm": 0.8242013454437256, + "learning_rate": 7.537688442211055e-06, + "loss": 0.9303, + "step": 185 + }, + { + "epoch": 2.3987138263665595, + "grad_norm": 0.8518984317779541, + "learning_rate": 7.035175879396985e-06, + "loss": 1.022, + "step": 186 + }, + { + "epoch": 2.4115755627009645, + "grad_norm": 0.8745433688163757, + "learning_rate": 6.532663316582915e-06, + "loss": 0.9579, + "step": 187 + }, + { + "epoch": 2.42443729903537, + "grad_norm": 0.8355763554573059, + "learning_rate": 6.030150753768844e-06, + "loss": 0.9966, + "step": 188 + }, + { + "epoch": 2.437299035369775, + "grad_norm": 0.8143788576126099, + "learning_rate": 5.527638190954775e-06, + "loss": 0.9584, + "step": 189 + }, + { + "epoch": 2.45016077170418, + "grad_norm": 0.7886014580726624, + "learning_rate": 5.025125628140704e-06, + "loss": 1.0076, + "step": 190 + }, + { + "epoch": 2.463022508038585, + "grad_norm": 0.6886643767356873, + "learning_rate": 4.522613065326634e-06, + "loss": 1.2421, + "step": 191 + }, + { + "epoch": 2.4758842443729905, + "grad_norm": 0.885189414024353, + "learning_rate": 4.020100502512563e-06, + "loss": 1.0043, + "step": 192 + }, + { + "epoch": 2.4887459807073955, + "grad_norm": 0.8111695647239685, + "learning_rate": 3.5175879396984926e-06, + "loss": 0.8624, + "step": 193 + }, + { + "epoch": 2.5016077170418005, + "grad_norm": 0.8801529407501221, + "learning_rate": 3.015075376884422e-06, + "loss": 1.0303, + "step": 194 + }, + { + "epoch": 2.514469453376206, + "grad_norm": 0.8241980075836182, + "learning_rate": 2.512562814070352e-06, + "loss": 0.8123, + "step": 195 + }, + { + "epoch": 2.527331189710611, + "grad_norm": 0.9087265133857727, + "learning_rate": 2.0100502512562813e-06, + "loss": 0.9173, + "step": 196 + }, + { + "epoch": 2.540192926045016, + "grad_norm": 0.7739095687866211, + "learning_rate": 1.507537688442211e-06, + "loss": 0.9368, + "step": 197 + }, + { + "epoch": 2.553054662379421, + "grad_norm": 0.799752950668335, + "learning_rate": 1.0050251256281407e-06, + "loss": 0.9238, + "step": 198 + }, + { + "epoch": 2.5659163987138265, + "grad_norm": 0.8783621191978455, + "learning_rate": 5.025125628140703e-07, + "loss": 0.9649, + "step": 199 + }, + { + "epoch": 2.5787781350482315, + "grad_norm": 0.8313027620315552, + "learning_rate": 0.0, + "loss": 0.8397, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.180733053370368e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_Chinese_German/checkpoint-200/training_args.bin b/llama_Chinese_German/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cdcd5010148def470b13966fe71b0afe3cb0514e --- /dev/null +++ b/llama_Chinese_German/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:051cbe68c12caa32406dbbc2c74aa557dc8880707776876bd08b74da9e9b8f85 +size 5624 diff --git a/llama_Chinese_Vietnamese/checkpoint-200/README.md b/llama_Chinese_Vietnamese/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_Chinese_Vietnamese/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_Chinese_Vietnamese/checkpoint-200/adapter_config.json b/llama_Chinese_Vietnamese/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b456ced2149cc3e9d30421b513412bd48b1d480f --- /dev/null +++ b/llama_Chinese_Vietnamese/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "up_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_Chinese_Vietnamese/checkpoint-200/adapter_model.safetensors b/llama_Chinese_Vietnamese/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4cd98afc090cdf91c9709dd76d0d54be21fc8248 --- /dev/null +++ b/llama_Chinese_Vietnamese/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85e1e75b180dbfaf919c6827d9b70c6919f3cb87193270924ebfbfe2bba094a5 +size 167832240 diff --git a/llama_Chinese_Vietnamese/checkpoint-200/optimizer.pt b/llama_Chinese_Vietnamese/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b5e04c0c983913926c095b98f15b056eae9f007 --- /dev/null +++ b/llama_Chinese_Vietnamese/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf643f67363f0881712fc17c6873e8332777cf084cc94ecaad05bb34e074bb75 +size 85723284 diff --git a/llama_Chinese_Vietnamese/checkpoint-200/rng_state.pth b/llama_Chinese_Vietnamese/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21f2ff1c1a816463781d51760f8156e041f5979 --- /dev/null +++ b/llama_Chinese_Vietnamese/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4981123ff3cf7bd5b7f76839e90e4776f747ca4c38dcb41876fa010c0dea8b23 +size 14244 diff --git a/llama_Chinese_Vietnamese/checkpoint-200/scheduler.pt b/llama_Chinese_Vietnamese/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_Chinese_Vietnamese/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_Chinese_Vietnamese/checkpoint-200/special_tokens_map.json b/llama_Chinese_Vietnamese/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_Chinese_Vietnamese/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_Chinese_Vietnamese/checkpoint-200/tokenizer.json b/llama_Chinese_Vietnamese/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_Chinese_Vietnamese/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_Chinese_Vietnamese/checkpoint-200/tokenizer_config.json b/llama_Chinese_Vietnamese/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_Chinese_Vietnamese/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_Chinese_Vietnamese/checkpoint-200/trainer_state.json b/llama_Chinese_Vietnamese/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7c7d6d70df529aae3368305a07a1d2a21d1a8550 --- /dev/null +++ b/llama_Chinese_Vietnamese/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.5787781350482315, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012861736334405145, + "grad_norm": 1.0649385452270508, + "learning_rate": 0.0001, + "loss": 2.6309, + "step": 1 + }, + { + "epoch": 0.02572347266881029, + "grad_norm": 0.9491050243377686, + "learning_rate": 9.949748743718594e-05, + "loss": 2.6373, + "step": 2 + }, + { + "epoch": 0.03858520900321544, + "grad_norm": 0.9376218318939209, + "learning_rate": 9.899497487437186e-05, + "loss": 2.7692, + "step": 3 + }, + { + "epoch": 0.05144694533762058, + "grad_norm": 1.0216950178146362, + "learning_rate": 9.84924623115578e-05, + "loss": 2.5093, + "step": 4 + }, + { + "epoch": 0.06430868167202572, + "grad_norm": 1.054303526878357, + "learning_rate": 9.798994974874372e-05, + "loss": 2.371, + "step": 5 + }, + { + "epoch": 0.07717041800643087, + "grad_norm": 1.059228777885437, + "learning_rate": 9.748743718592965e-05, + "loss": 2.2294, + "step": 6 + }, + { + "epoch": 0.09003215434083602, + "grad_norm": 1.0482760667800903, + "learning_rate": 9.698492462311559e-05, + "loss": 2.0812, + "step": 7 + }, + { + "epoch": 0.10289389067524116, + "grad_norm": 1.2738276720046997, + "learning_rate": 9.64824120603015e-05, + "loss": 2.0018, + "step": 8 + }, + { + "epoch": 0.1157556270096463, + "grad_norm": 1.0511283874511719, + "learning_rate": 9.597989949748745e-05, + "loss": 1.7588, + "step": 9 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 0.9217624068260193, + "learning_rate": 9.547738693467337e-05, + "loss": 1.7429, + "step": 10 + }, + { + "epoch": 0.1414790996784566, + "grad_norm": 0.685434103012085, + "learning_rate": 9.49748743718593e-05, + "loss": 1.9808, + "step": 11 + }, + { + "epoch": 0.15434083601286175, + "grad_norm": 0.8728280067443848, + "learning_rate": 9.447236180904523e-05, + "loss": 1.5599, + "step": 12 + }, + { + "epoch": 0.16720257234726688, + "grad_norm": 0.6496291160583496, + "learning_rate": 9.396984924623115e-05, + "loss": 1.5217, + "step": 13 + }, + { + "epoch": 0.18006430868167203, + "grad_norm": 0.632135808467865, + "learning_rate": 9.34673366834171e-05, + "loss": 1.4064, + "step": 14 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 0.6891590356826782, + "learning_rate": 9.296482412060302e-05, + "loss": 1.5262, + "step": 15 + }, + { + "epoch": 0.2057877813504823, + "grad_norm": 0.6528868079185486, + "learning_rate": 9.246231155778895e-05, + "loss": 1.4092, + "step": 16 + }, + { + "epoch": 0.21864951768488747, + "grad_norm": 0.7315002083778381, + "learning_rate": 9.195979899497488e-05, + "loss": 1.533, + "step": 17 + }, + { + "epoch": 0.2315112540192926, + "grad_norm": 0.6680942177772522, + "learning_rate": 9.14572864321608e-05, + "loss": 1.3878, + "step": 18 + }, + { + "epoch": 0.24437299035369775, + "grad_norm": 0.5954132676124573, + "learning_rate": 9.095477386934675e-05, + "loss": 1.4235, + "step": 19 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 0.7016865611076355, + "learning_rate": 9.045226130653267e-05, + "loss": 1.349, + "step": 20 + }, + { + "epoch": 0.27009646302250806, + "grad_norm": 0.6608514189720154, + "learning_rate": 8.99497487437186e-05, + "loss": 1.3343, + "step": 21 + }, + { + "epoch": 0.2829581993569132, + "grad_norm": 0.7280902862548828, + "learning_rate": 8.944723618090453e-05, + "loss": 1.3994, + "step": 22 + }, + { + "epoch": 0.2958199356913183, + "grad_norm": 0.7174026966094971, + "learning_rate": 8.894472361809045e-05, + "loss": 1.4069, + "step": 23 + }, + { + "epoch": 0.3086816720257235, + "grad_norm": 0.7075796723365784, + "learning_rate": 8.84422110552764e-05, + "loss": 1.3257, + "step": 24 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 0.7438945174217224, + "learning_rate": 8.793969849246232e-05, + "loss": 1.3734, + "step": 25 + }, + { + "epoch": 0.33440514469453375, + "grad_norm": 0.669459342956543, + "learning_rate": 8.743718592964825e-05, + "loss": 1.3843, + "step": 26 + }, + { + "epoch": 0.34726688102893893, + "grad_norm": 0.7187721729278564, + "learning_rate": 8.693467336683418e-05, + "loss": 1.2812, + "step": 27 + }, + { + "epoch": 0.36012861736334406, + "grad_norm": 0.7727590799331665, + "learning_rate": 8.64321608040201e-05, + "loss": 1.3305, + "step": 28 + }, + { + "epoch": 0.3729903536977492, + "grad_norm": 0.7686368227005005, + "learning_rate": 8.592964824120603e-05, + "loss": 1.4395, + "step": 29 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 0.8188142776489258, + "learning_rate": 8.542713567839196e-05, + "loss": 1.3322, + "step": 30 + }, + { + "epoch": 0.3987138263665595, + "grad_norm": 0.7664804458618164, + "learning_rate": 8.49246231155779e-05, + "loss": 1.3569, + "step": 31 + }, + { + "epoch": 0.4115755627009646, + "grad_norm": 0.7511367797851562, + "learning_rate": 8.442211055276383e-05, + "loss": 1.2811, + "step": 32 + }, + { + "epoch": 0.42443729903536975, + "grad_norm": 0.8608831167221069, + "learning_rate": 8.391959798994975e-05, + "loss": 1.2244, + "step": 33 + }, + { + "epoch": 0.43729903536977494, + "grad_norm": 0.8403910398483276, + "learning_rate": 8.341708542713568e-05, + "loss": 1.4243, + "step": 34 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 0.8469493985176086, + "learning_rate": 8.291457286432161e-05, + "loss": 1.2308, + "step": 35 + }, + { + "epoch": 0.4630225080385852, + "grad_norm": 0.8314189314842224, + "learning_rate": 8.241206030150754e-05, + "loss": 1.3045, + "step": 36 + }, + { + "epoch": 0.4758842443729904, + "grad_norm": 0.8816382884979248, + "learning_rate": 8.190954773869348e-05, + "loss": 1.3571, + "step": 37 + }, + { + "epoch": 0.4887459807073955, + "grad_norm": 0.8717392086982727, + "learning_rate": 8.14070351758794e-05, + "loss": 1.3506, + "step": 38 + }, + { + "epoch": 0.5016077170418006, + "grad_norm": 0.8880780935287476, + "learning_rate": 8.090452261306533e-05, + "loss": 1.2376, + "step": 39 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 0.7529111504554749, + "learning_rate": 8.040201005025126e-05, + "loss": 1.3724, + "step": 40 + }, + { + "epoch": 0.5273311897106109, + "grad_norm": 1.0163393020629883, + "learning_rate": 7.989949748743719e-05, + "loss": 1.2542, + "step": 41 + }, + { + "epoch": 0.5401929260450161, + "grad_norm": 0.651192307472229, + "learning_rate": 7.939698492462313e-05, + "loss": 1.5108, + "step": 42 + }, + { + "epoch": 0.5530546623794212, + "grad_norm": 0.8402130007743835, + "learning_rate": 7.889447236180904e-05, + "loss": 1.1996, + "step": 43 + }, + { + "epoch": 0.5659163987138264, + "grad_norm": 0.7727481722831726, + "learning_rate": 7.839195979899498e-05, + "loss": 1.2289, + "step": 44 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 0.6933633685112, + "learning_rate": 7.788944723618091e-05, + "loss": 1.1982, + "step": 45 + }, + { + "epoch": 0.5916398713826366, + "grad_norm": 0.5633453130722046, + "learning_rate": 7.738693467336684e-05, + "loss": 1.4112, + "step": 46 + }, + { + "epoch": 0.6045016077170418, + "grad_norm": 0.7134105563163757, + "learning_rate": 7.688442211055277e-05, + "loss": 1.2367, + "step": 47 + }, + { + "epoch": 0.617363344051447, + "grad_norm": 0.6546292304992676, + "learning_rate": 7.638190954773869e-05, + "loss": 1.1962, + "step": 48 + }, + { + "epoch": 0.6302250803858521, + "grad_norm": 0.7516946196556091, + "learning_rate": 7.587939698492463e-05, + "loss": 1.1747, + "step": 49 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 0.718727171421051, + "learning_rate": 7.537688442211056e-05, + "loss": 1.3876, + "step": 50 + }, + { + "epoch": 0.6559485530546624, + "grad_norm": 0.6199938058853149, + "learning_rate": 7.487437185929649e-05, + "loss": 1.2323, + "step": 51 + }, + { + "epoch": 0.6688102893890675, + "grad_norm": 0.7272217869758606, + "learning_rate": 7.437185929648241e-05, + "loss": 1.3287, + "step": 52 + }, + { + "epoch": 0.6816720257234726, + "grad_norm": 0.6393375992774963, + "learning_rate": 7.386934673366834e-05, + "loss": 1.3454, + "step": 53 + }, + { + "epoch": 0.6945337620578779, + "grad_norm": 0.6419456005096436, + "learning_rate": 7.336683417085427e-05, + "loss": 1.1051, + "step": 54 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 0.7256225943565369, + "learning_rate": 7.28643216080402e-05, + "loss": 1.2236, + "step": 55 + }, + { + "epoch": 0.7202572347266881, + "grad_norm": 0.6486414670944214, + "learning_rate": 7.236180904522614e-05, + "loss": 1.3265, + "step": 56 + }, + { + "epoch": 0.7331189710610932, + "grad_norm": 0.6492426991462708, + "learning_rate": 7.185929648241206e-05, + "loss": 1.3362, + "step": 57 + }, + { + "epoch": 0.7459807073954984, + "grad_norm": 0.7237244248390198, + "learning_rate": 7.135678391959799e-05, + "loss": 1.1568, + "step": 58 + }, + { + "epoch": 0.7588424437299035, + "grad_norm": 0.7400479912757874, + "learning_rate": 7.085427135678392e-05, + "loss": 1.2116, + "step": 59 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 0.7453139424324036, + "learning_rate": 7.035175879396985e-05, + "loss": 1.293, + "step": 60 + }, + { + "epoch": 0.7845659163987139, + "grad_norm": 0.7200148105621338, + "learning_rate": 6.984924623115579e-05, + "loss": 1.1902, + "step": 61 + }, + { + "epoch": 0.797427652733119, + "grad_norm": 0.753768801689148, + "learning_rate": 6.93467336683417e-05, + "loss": 1.218, + "step": 62 + }, + { + "epoch": 0.8102893890675241, + "grad_norm": 0.7190577387809753, + "learning_rate": 6.884422110552764e-05, + "loss": 1.2068, + "step": 63 + }, + { + "epoch": 0.8231511254019293, + "grad_norm": 0.7753349542617798, + "learning_rate": 6.834170854271357e-05, + "loss": 1.3279, + "step": 64 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 0.7965415120124817, + "learning_rate": 6.78391959798995e-05, + "loss": 1.0254, + "step": 65 + }, + { + "epoch": 0.8488745980707395, + "grad_norm": 0.8631449937820435, + "learning_rate": 6.733668341708544e-05, + "loss": 1.2029, + "step": 66 + }, + { + "epoch": 0.8617363344051447, + "grad_norm": 0.7210382223129272, + "learning_rate": 6.683417085427135e-05, + "loss": 1.1602, + "step": 67 + }, + { + "epoch": 0.8745980707395499, + "grad_norm": 0.6991996765136719, + "learning_rate": 6.633165829145729e-05, + "loss": 1.2122, + "step": 68 + }, + { + "epoch": 0.887459807073955, + "grad_norm": 0.6861876249313354, + "learning_rate": 6.582914572864322e-05, + "loss": 1.2715, + "step": 69 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 0.7111251354217529, + "learning_rate": 6.532663316582915e-05, + "loss": 1.1894, + "step": 70 + }, + { + "epoch": 0.9131832797427653, + "grad_norm": 0.7575141787528992, + "learning_rate": 6.482412060301508e-05, + "loss": 1.2182, + "step": 71 + }, + { + "epoch": 0.9260450160771704, + "grad_norm": 0.6892881393432617, + "learning_rate": 6.4321608040201e-05, + "loss": 1.1138, + "step": 72 + }, + { + "epoch": 0.9389067524115756, + "grad_norm": 0.7526578903198242, + "learning_rate": 6.381909547738694e-05, + "loss": 1.1088, + "step": 73 + }, + { + "epoch": 0.9517684887459807, + "grad_norm": 0.6483733654022217, + "learning_rate": 6.331658291457287e-05, + "loss": 1.2254, + "step": 74 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 0.7623192071914673, + "learning_rate": 6.28140703517588e-05, + "loss": 1.2961, + "step": 75 + }, + { + "epoch": 0.977491961414791, + "grad_norm": 0.7486171126365662, + "learning_rate": 6.231155778894473e-05, + "loss": 1.1917, + "step": 76 + }, + { + "epoch": 0.9903536977491961, + "grad_norm": 0.6872939467430115, + "learning_rate": 6.180904522613065e-05, + "loss": 1.147, + "step": 77 + }, + { + "epoch": 1.0064308681672025, + "grad_norm": 1.3864794969558716, + "learning_rate": 6.130653266331658e-05, + "loss": 1.746, + "step": 78 + }, + { + "epoch": 1.0192926045016077, + "grad_norm": 0.5814358592033386, + "learning_rate": 6.080402010050251e-05, + "loss": 1.0255, + "step": 79 + }, + { + "epoch": 1.0321543408360128, + "grad_norm": 0.6417332887649536, + "learning_rate": 6.030150753768844e-05, + "loss": 1.1216, + "step": 80 + }, + { + "epoch": 1.045016077170418, + "grad_norm": 0.648380696773529, + "learning_rate": 5.979899497487438e-05, + "loss": 1.1417, + "step": 81 + }, + { + "epoch": 1.0578778135048232, + "grad_norm": 0.6810972094535828, + "learning_rate": 5.929648241206031e-05, + "loss": 1.1271, + "step": 82 + }, + { + "epoch": 1.0707395498392283, + "grad_norm": 0.6431122422218323, + "learning_rate": 5.879396984924623e-05, + "loss": 1.0729, + "step": 83 + }, + { + "epoch": 1.0836012861736335, + "grad_norm": 0.7915307283401489, + "learning_rate": 5.829145728643216e-05, + "loss": 1.1238, + "step": 84 + }, + { + "epoch": 1.0964630225080385, + "grad_norm": 0.7646147012710571, + "learning_rate": 5.778894472361809e-05, + "loss": 1.1225, + "step": 85 + }, + { + "epoch": 1.1093247588424437, + "grad_norm": 0.7343133687973022, + "learning_rate": 5.728643216080403e-05, + "loss": 1.1414, + "step": 86 + }, + { + "epoch": 1.122186495176849, + "grad_norm": 0.6509206891059875, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.0904, + "step": 87 + }, + { + "epoch": 1.135048231511254, + "grad_norm": 0.6493409276008606, + "learning_rate": 5.628140703517588e-05, + "loss": 1.1185, + "step": 88 + }, + { + "epoch": 1.1479099678456592, + "grad_norm": 0.6985325813293457, + "learning_rate": 5.577889447236181e-05, + "loss": 1.1248, + "step": 89 + }, + { + "epoch": 1.1607717041800643, + "grad_norm": 0.7278170585632324, + "learning_rate": 5.527638190954774e-05, + "loss": 1.1493, + "step": 90 + }, + { + "epoch": 1.1736334405144695, + "grad_norm": 0.7814955115318298, + "learning_rate": 5.477386934673368e-05, + "loss": 1.8804, + "step": 91 + }, + { + "epoch": 1.1864951768488745, + "grad_norm": 0.6044358015060425, + "learning_rate": 5.4271356783919604e-05, + "loss": 0.9426, + "step": 92 + }, + { + "epoch": 1.1993569131832797, + "grad_norm": 0.6450907588005066, + "learning_rate": 5.376884422110553e-05, + "loss": 0.9643, + "step": 93 + }, + { + "epoch": 1.212218649517685, + "grad_norm": 0.845541775226593, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.2485, + "step": 94 + }, + { + "epoch": 1.22508038585209, + "grad_norm": 0.653166651725769, + "learning_rate": 5.276381909547739e-05, + "loss": 0.9115, + "step": 95 + }, + { + "epoch": 1.2379421221864952, + "grad_norm": 0.8042985796928406, + "learning_rate": 5.226130653266332e-05, + "loss": 1.2722, + "step": 96 + }, + { + "epoch": 1.2508038585209003, + "grad_norm": 0.7926493287086487, + "learning_rate": 5.175879396984925e-05, + "loss": 1.1146, + "step": 97 + }, + { + "epoch": 1.2636655948553055, + "grad_norm": 0.7699697017669678, + "learning_rate": 5.125628140703518e-05, + "loss": 1.114, + "step": 98 + }, + { + "epoch": 1.2765273311897105, + "grad_norm": 0.7615493535995483, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.1514, + "step": 99 + }, + { + "epoch": 1.2893890675241158, + "grad_norm": 0.7418377995491028, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.0847, + "step": 100 + }, + { + "epoch": 1.302250803858521, + "grad_norm": 0.7438411712646484, + "learning_rate": 4.974874371859297e-05, + "loss": 1.2096, + "step": 101 + }, + { + "epoch": 1.315112540192926, + "grad_norm": 0.7060010433197021, + "learning_rate": 4.92462311557789e-05, + "loss": 1.1058, + "step": 102 + }, + { + "epoch": 1.3279742765273312, + "grad_norm": 0.574945330619812, + "learning_rate": 4.874371859296483e-05, + "loss": 0.9299, + "step": 103 + }, + { + "epoch": 1.3408360128617363, + "grad_norm": 0.7485166788101196, + "learning_rate": 4.824120603015075e-05, + "loss": 1.5913, + "step": 104 + }, + { + "epoch": 1.3536977491961415, + "grad_norm": 0.7644198536872864, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.1538, + "step": 105 + }, + { + "epoch": 1.3665594855305465, + "grad_norm": 0.7649131417274475, + "learning_rate": 4.723618090452262e-05, + "loss": 1.04, + "step": 106 + }, + { + "epoch": 1.3794212218649518, + "grad_norm": 0.7314745783805847, + "learning_rate": 4.673366834170855e-05, + "loss": 0.9756, + "step": 107 + }, + { + "epoch": 1.392282958199357, + "grad_norm": 0.7615334391593933, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.053, + "step": 108 + }, + { + "epoch": 1.405144694533762, + "grad_norm": 0.7232903242111206, + "learning_rate": 4.57286432160804e-05, + "loss": 0.9716, + "step": 109 + }, + { + "epoch": 1.4180064308681672, + "grad_norm": 0.8094847798347473, + "learning_rate": 4.522613065326633e-05, + "loss": 1.0367, + "step": 110 + }, + { + "epoch": 1.4308681672025725, + "grad_norm": 0.950025200843811, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.1011, + "step": 111 + }, + { + "epoch": 1.4437299035369775, + "grad_norm": 0.7514937520027161, + "learning_rate": 4.42211055276382e-05, + "loss": 1.0202, + "step": 112 + }, + { + "epoch": 1.4565916398713825, + "grad_norm": 0.836333692073822, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.1794, + "step": 113 + }, + { + "epoch": 1.4694533762057878, + "grad_norm": 0.8447477221488953, + "learning_rate": 4.321608040201005e-05, + "loss": 1.0942, + "step": 114 + }, + { + "epoch": 1.482315112540193, + "grad_norm": 0.8204519748687744, + "learning_rate": 4.271356783919598e-05, + "loss": 1.3409, + "step": 115 + }, + { + "epoch": 1.495176848874598, + "grad_norm": 0.730801522731781, + "learning_rate": 4.2211055276381914e-05, + "loss": 0.9899, + "step": 116 + }, + { + "epoch": 1.5080385852090032, + "grad_norm": 0.879811704158783, + "learning_rate": 4.170854271356784e-05, + "loss": 1.1081, + "step": 117 + }, + { + "epoch": 1.5209003215434085, + "grad_norm": 0.7798628807067871, + "learning_rate": 4.120603015075377e-05, + "loss": 1.0908, + "step": 118 + }, + { + "epoch": 1.5337620578778135, + "grad_norm": 0.870912492275238, + "learning_rate": 4.07035175879397e-05, + "loss": 1.2096, + "step": 119 + }, + { + "epoch": 1.5466237942122185, + "grad_norm": 0.7109503149986267, + "learning_rate": 4.020100502512563e-05, + "loss": 1.0298, + "step": 120 + }, + { + "epoch": 1.5594855305466238, + "grad_norm": 0.8116230964660645, + "learning_rate": 3.969849246231156e-05, + "loss": 1.0723, + "step": 121 + }, + { + "epoch": 1.572347266881029, + "grad_norm": 0.7052260637283325, + "learning_rate": 3.919597989949749e-05, + "loss": 1.0198, + "step": 122 + }, + { + "epoch": 1.585209003215434, + "grad_norm": 0.8413100838661194, + "learning_rate": 3.869346733668342e-05, + "loss": 1.0381, + "step": 123 + }, + { + "epoch": 1.5980707395498392, + "grad_norm": 0.8870115876197815, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.5338, + "step": 124 + }, + { + "epoch": 1.6109324758842445, + "grad_norm": 0.6777501702308655, + "learning_rate": 3.768844221105528e-05, + "loss": 0.8878, + "step": 125 + }, + { + "epoch": 1.6237942122186495, + "grad_norm": 0.7898180484771729, + "learning_rate": 3.7185929648241204e-05, + "loss": 0.961, + "step": 126 + }, + { + "epoch": 1.6366559485530545, + "grad_norm": 0.7508682012557983, + "learning_rate": 3.668341708542714e-05, + "loss": 0.9697, + "step": 127 + }, + { + "epoch": 1.6495176848874598, + "grad_norm": 0.956596314907074, + "learning_rate": 3.618090452261307e-05, + "loss": 1.2065, + "step": 128 + }, + { + "epoch": 1.662379421221865, + "grad_norm": 0.8160056471824646, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.0585, + "step": 129 + }, + { + "epoch": 1.67524115755627, + "grad_norm": 0.9093283414840698, + "learning_rate": 3.517587939698493e-05, + "loss": 1.008, + "step": 130 + }, + { + "epoch": 1.6881028938906752, + "grad_norm": 0.8321830630302429, + "learning_rate": 3.467336683417085e-05, + "loss": 1.1026, + "step": 131 + }, + { + "epoch": 1.7009646302250805, + "grad_norm": 0.7878942489624023, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.0497, + "step": 132 + }, + { + "epoch": 1.7138263665594855, + "grad_norm": 0.88408362865448, + "learning_rate": 3.366834170854272e-05, + "loss": 1.3551, + "step": 133 + }, + { + "epoch": 1.7266881028938905, + "grad_norm": 0.7452175617218018, + "learning_rate": 3.3165829145728643e-05, + "loss": 0.9679, + "step": 134 + }, + { + "epoch": 1.739549839228296, + "grad_norm": 0.8924412727355957, + "learning_rate": 3.2663316582914576e-05, + "loss": 1.0265, + "step": 135 + }, + { + "epoch": 1.752411575562701, + "grad_norm": 0.9149259924888611, + "learning_rate": 3.21608040201005e-05, + "loss": 0.9405, + "step": 136 + }, + { + "epoch": 1.765273311897106, + "grad_norm": 0.935562252998352, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.1063, + "step": 137 + }, + { + "epoch": 1.7781350482315113, + "grad_norm": 0.8658350110054016, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.0239, + "step": 138 + }, + { + "epoch": 1.7909967845659165, + "grad_norm": 0.9327634572982788, + "learning_rate": 3.065326633165829e-05, + "loss": 1.1195, + "step": 139 + }, + { + "epoch": 1.8038585209003215, + "grad_norm": 0.8411900997161865, + "learning_rate": 3.015075376884422e-05, + "loss": 0.8957, + "step": 140 + }, + { + "epoch": 1.8167202572347267, + "grad_norm": 0.9331545829772949, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.1823, + "step": 141 + }, + { + "epoch": 1.829581993569132, + "grad_norm": 0.9505443572998047, + "learning_rate": 2.914572864321608e-05, + "loss": 1.1083, + "step": 142 + }, + { + "epoch": 1.842443729903537, + "grad_norm": 0.7980955243110657, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.074, + "step": 143 + }, + { + "epoch": 1.855305466237942, + "grad_norm": 0.9247193336486816, + "learning_rate": 2.814070351758794e-05, + "loss": 1.2129, + "step": 144 + }, + { + "epoch": 1.8681672025723473, + "grad_norm": 0.887328028678894, + "learning_rate": 2.763819095477387e-05, + "loss": 1.0688, + "step": 145 + }, + { + "epoch": 1.8810289389067525, + "grad_norm": 0.7812901735305786, + "learning_rate": 2.7135678391959802e-05, + "loss": 0.9337, + "step": 146 + }, + { + "epoch": 1.8938906752411575, + "grad_norm": 0.8578721880912781, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.2328, + "step": 147 + }, + { + "epoch": 1.9067524115755627, + "grad_norm": 0.9639273285865784, + "learning_rate": 2.613065326633166e-05, + "loss": 1.0618, + "step": 148 + }, + { + "epoch": 1.919614147909968, + "grad_norm": 0.7975261807441711, + "learning_rate": 2.562814070351759e-05, + "loss": 0.9403, + "step": 149 + }, + { + "epoch": 1.932475884244373, + "grad_norm": 0.9210904240608215, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.1497, + "step": 150 + }, + { + "epoch": 1.945337620578778, + "grad_norm": 0.7670096755027771, + "learning_rate": 2.462311557788945e-05, + "loss": 1.048, + "step": 151 + }, + { + "epoch": 1.9581993569131833, + "grad_norm": 0.9265581965446472, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.1377, + "step": 152 + }, + { + "epoch": 1.9710610932475885, + "grad_norm": 0.853067934513092, + "learning_rate": 2.361809045226131e-05, + "loss": 1.1952, + "step": 153 + }, + { + "epoch": 1.9839228295819935, + "grad_norm": 0.8661015629768372, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.094, + "step": 154 + }, + { + "epoch": 1.9967845659163987, + "grad_norm": 1.4559237957000732, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.576, + "step": 155 + }, + { + "epoch": 2.012861736334405, + "grad_norm": 0.8805817365646362, + "learning_rate": 2.21105527638191e-05, + "loss": 1.0219, + "step": 156 + }, + { + "epoch": 2.0257234726688105, + "grad_norm": 0.7943904399871826, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.0843, + "step": 157 + }, + { + "epoch": 2.0385852090032155, + "grad_norm": 0.8749082684516907, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.0128, + "step": 158 + }, + { + "epoch": 2.0514469453376205, + "grad_norm": 0.80024653673172, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.0139, + "step": 159 + }, + { + "epoch": 2.0643086816720255, + "grad_norm": 0.8480293154716492, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.9936, + "step": 160 + }, + { + "epoch": 2.077170418006431, + "grad_norm": 0.800495445728302, + "learning_rate": 1.9597989949748744e-05, + "loss": 1.0542, + "step": 161 + }, + { + "epoch": 2.090032154340836, + "grad_norm": 0.8782421350479126, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.0353, + "step": 162 + }, + { + "epoch": 2.102893890675241, + "grad_norm": 0.8080796599388123, + "learning_rate": 1.8592964824120602e-05, + "loss": 0.9628, + "step": 163 + }, + { + "epoch": 2.1157556270096465, + "grad_norm": 0.7773626446723938, + "learning_rate": 1.8090452261306535e-05, + "loss": 0.9945, + "step": 164 + }, + { + "epoch": 2.1286173633440515, + "grad_norm": 0.7648970484733582, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.0015, + "step": 165 + }, + { + "epoch": 2.1414790996784565, + "grad_norm": 0.8139113187789917, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.0132, + "step": 166 + }, + { + "epoch": 2.154340836012862, + "grad_norm": 0.8528121113777161, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.004, + "step": 167 + }, + { + "epoch": 2.167202572347267, + "grad_norm": 0.7888818383216858, + "learning_rate": 1.608040201005025e-05, + "loss": 0.9956, + "step": 168 + }, + { + "epoch": 2.180064308681672, + "grad_norm": 0.8569130301475525, + "learning_rate": 1.5577889447236183e-05, + "loss": 0.9705, + "step": 169 + }, + { + "epoch": 2.192926045016077, + "grad_norm": 0.7740685939788818, + "learning_rate": 1.507537688442211e-05, + "loss": 1.4226, + "step": 170 + }, + { + "epoch": 2.2057877813504825, + "grad_norm": 0.8912209272384644, + "learning_rate": 1.457286432160804e-05, + "loss": 1.0053, + "step": 171 + }, + { + "epoch": 2.2186495176848875, + "grad_norm": 0.9198904633522034, + "learning_rate": 1.407035175879397e-05, + "loss": 1.0057, + "step": 172 + }, + { + "epoch": 2.2315112540192925, + "grad_norm": 0.8956185579299927, + "learning_rate": 1.3567839195979901e-05, + "loss": 0.9636, + "step": 173 + }, + { + "epoch": 2.244372990353698, + "grad_norm": 0.7728644609451294, + "learning_rate": 1.306532663316583e-05, + "loss": 1.1026, + "step": 174 + }, + { + "epoch": 2.257234726688103, + "grad_norm": 0.8695865273475647, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.0474, + "step": 175 + }, + { + "epoch": 2.270096463022508, + "grad_norm": 0.8445467352867126, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.1477, + "step": 176 + }, + { + "epoch": 2.282958199356913, + "grad_norm": 0.8170430660247803, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.0361, + "step": 177 + }, + { + "epoch": 2.2958199356913185, + "grad_norm": 0.9592326283454895, + "learning_rate": 1.105527638190955e-05, + "loss": 1.0354, + "step": 178 + }, + { + "epoch": 2.3086816720257235, + "grad_norm": 0.9984281659126282, + "learning_rate": 1.0552763819095479e-05, + "loss": 0.977, + "step": 179 + }, + { + "epoch": 2.3215434083601285, + "grad_norm": 0.8793332576751709, + "learning_rate": 1.0050251256281408e-05, + "loss": 0.9957, + "step": 180 + }, + { + "epoch": 2.334405144694534, + "grad_norm": 0.8469133973121643, + "learning_rate": 9.547738693467337e-06, + "loss": 0.981, + "step": 181 + }, + { + "epoch": 2.347266881028939, + "grad_norm": 0.8359603881835938, + "learning_rate": 9.045226130653267e-06, + "loss": 0.921, + "step": 182 + }, + { + "epoch": 2.360128617363344, + "grad_norm": 0.9214036464691162, + "learning_rate": 8.542713567839196e-06, + "loss": 1.0061, + "step": 183 + }, + { + "epoch": 2.372990353697749, + "grad_norm": 0.8119861483573914, + "learning_rate": 8.040201005025125e-06, + "loss": 1.0287, + "step": 184 + }, + { + "epoch": 2.3858520900321545, + "grad_norm": 0.8458288908004761, + "learning_rate": 7.537688442211055e-06, + "loss": 0.9324, + "step": 185 + }, + { + "epoch": 2.3987138263665595, + "grad_norm": 0.9167879223823547, + "learning_rate": 7.035175879396985e-06, + "loss": 1.0395, + "step": 186 + }, + { + "epoch": 2.4115755627009645, + "grad_norm": 0.9315198659896851, + "learning_rate": 6.532663316582915e-06, + "loss": 0.9878, + "step": 187 + }, + { + "epoch": 2.42443729903537, + "grad_norm": 0.8909194469451904, + "learning_rate": 6.030150753768844e-06, + "loss": 1.0509, + "step": 188 + }, + { + "epoch": 2.437299035369775, + "grad_norm": 0.8457067012786865, + "learning_rate": 5.527638190954775e-06, + "loss": 0.9853, + "step": 189 + }, + { + "epoch": 2.45016077170418, + "grad_norm": 0.823922872543335, + "learning_rate": 5.025125628140704e-06, + "loss": 1.0355, + "step": 190 + }, + { + "epoch": 2.463022508038585, + "grad_norm": 0.7183964252471924, + "learning_rate": 4.522613065326634e-06, + "loss": 1.2571, + "step": 191 + }, + { + "epoch": 2.4758842443729905, + "grad_norm": 0.8723146915435791, + "learning_rate": 4.020100502512563e-06, + "loss": 1.0361, + "step": 192 + }, + { + "epoch": 2.4887459807073955, + "grad_norm": 0.8409279584884644, + "learning_rate": 3.5175879396984926e-06, + "loss": 0.9046, + "step": 193 + }, + { + "epoch": 2.5016077170418005, + "grad_norm": 0.9092235565185547, + "learning_rate": 3.015075376884422e-06, + "loss": 1.0147, + "step": 194 + }, + { + "epoch": 2.514469453376206, + "grad_norm": 0.849392831325531, + "learning_rate": 2.512562814070352e-06, + "loss": 0.8241, + "step": 195 + }, + { + "epoch": 2.527331189710611, + "grad_norm": 0.9866675734519958, + "learning_rate": 2.0100502512562813e-06, + "loss": 0.9559, + "step": 196 + }, + { + "epoch": 2.540192926045016, + "grad_norm": 0.8281249403953552, + "learning_rate": 1.507537688442211e-06, + "loss": 0.9611, + "step": 197 + }, + { + "epoch": 2.553054662379421, + "grad_norm": 0.834632158279419, + "learning_rate": 1.0050251256281407e-06, + "loss": 0.9536, + "step": 198 + }, + { + "epoch": 2.5659163987138265, + "grad_norm": 0.8354447484016418, + "learning_rate": 5.025125628140703e-07, + "loss": 0.9748, + "step": 199 + }, + { + "epoch": 2.5787781350482315, + "grad_norm": 0.8496025800704956, + "learning_rate": 0.0, + "loss": 0.8724, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.0719132584574976e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_Chinese_Vietnamese/checkpoint-200/training_args.bin b/llama_Chinese_Vietnamese/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b18cb8825881c3685c23abd65bb112ee9f3af151 --- /dev/null +++ b/llama_Chinese_Vietnamese/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72be6b33a80c9a6db56bedd6ffb432695a7cbda15a70094c784d9c87ce00e905 +size 5624 diff --git a/llama_English_Chinese/checkpoint-300/README.md b/llama_English_Chinese/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_English_Chinese/checkpoint-300/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_English_Chinese/checkpoint-300/adapter_config.json b/llama_English_Chinese/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..345f816be31626100b7bdbdde09a56dddea9b915 --- /dev/null +++ b/llama_English_Chinese/checkpoint-300/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "down_proj", + "up_proj", + "gate_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_English_Chinese/checkpoint-300/adapter_model.safetensors b/llama_English_Chinese/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..696497ec1a3178a3a4e5ae801423a64e962ce340 --- /dev/null +++ b/llama_English_Chinese/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d1767a6094843111ea8e940c2a645edfd5a4c9034394a224f3d7086d5d7a32 +size 167832240 diff --git a/llama_English_Chinese/checkpoint-300/optimizer.pt b/llama_English_Chinese/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa576286e44724aa58d3216302456a5536bc3d52 --- /dev/null +++ b/llama_English_Chinese/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c36c57946cf62d158c8b5aa174ba54b0d10143603dd28e43fd63d378110bbfa4 +size 85723732 diff --git a/llama_English_Chinese/checkpoint-300/rng_state.pth b/llama_English_Chinese/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/llama_English_Chinese/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/llama_English_Chinese/checkpoint-300/scheduler.pt b/llama_English_Chinese/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e209dcc25b944d4b85ca13bea87ea706231c655 --- /dev/null +++ b/llama_English_Chinese/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7445dd881aa0e896fdc6ef61cf9c541f4299feaa6850ba7cac238afd6649f3 +size 1064 diff --git a/llama_English_Chinese/checkpoint-300/special_tokens_map.json b/llama_English_Chinese/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_English_Chinese/checkpoint-300/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_English_Chinese/checkpoint-300/tokenizer.json b/llama_English_Chinese/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_English_Chinese/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_English_Chinese/checkpoint-300/tokenizer_config.json b/llama_English_Chinese/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_English_Chinese/checkpoint-300/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_English_Chinese/checkpoint-300/trainer_state.json b/llama_English_Chinese/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8d712fe06f245215715f352795dbad3b697b48aa --- /dev/null +++ b/llama_English_Chinese/checkpoint-300/trainer_state.json @@ -0,0 +1,2133 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.18814675446848542, + "eval_steps": 500, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0006271558482282847, + "grad_norm": 1.0956496000289917, + "learning_rate": 0.0001, + "loss": 2.5869, + "step": 1 + }, + { + "epoch": 0.0012543116964565694, + "grad_norm": 0.983011782169342, + "learning_rate": 9.966555183946489e-05, + "loss": 2.3546, + "step": 2 + }, + { + "epoch": 0.0018814675446848542, + "grad_norm": 0.9799704551696777, + "learning_rate": 9.933110367892977e-05, + "loss": 2.5738, + "step": 3 + }, + { + "epoch": 0.002508623392913139, + "grad_norm": 1.0392695665359497, + "learning_rate": 9.899665551839465e-05, + "loss": 2.4259, + "step": 4 + }, + { + "epoch": 0.0031357792411414237, + "grad_norm": 1.086958408355713, + "learning_rate": 9.866220735785953e-05, + "loss": 2.3127, + "step": 5 + }, + { + "epoch": 0.0037629350893697085, + "grad_norm": 1.0895546674728394, + "learning_rate": 9.832775919732441e-05, + "loss": 1.9304, + "step": 6 + }, + { + "epoch": 0.004390090937597993, + "grad_norm": 1.3570847511291504, + "learning_rate": 9.799331103678931e-05, + "loss": 1.8656, + "step": 7 + }, + { + "epoch": 0.005017246785826278, + "grad_norm": 1.2285577058792114, + "learning_rate": 9.765886287625419e-05, + "loss": 1.615, + "step": 8 + }, + { + "epoch": 0.005644402634054563, + "grad_norm": 0.8086690306663513, + "learning_rate": 9.732441471571907e-05, + "loss": 1.6324, + "step": 9 + }, + { + "epoch": 0.006271558482282847, + "grad_norm": 0.8209547996520996, + "learning_rate": 9.698996655518396e-05, + "loss": 1.5731, + "step": 10 + }, + { + "epoch": 0.006898714330511132, + "grad_norm": 0.8158303499221802, + "learning_rate": 9.665551839464884e-05, + "loss": 1.5126, + "step": 11 + }, + { + "epoch": 0.007525870178739417, + "grad_norm": 0.8391895294189453, + "learning_rate": 9.632107023411372e-05, + "loss": 1.5076, + "step": 12 + }, + { + "epoch": 0.008153026026967701, + "grad_norm": 0.7997937798500061, + "learning_rate": 9.59866220735786e-05, + "loss": 1.4216, + "step": 13 + }, + { + "epoch": 0.008780181875195987, + "grad_norm": 0.5388962626457214, + "learning_rate": 9.565217391304348e-05, + "loss": 1.3764, + "step": 14 + }, + { + "epoch": 0.00940733772342427, + "grad_norm": 0.5464240312576294, + "learning_rate": 9.531772575250837e-05, + "loss": 1.3275, + "step": 15 + }, + { + "epoch": 0.010034493571652555, + "grad_norm": 0.5107612013816833, + "learning_rate": 9.498327759197325e-05, + "loss": 1.3888, + "step": 16 + }, + { + "epoch": 0.01066164941988084, + "grad_norm": 0.5795170664787292, + "learning_rate": 9.464882943143813e-05, + "loss": 1.3433, + "step": 17 + }, + { + "epoch": 0.011288805268109126, + "grad_norm": 0.5139371156692505, + "learning_rate": 9.431438127090302e-05, + "loss": 1.3202, + "step": 18 + }, + { + "epoch": 0.01191596111633741, + "grad_norm": 0.5735489130020142, + "learning_rate": 9.39799331103679e-05, + "loss": 1.3133, + "step": 19 + }, + { + "epoch": 0.012543116964565695, + "grad_norm": 0.5431280732154846, + "learning_rate": 9.364548494983279e-05, + "loss": 1.2475, + "step": 20 + }, + { + "epoch": 0.01317027281279398, + "grad_norm": 0.54478520154953, + "learning_rate": 9.331103678929767e-05, + "loss": 1.2586, + "step": 21 + }, + { + "epoch": 0.013797428661022263, + "grad_norm": 0.5716578960418701, + "learning_rate": 9.297658862876255e-05, + "loss": 1.3215, + "step": 22 + }, + { + "epoch": 0.014424584509250549, + "grad_norm": 0.5750141143798828, + "learning_rate": 9.264214046822743e-05, + "loss": 1.2867, + "step": 23 + }, + { + "epoch": 0.015051740357478834, + "grad_norm": 0.5213989019393921, + "learning_rate": 9.230769230769232e-05, + "loss": 1.3119, + "step": 24 + }, + { + "epoch": 0.01567889620570712, + "grad_norm": 0.5609740614891052, + "learning_rate": 9.19732441471572e-05, + "loss": 1.3458, + "step": 25 + }, + { + "epoch": 0.016306052053935403, + "grad_norm": 0.6240435242652893, + "learning_rate": 9.163879598662207e-05, + "loss": 1.2525, + "step": 26 + }, + { + "epoch": 0.016933207902163686, + "grad_norm": 0.6331408619880676, + "learning_rate": 9.130434782608696e-05, + "loss": 1.2585, + "step": 27 + }, + { + "epoch": 0.017560363750391973, + "grad_norm": 0.6961623430252075, + "learning_rate": 9.096989966555184e-05, + "loss": 1.2876, + "step": 28 + }, + { + "epoch": 0.018187519598620257, + "grad_norm": 0.6711810827255249, + "learning_rate": 9.063545150501673e-05, + "loss": 1.2588, + "step": 29 + }, + { + "epoch": 0.01881467544684854, + "grad_norm": 0.6240595579147339, + "learning_rate": 9.030100334448161e-05, + "loss": 1.2183, + "step": 30 + }, + { + "epoch": 0.019441831295076827, + "grad_norm": 0.6733318567276001, + "learning_rate": 8.996655518394649e-05, + "loss": 1.1895, + "step": 31 + }, + { + "epoch": 0.02006898714330511, + "grad_norm": 0.7283594012260437, + "learning_rate": 8.963210702341137e-05, + "loss": 1.0659, + "step": 32 + }, + { + "epoch": 0.020696142991533398, + "grad_norm": 0.713584303855896, + "learning_rate": 8.929765886287625e-05, + "loss": 1.1678, + "step": 33 + }, + { + "epoch": 0.02132329883976168, + "grad_norm": 0.7807942628860474, + "learning_rate": 8.896321070234114e-05, + "loss": 1.1614, + "step": 34 + }, + { + "epoch": 0.021950454687989965, + "grad_norm": 0.8136309385299683, + "learning_rate": 8.862876254180602e-05, + "loss": 1.2295, + "step": 35 + }, + { + "epoch": 0.022577610536218252, + "grad_norm": 0.8239562511444092, + "learning_rate": 8.82943143812709e-05, + "loss": 1.2797, + "step": 36 + }, + { + "epoch": 0.023204766384446535, + "grad_norm": 0.8340959548950195, + "learning_rate": 8.795986622073578e-05, + "loss": 1.1222, + "step": 37 + }, + { + "epoch": 0.02383192223267482, + "grad_norm": 0.7285664081573486, + "learning_rate": 8.762541806020068e-05, + "loss": 1.1263, + "step": 38 + }, + { + "epoch": 0.024459078080903106, + "grad_norm": 0.8342467546463013, + "learning_rate": 8.729096989966556e-05, + "loss": 1.2243, + "step": 39 + }, + { + "epoch": 0.02508623392913139, + "grad_norm": 0.7293782234191895, + "learning_rate": 8.695652173913044e-05, + "loss": 1.3218, + "step": 40 + }, + { + "epoch": 0.025713389777359673, + "grad_norm": 0.5601121783256531, + "learning_rate": 8.662207357859532e-05, + "loss": 1.1736, + "step": 41 + }, + { + "epoch": 0.02634054562558796, + "grad_norm": 0.5035584568977356, + "learning_rate": 8.62876254180602e-05, + "loss": 1.2016, + "step": 42 + }, + { + "epoch": 0.026967701473816243, + "grad_norm": 0.5000720024108887, + "learning_rate": 8.595317725752509e-05, + "loss": 1.1698, + "step": 43 + }, + { + "epoch": 0.027594857322044527, + "grad_norm": 0.5331399440765381, + "learning_rate": 8.561872909698997e-05, + "loss": 1.1174, + "step": 44 + }, + { + "epoch": 0.028222013170272814, + "grad_norm": 0.5762463808059692, + "learning_rate": 8.528428093645485e-05, + "loss": 1.174, + "step": 45 + }, + { + "epoch": 0.028849169018501097, + "grad_norm": 0.5670439004898071, + "learning_rate": 8.494983277591973e-05, + "loss": 1.2249, + "step": 46 + }, + { + "epoch": 0.02947632486672938, + "grad_norm": 0.5574221014976501, + "learning_rate": 8.461538461538461e-05, + "loss": 1.1928, + "step": 47 + }, + { + "epoch": 0.030103480714957668, + "grad_norm": 0.6322518587112427, + "learning_rate": 8.42809364548495e-05, + "loss": 1.1242, + "step": 48 + }, + { + "epoch": 0.03073063656318595, + "grad_norm": 0.552919864654541, + "learning_rate": 8.394648829431439e-05, + "loss": 1.1936, + "step": 49 + }, + { + "epoch": 0.03135779241141424, + "grad_norm": 0.506829559803009, + "learning_rate": 8.361204013377927e-05, + "loss": 1.1703, + "step": 50 + }, + { + "epoch": 0.03198494825964252, + "grad_norm": 0.4645536243915558, + "learning_rate": 8.327759197324416e-05, + "loss": 1.1559, + "step": 51 + }, + { + "epoch": 0.032612104107870805, + "grad_norm": 0.5211853981018066, + "learning_rate": 8.294314381270904e-05, + "loss": 1.1602, + "step": 52 + }, + { + "epoch": 0.03323925995609909, + "grad_norm": 0.6415900588035583, + "learning_rate": 8.260869565217392e-05, + "loss": 1.1585, + "step": 53 + }, + { + "epoch": 0.03386641580432737, + "grad_norm": 0.4860232174396515, + "learning_rate": 8.22742474916388e-05, + "loss": 1.1918, + "step": 54 + }, + { + "epoch": 0.03449357165255566, + "grad_norm": 0.5209294557571411, + "learning_rate": 8.193979933110368e-05, + "loss": 1.1477, + "step": 55 + }, + { + "epoch": 0.035120727500783946, + "grad_norm": 0.5140558481216431, + "learning_rate": 8.160535117056857e-05, + "loss": 1.1374, + "step": 56 + }, + { + "epoch": 0.03574788334901223, + "grad_norm": 0.5494105815887451, + "learning_rate": 8.127090301003345e-05, + "loss": 1.1525, + "step": 57 + }, + { + "epoch": 0.036375039197240513, + "grad_norm": 0.5180389881134033, + "learning_rate": 8.093645484949833e-05, + "loss": 1.1612, + "step": 58 + }, + { + "epoch": 0.0370021950454688, + "grad_norm": 0.5110743641853333, + "learning_rate": 8.060200668896321e-05, + "loss": 1.2495, + "step": 59 + }, + { + "epoch": 0.03762935089369708, + "grad_norm": 0.5188828110694885, + "learning_rate": 8.026755852842809e-05, + "loss": 1.1334, + "step": 60 + }, + { + "epoch": 0.03825650674192537, + "grad_norm": 0.4913420081138611, + "learning_rate": 7.993311036789299e-05, + "loss": 1.1378, + "step": 61 + }, + { + "epoch": 0.038883662590153655, + "grad_norm": 0.536530077457428, + "learning_rate": 7.959866220735787e-05, + "loss": 1.1373, + "step": 62 + }, + { + "epoch": 0.03951081843838194, + "grad_norm": 0.49781450629234314, + "learning_rate": 7.926421404682275e-05, + "loss": 1.1156, + "step": 63 + }, + { + "epoch": 0.04013797428661022, + "grad_norm": 0.5303206443786621, + "learning_rate": 7.892976588628763e-05, + "loss": 1.1264, + "step": 64 + }, + { + "epoch": 0.040765130134838505, + "grad_norm": 0.5993393063545227, + "learning_rate": 7.859531772575252e-05, + "loss": 1.1716, + "step": 65 + }, + { + "epoch": 0.041392285983066796, + "grad_norm": 0.5210543274879456, + "learning_rate": 7.82608695652174e-05, + "loss": 1.1374, + "step": 66 + }, + { + "epoch": 0.04201944183129508, + "grad_norm": 0.5523737072944641, + "learning_rate": 7.792642140468228e-05, + "loss": 1.0944, + "step": 67 + }, + { + "epoch": 0.04264659767952336, + "grad_norm": 0.5525214076042175, + "learning_rate": 7.759197324414716e-05, + "loss": 1.0747, + "step": 68 + }, + { + "epoch": 0.043273753527751646, + "grad_norm": 0.5736639499664307, + "learning_rate": 7.725752508361204e-05, + "loss": 1.1356, + "step": 69 + }, + { + "epoch": 0.04390090937597993, + "grad_norm": 0.6016295552253723, + "learning_rate": 7.692307692307693e-05, + "loss": 1.1052, + "step": 70 + }, + { + "epoch": 0.04452806522420821, + "grad_norm": 0.5571848154067993, + "learning_rate": 7.658862876254181e-05, + "loss": 1.0981, + "step": 71 + }, + { + "epoch": 0.045155221072436504, + "grad_norm": 0.5534396171569824, + "learning_rate": 7.62541806020067e-05, + "loss": 1.126, + "step": 72 + }, + { + "epoch": 0.04578237692066479, + "grad_norm": 0.5610045790672302, + "learning_rate": 7.591973244147159e-05, + "loss": 1.1178, + "step": 73 + }, + { + "epoch": 0.04640953276889307, + "grad_norm": 0.615006148815155, + "learning_rate": 7.558528428093647e-05, + "loss": 1.2245, + "step": 74 + }, + { + "epoch": 0.047036688617121354, + "grad_norm": 0.5436788201332092, + "learning_rate": 7.525083612040135e-05, + "loss": 1.1572, + "step": 75 + }, + { + "epoch": 0.04766384446534964, + "grad_norm": 0.5453152656555176, + "learning_rate": 7.491638795986622e-05, + "loss": 1.0931, + "step": 76 + }, + { + "epoch": 0.04829100031357792, + "grad_norm": 0.519679844379425, + "learning_rate": 7.45819397993311e-05, + "loss": 1.0683, + "step": 77 + }, + { + "epoch": 0.04891815616180621, + "grad_norm": 0.5648550987243652, + "learning_rate": 7.424749163879598e-05, + "loss": 1.1065, + "step": 78 + }, + { + "epoch": 0.049545312010034495, + "grad_norm": 0.5267070531845093, + "learning_rate": 7.391304347826086e-05, + "loss": 1.1555, + "step": 79 + }, + { + "epoch": 0.05017246785826278, + "grad_norm": 0.6587849855422974, + "learning_rate": 7.357859531772575e-05, + "loss": 1.1688, + "step": 80 + }, + { + "epoch": 0.05079962370649106, + "grad_norm": 0.5420068502426147, + "learning_rate": 7.324414715719064e-05, + "loss": 1.1425, + "step": 81 + }, + { + "epoch": 0.051426779554719346, + "grad_norm": 0.5998720526695251, + "learning_rate": 7.290969899665552e-05, + "loss": 1.1755, + "step": 82 + }, + { + "epoch": 0.05205393540294763, + "grad_norm": 0.5335188508033752, + "learning_rate": 7.25752508361204e-05, + "loss": 1.1334, + "step": 83 + }, + { + "epoch": 0.05268109125117592, + "grad_norm": 0.5293967723846436, + "learning_rate": 7.224080267558529e-05, + "loss": 1.0845, + "step": 84 + }, + { + "epoch": 0.0533082470994042, + "grad_norm": 0.548838198184967, + "learning_rate": 7.190635451505017e-05, + "loss": 1.1572, + "step": 85 + }, + { + "epoch": 0.05393540294763249, + "grad_norm": 0.5527269244194031, + "learning_rate": 7.157190635451505e-05, + "loss": 1.1789, + "step": 86 + }, + { + "epoch": 0.05456255879586077, + "grad_norm": 0.5414602160453796, + "learning_rate": 7.123745819397993e-05, + "loss": 1.1635, + "step": 87 + }, + { + "epoch": 0.055189714644089054, + "grad_norm": 0.5516874194145203, + "learning_rate": 7.090301003344481e-05, + "loss": 1.0885, + "step": 88 + }, + { + "epoch": 0.055816870492317344, + "grad_norm": 0.5483298897743225, + "learning_rate": 7.05685618729097e-05, + "loss": 1.135, + "step": 89 + }, + { + "epoch": 0.05644402634054563, + "grad_norm": 0.5225560069084167, + "learning_rate": 7.023411371237458e-05, + "loss": 1.1202, + "step": 90 + }, + { + "epoch": 0.05707118218877391, + "grad_norm": 0.5157314538955688, + "learning_rate": 6.989966555183946e-05, + "loss": 1.0701, + "step": 91 + }, + { + "epoch": 0.057698338037002195, + "grad_norm": 0.5118935704231262, + "learning_rate": 6.956521739130436e-05, + "loss": 1.1372, + "step": 92 + }, + { + "epoch": 0.05832549388523048, + "grad_norm": 0.5290085673332214, + "learning_rate": 6.923076923076924e-05, + "loss": 1.0433, + "step": 93 + }, + { + "epoch": 0.05895264973345876, + "grad_norm": 0.5611572265625, + "learning_rate": 6.889632107023412e-05, + "loss": 1.0563, + "step": 94 + }, + { + "epoch": 0.05957980558168705, + "grad_norm": 0.49483367800712585, + "learning_rate": 6.8561872909699e-05, + "loss": 1.0341, + "step": 95 + }, + { + "epoch": 0.060206961429915336, + "grad_norm": 0.5686403512954712, + "learning_rate": 6.822742474916388e-05, + "loss": 1.0087, + "step": 96 + }, + { + "epoch": 0.06083411727814362, + "grad_norm": 0.5443194508552551, + "learning_rate": 6.789297658862876e-05, + "loss": 1.1587, + "step": 97 + }, + { + "epoch": 0.0614612731263719, + "grad_norm": 0.60746830701828, + "learning_rate": 6.755852842809365e-05, + "loss": 1.1585, + "step": 98 + }, + { + "epoch": 0.062088428974600186, + "grad_norm": 0.5140027403831482, + "learning_rate": 6.722408026755853e-05, + "loss": 1.0371, + "step": 99 + }, + { + "epoch": 0.06271558482282848, + "grad_norm": 0.5666912794113159, + "learning_rate": 6.688963210702341e-05, + "loss": 1.1099, + "step": 100 + }, + { + "epoch": 0.06334274067105676, + "grad_norm": 0.5467258095741272, + "learning_rate": 6.655518394648829e-05, + "loss": 1.0563, + "step": 101 + }, + { + "epoch": 0.06396989651928504, + "grad_norm": 0.4934995174407959, + "learning_rate": 6.622073578595317e-05, + "loss": 1.0442, + "step": 102 + }, + { + "epoch": 0.06459705236751333, + "grad_norm": 0.5575374364852905, + "learning_rate": 6.588628762541807e-05, + "loss": 1.0859, + "step": 103 + }, + { + "epoch": 0.06522420821574161, + "grad_norm": 0.5914056897163391, + "learning_rate": 6.555183946488295e-05, + "loss": 1.2262, + "step": 104 + }, + { + "epoch": 0.0658513640639699, + "grad_norm": 0.5970908999443054, + "learning_rate": 6.521739130434783e-05, + "loss": 1.1537, + "step": 105 + }, + { + "epoch": 0.06647851991219818, + "grad_norm": 0.48699280619621277, + "learning_rate": 6.488294314381272e-05, + "loss": 1.0182, + "step": 106 + }, + { + "epoch": 0.06710567576042646, + "grad_norm": 0.6701877117156982, + "learning_rate": 6.45484949832776e-05, + "loss": 1.1193, + "step": 107 + }, + { + "epoch": 0.06773283160865474, + "grad_norm": 0.5456950068473816, + "learning_rate": 6.421404682274248e-05, + "loss": 0.984, + "step": 108 + }, + { + "epoch": 0.06835998745688304, + "grad_norm": 0.5178925395011902, + "learning_rate": 6.387959866220736e-05, + "loss": 1.0512, + "step": 109 + }, + { + "epoch": 0.06898714330511133, + "grad_norm": 0.6106047630310059, + "learning_rate": 6.354515050167224e-05, + "loss": 1.0582, + "step": 110 + }, + { + "epoch": 0.06961429915333961, + "grad_norm": 0.5366062521934509, + "learning_rate": 6.321070234113713e-05, + "loss": 1.2155, + "step": 111 + }, + { + "epoch": 0.07024145500156789, + "grad_norm": 0.5617843866348267, + "learning_rate": 6.287625418060201e-05, + "loss": 1.0701, + "step": 112 + }, + { + "epoch": 0.07086861084979618, + "grad_norm": 0.5460195541381836, + "learning_rate": 6.254180602006689e-05, + "loss": 1.104, + "step": 113 + }, + { + "epoch": 0.07149576669802446, + "grad_norm": 0.5366616249084473, + "learning_rate": 6.220735785953178e-05, + "loss": 1.0823, + "step": 114 + }, + { + "epoch": 0.07212292254625274, + "grad_norm": 0.6212649941444397, + "learning_rate": 6.187290969899667e-05, + "loss": 1.1166, + "step": 115 + }, + { + "epoch": 0.07275007839448103, + "grad_norm": 0.544100284576416, + "learning_rate": 6.153846153846155e-05, + "loss": 1.0853, + "step": 116 + }, + { + "epoch": 0.07337723424270931, + "grad_norm": 0.5893523693084717, + "learning_rate": 6.120401337792643e-05, + "loss": 1.1563, + "step": 117 + }, + { + "epoch": 0.0740043900909376, + "grad_norm": 0.5614563226699829, + "learning_rate": 6.086956521739131e-05, + "loss": 1.0899, + "step": 118 + }, + { + "epoch": 0.07463154593916588, + "grad_norm": 0.5281983613967896, + "learning_rate": 6.0535117056856194e-05, + "loss": 0.9794, + "step": 119 + }, + { + "epoch": 0.07525870178739416, + "grad_norm": 0.5434867143630981, + "learning_rate": 6.0200668896321076e-05, + "loss": 1.0257, + "step": 120 + }, + { + "epoch": 0.07588585763562246, + "grad_norm": 0.5829548835754395, + "learning_rate": 5.986622073578596e-05, + "loss": 1.0725, + "step": 121 + }, + { + "epoch": 0.07651301348385074, + "grad_norm": 0.6195796132087708, + "learning_rate": 5.953177257525085e-05, + "loss": 1.134, + "step": 122 + }, + { + "epoch": 0.07714016933207903, + "grad_norm": 0.5218833684921265, + "learning_rate": 5.919732441471573e-05, + "loss": 1.0776, + "step": 123 + }, + { + "epoch": 0.07776732518030731, + "grad_norm": 0.6198936700820923, + "learning_rate": 5.886287625418061e-05, + "loss": 1.1436, + "step": 124 + }, + { + "epoch": 0.07839448102853559, + "grad_norm": 0.6243681311607361, + "learning_rate": 5.852842809364549e-05, + "loss": 1.0745, + "step": 125 + }, + { + "epoch": 0.07902163687676388, + "grad_norm": 0.6964524984359741, + "learning_rate": 5.819397993311037e-05, + "loss": 1.1471, + "step": 126 + }, + { + "epoch": 0.07964879272499216, + "grad_norm": 0.5354197025299072, + "learning_rate": 5.785953177257525e-05, + "loss": 1.0318, + "step": 127 + }, + { + "epoch": 0.08027594857322044, + "grad_norm": 0.6243062615394592, + "learning_rate": 5.752508361204013e-05, + "loss": 1.1784, + "step": 128 + }, + { + "epoch": 0.08090310442144873, + "grad_norm": 0.6323968172073364, + "learning_rate": 5.7190635451505014e-05, + "loss": 1.1406, + "step": 129 + }, + { + "epoch": 0.08153026026967701, + "grad_norm": 0.5501415729522705, + "learning_rate": 5.6856187290969896e-05, + "loss": 1.0848, + "step": 130 + }, + { + "epoch": 0.0821574161179053, + "grad_norm": 0.5742263793945312, + "learning_rate": 5.652173913043478e-05, + "loss": 1.1337, + "step": 131 + }, + { + "epoch": 0.08278457196613359, + "grad_norm": 0.5625714659690857, + "learning_rate": 5.6187290969899666e-05, + "loss": 1.0598, + "step": 132 + }, + { + "epoch": 0.08341172781436187, + "grad_norm": 0.5755911469459534, + "learning_rate": 5.585284280936455e-05, + "loss": 0.9866, + "step": 133 + }, + { + "epoch": 0.08403888366259016, + "grad_norm": 0.6551838517189026, + "learning_rate": 5.551839464882943e-05, + "loss": 1.0965, + "step": 134 + }, + { + "epoch": 0.08466603951081844, + "grad_norm": 0.6076345443725586, + "learning_rate": 5.518394648829431e-05, + "loss": 1.0881, + "step": 135 + }, + { + "epoch": 0.08529319535904673, + "grad_norm": 0.5554491281509399, + "learning_rate": 5.4849498327759194e-05, + "loss": 1.0447, + "step": 136 + }, + { + "epoch": 0.08592035120727501, + "grad_norm": 0.5519855618476868, + "learning_rate": 5.451505016722408e-05, + "loss": 1.0043, + "step": 137 + }, + { + "epoch": 0.08654750705550329, + "grad_norm": 0.5771621465682983, + "learning_rate": 5.4180602006688965e-05, + "loss": 1.0422, + "step": 138 + }, + { + "epoch": 0.08717466290373158, + "grad_norm": 0.5669012069702148, + "learning_rate": 5.384615384615385e-05, + "loss": 1.0656, + "step": 139 + }, + { + "epoch": 0.08780181875195986, + "grad_norm": 0.5704293847084045, + "learning_rate": 5.351170568561873e-05, + "loss": 1.1394, + "step": 140 + }, + { + "epoch": 0.08842897460018814, + "grad_norm": 0.6227648258209229, + "learning_rate": 5.317725752508361e-05, + "loss": 1.0349, + "step": 141 + }, + { + "epoch": 0.08905613044841643, + "grad_norm": 0.6244713664054871, + "learning_rate": 5.284280936454849e-05, + "loss": 1.1137, + "step": 142 + }, + { + "epoch": 0.08968328629664471, + "grad_norm": 0.6457431316375732, + "learning_rate": 5.250836120401338e-05, + "loss": 1.1183, + "step": 143 + }, + { + "epoch": 0.09031044214487301, + "grad_norm": 0.5848296880722046, + "learning_rate": 5.217391304347826e-05, + "loss": 1.0605, + "step": 144 + }, + { + "epoch": 0.09093759799310129, + "grad_norm": 0.5849940180778503, + "learning_rate": 5.1839464882943145e-05, + "loss": 1.0723, + "step": 145 + }, + { + "epoch": 0.09156475384132957, + "grad_norm": 0.5821419358253479, + "learning_rate": 5.150501672240803e-05, + "loss": 1.0741, + "step": 146 + }, + { + "epoch": 0.09219190968955786, + "grad_norm": 0.6493012309074402, + "learning_rate": 5.117056856187291e-05, + "loss": 1.0765, + "step": 147 + }, + { + "epoch": 0.09281906553778614, + "grad_norm": 0.5811394453048706, + "learning_rate": 5.08361204013378e-05, + "loss": 1.0743, + "step": 148 + }, + { + "epoch": 0.09344622138601442, + "grad_norm": 0.5831654071807861, + "learning_rate": 5.050167224080268e-05, + "loss": 1.0832, + "step": 149 + }, + { + "epoch": 0.09407337723424271, + "grad_norm": 0.6489170789718628, + "learning_rate": 5.016722408026756e-05, + "loss": 1.0822, + "step": 150 + }, + { + "epoch": 0.09470053308247099, + "grad_norm": 0.6475048065185547, + "learning_rate": 4.983277591973244e-05, + "loss": 1.0875, + "step": 151 + }, + { + "epoch": 0.09532768893069928, + "grad_norm": 0.6687474846839905, + "learning_rate": 4.9498327759197325e-05, + "loss": 1.0386, + "step": 152 + }, + { + "epoch": 0.09595484477892756, + "grad_norm": 0.6984210014343262, + "learning_rate": 4.916387959866221e-05, + "loss": 1.0817, + "step": 153 + }, + { + "epoch": 0.09658200062715584, + "grad_norm": 0.5768287777900696, + "learning_rate": 4.8829431438127096e-05, + "loss": 1.0279, + "step": 154 + }, + { + "epoch": 0.09720915647538414, + "grad_norm": 0.66800856590271, + "learning_rate": 4.849498327759198e-05, + "loss": 1.1204, + "step": 155 + }, + { + "epoch": 0.09783631232361242, + "grad_norm": 0.5745876431465149, + "learning_rate": 4.816053511705686e-05, + "loss": 1.0726, + "step": 156 + }, + { + "epoch": 0.0984634681718407, + "grad_norm": 0.6475458145141602, + "learning_rate": 4.782608695652174e-05, + "loss": 1.1202, + "step": 157 + }, + { + "epoch": 0.09909062402006899, + "grad_norm": 0.6565487384796143, + "learning_rate": 4.7491638795986624e-05, + "loss": 1.1184, + "step": 158 + }, + { + "epoch": 0.09971777986829727, + "grad_norm": 0.673619270324707, + "learning_rate": 4.715719063545151e-05, + "loss": 0.9694, + "step": 159 + }, + { + "epoch": 0.10034493571652556, + "grad_norm": 0.591253399848938, + "learning_rate": 4.6822742474916394e-05, + "loss": 1.1111, + "step": 160 + }, + { + "epoch": 0.10097209156475384, + "grad_norm": 0.593618631362915, + "learning_rate": 4.6488294314381276e-05, + "loss": 1.0677, + "step": 161 + }, + { + "epoch": 0.10159924741298212, + "grad_norm": 0.6333155632019043, + "learning_rate": 4.615384615384616e-05, + "loss": 1.1757, + "step": 162 + }, + { + "epoch": 0.10222640326121041, + "grad_norm": 0.5838831663131714, + "learning_rate": 4.581939799331103e-05, + "loss": 1.0531, + "step": 163 + }, + { + "epoch": 0.10285355910943869, + "grad_norm": 0.5759211182594299, + "learning_rate": 4.548494983277592e-05, + "loss": 1.1191, + "step": 164 + }, + { + "epoch": 0.10348071495766697, + "grad_norm": 0.5697938799858093, + "learning_rate": 4.5150501672240804e-05, + "loss": 1.0633, + "step": 165 + }, + { + "epoch": 0.10410787080589526, + "grad_norm": 0.6286008358001709, + "learning_rate": 4.4816053511705686e-05, + "loss": 1.1103, + "step": 166 + }, + { + "epoch": 0.10473502665412356, + "grad_norm": 0.6342211961746216, + "learning_rate": 4.448160535117057e-05, + "loss": 1.0993, + "step": 167 + }, + { + "epoch": 0.10536218250235184, + "grad_norm": 0.6270163655281067, + "learning_rate": 4.414715719063545e-05, + "loss": 1.075, + "step": 168 + }, + { + "epoch": 0.10598933835058012, + "grad_norm": 0.6883480548858643, + "learning_rate": 4.381270903010034e-05, + "loss": 1.0802, + "step": 169 + }, + { + "epoch": 0.1066164941988084, + "grad_norm": 0.5330828428268433, + "learning_rate": 4.347826086956522e-05, + "loss": 1.0645, + "step": 170 + }, + { + "epoch": 0.10724365004703669, + "grad_norm": 0.6310392022132874, + "learning_rate": 4.31438127090301e-05, + "loss": 1.0856, + "step": 171 + }, + { + "epoch": 0.10787080589526497, + "grad_norm": 0.5833948254585266, + "learning_rate": 4.2809364548494984e-05, + "loss": 1.009, + "step": 172 + }, + { + "epoch": 0.10849796174349326, + "grad_norm": 0.6363572478294373, + "learning_rate": 4.2474916387959866e-05, + "loss": 1.0742, + "step": 173 + }, + { + "epoch": 0.10912511759172154, + "grad_norm": 0.619068443775177, + "learning_rate": 4.214046822742475e-05, + "loss": 1.052, + "step": 174 + }, + { + "epoch": 0.10975227343994982, + "grad_norm": 0.7238068580627441, + "learning_rate": 4.180602006688964e-05, + "loss": 1.103, + "step": 175 + }, + { + "epoch": 0.11037942928817811, + "grad_norm": 0.6014000177383423, + "learning_rate": 4.147157190635452e-05, + "loss": 1.0676, + "step": 176 + }, + { + "epoch": 0.11100658513640639, + "grad_norm": 0.6265282034873962, + "learning_rate": 4.11371237458194e-05, + "loss": 1.1245, + "step": 177 + }, + { + "epoch": 0.11163374098463469, + "grad_norm": 0.6240003705024719, + "learning_rate": 4.080267558528428e-05, + "loss": 1.0642, + "step": 178 + }, + { + "epoch": 0.11226089683286297, + "grad_norm": 0.6394066214561462, + "learning_rate": 4.0468227424749165e-05, + "loss": 1.0916, + "step": 179 + }, + { + "epoch": 0.11288805268109126, + "grad_norm": 0.5886242389678955, + "learning_rate": 4.0133779264214046e-05, + "loss": 1.0589, + "step": 180 + }, + { + "epoch": 0.11351520852931954, + "grad_norm": 0.6549810767173767, + "learning_rate": 3.9799331103678935e-05, + "loss": 1.0636, + "step": 181 + }, + { + "epoch": 0.11414236437754782, + "grad_norm": 0.5759313106536865, + "learning_rate": 3.946488294314382e-05, + "loss": 1.0285, + "step": 182 + }, + { + "epoch": 0.1147695202257761, + "grad_norm": 0.6328492164611816, + "learning_rate": 3.91304347826087e-05, + "loss": 1.1196, + "step": 183 + }, + { + "epoch": 0.11539667607400439, + "grad_norm": 0.6174641847610474, + "learning_rate": 3.879598662207358e-05, + "loss": 1.1102, + "step": 184 + }, + { + "epoch": 0.11602383192223267, + "grad_norm": 0.587072491645813, + "learning_rate": 3.846153846153846e-05, + "loss": 1.0394, + "step": 185 + }, + { + "epoch": 0.11665098777046096, + "grad_norm": 0.6698734164237976, + "learning_rate": 3.812709030100335e-05, + "loss": 1.1382, + "step": 186 + }, + { + "epoch": 0.11727814361868924, + "grad_norm": 0.6871348023414612, + "learning_rate": 3.7792642140468233e-05, + "loss": 1.0678, + "step": 187 + }, + { + "epoch": 0.11790529946691752, + "grad_norm": 0.5780801177024841, + "learning_rate": 3.745819397993311e-05, + "loss": 1.0239, + "step": 188 + }, + { + "epoch": 0.11853245531514581, + "grad_norm": 0.620836079120636, + "learning_rate": 3.712374581939799e-05, + "loss": 1.1471, + "step": 189 + }, + { + "epoch": 0.1191596111633741, + "grad_norm": 0.6410227417945862, + "learning_rate": 3.678929765886287e-05, + "loss": 1.1299, + "step": 190 + }, + { + "epoch": 0.11978676701160239, + "grad_norm": 0.6219584345817566, + "learning_rate": 3.645484949832776e-05, + "loss": 1.1006, + "step": 191 + }, + { + "epoch": 0.12041392285983067, + "grad_norm": 0.6573643684387207, + "learning_rate": 3.612040133779264e-05, + "loss": 1.1435, + "step": 192 + }, + { + "epoch": 0.12104107870805896, + "grad_norm": 0.6069468259811401, + "learning_rate": 3.5785953177257525e-05, + "loss": 1.091, + "step": 193 + }, + { + "epoch": 0.12166823455628724, + "grad_norm": 0.651067316532135, + "learning_rate": 3.545150501672241e-05, + "loss": 1.0609, + "step": 194 + }, + { + "epoch": 0.12229539040451552, + "grad_norm": 0.5634432435035706, + "learning_rate": 3.511705685618729e-05, + "loss": 1.0291, + "step": 195 + }, + { + "epoch": 0.1229225462527438, + "grad_norm": 0.5779179334640503, + "learning_rate": 3.478260869565218e-05, + "loss": 1.0537, + "step": 196 + }, + { + "epoch": 0.12354970210097209, + "grad_norm": 0.6306246519088745, + "learning_rate": 3.444816053511706e-05, + "loss": 1.0505, + "step": 197 + }, + { + "epoch": 0.12417685794920037, + "grad_norm": 0.6526687145233154, + "learning_rate": 3.411371237458194e-05, + "loss": 1.063, + "step": 198 + }, + { + "epoch": 0.12480401379742866, + "grad_norm": 0.6450494527816772, + "learning_rate": 3.3779264214046823e-05, + "loss": 1.0851, + "step": 199 + }, + { + "epoch": 0.12543116964565695, + "grad_norm": 0.6339107751846313, + "learning_rate": 3.3444816053511705e-05, + "loss": 1.045, + "step": 200 + }, + { + "epoch": 0.12605832549388524, + "grad_norm": 0.6144226789474487, + "learning_rate": 3.311036789297659e-05, + "loss": 1.0395, + "step": 201 + }, + { + "epoch": 0.12668548134211352, + "grad_norm": 0.5552593469619751, + "learning_rate": 3.2775919732441476e-05, + "loss": 1.0841, + "step": 202 + }, + { + "epoch": 0.1273126371903418, + "grad_norm": 0.5668186545372009, + "learning_rate": 3.244147157190636e-05, + "loss": 0.9973, + "step": 203 + }, + { + "epoch": 0.1279397930385701, + "grad_norm": 0.6562705636024475, + "learning_rate": 3.210702341137124e-05, + "loss": 1.1177, + "step": 204 + }, + { + "epoch": 0.12856694888679837, + "grad_norm": 0.6119149923324585, + "learning_rate": 3.177257525083612e-05, + "loss": 1.0633, + "step": 205 + }, + { + "epoch": 0.12919410473502665, + "grad_norm": 0.5346015691757202, + "learning_rate": 3.1438127090301004e-05, + "loss": 1.0137, + "step": 206 + }, + { + "epoch": 0.12982126058325494, + "grad_norm": 0.6165241599082947, + "learning_rate": 3.110367892976589e-05, + "loss": 1.1207, + "step": 207 + }, + { + "epoch": 0.13044841643148322, + "grad_norm": 0.5552146434783936, + "learning_rate": 3.0769230769230774e-05, + "loss": 0.9768, + "step": 208 + }, + { + "epoch": 0.1310755722797115, + "grad_norm": 0.7914800047874451, + "learning_rate": 3.0434782608695656e-05, + "loss": 1.0298, + "step": 209 + }, + { + "epoch": 0.1317027281279398, + "grad_norm": 0.6565576195716858, + "learning_rate": 3.0100334448160538e-05, + "loss": 1.0902, + "step": 210 + }, + { + "epoch": 0.13232988397616807, + "grad_norm": 0.6186336874961853, + "learning_rate": 2.9765886287625424e-05, + "loss": 1.0812, + "step": 211 + }, + { + "epoch": 0.13295703982439636, + "grad_norm": 0.623162567615509, + "learning_rate": 2.9431438127090305e-05, + "loss": 1.09, + "step": 212 + }, + { + "epoch": 0.13358419567262464, + "grad_norm": 0.61158686876297, + "learning_rate": 2.9096989966555184e-05, + "loss": 1.0838, + "step": 213 + }, + { + "epoch": 0.13421135152085292, + "grad_norm": 0.7301251292228699, + "learning_rate": 2.8762541806020066e-05, + "loss": 1.1238, + "step": 214 + }, + { + "epoch": 0.1348385073690812, + "grad_norm": 0.5984829068183899, + "learning_rate": 2.8428093645484948e-05, + "loss": 1.1151, + "step": 215 + }, + { + "epoch": 0.1354656632173095, + "grad_norm": 0.6232813000679016, + "learning_rate": 2.8093645484949833e-05, + "loss": 1.0937, + "step": 216 + }, + { + "epoch": 0.13609281906553777, + "grad_norm": 0.628766655921936, + "learning_rate": 2.7759197324414715e-05, + "loss": 1.0846, + "step": 217 + }, + { + "epoch": 0.13671997491376608, + "grad_norm": 0.5867526531219482, + "learning_rate": 2.7424749163879597e-05, + "loss": 1.0682, + "step": 218 + }, + { + "epoch": 0.13734713076199437, + "grad_norm": 0.6130120158195496, + "learning_rate": 2.7090301003344482e-05, + "loss": 0.9939, + "step": 219 + }, + { + "epoch": 0.13797428661022265, + "grad_norm": 0.6351154446601868, + "learning_rate": 2.6755852842809364e-05, + "loss": 1.0289, + "step": 220 + }, + { + "epoch": 0.13860144245845094, + "grad_norm": 0.5693631768226624, + "learning_rate": 2.6421404682274246e-05, + "loss": 1.0584, + "step": 221 + }, + { + "epoch": 0.13922859830667922, + "grad_norm": 0.7323676943778992, + "learning_rate": 2.608695652173913e-05, + "loss": 1.0999, + "step": 222 + }, + { + "epoch": 0.1398557541549075, + "grad_norm": 0.640272855758667, + "learning_rate": 2.5752508361204013e-05, + "loss": 1.0644, + "step": 223 + }, + { + "epoch": 0.14048291000313579, + "grad_norm": 0.5802699327468872, + "learning_rate": 2.54180602006689e-05, + "loss": 1.0275, + "step": 224 + }, + { + "epoch": 0.14111006585136407, + "grad_norm": 0.5993974804878235, + "learning_rate": 2.508361204013378e-05, + "loss": 1.0734, + "step": 225 + }, + { + "epoch": 0.14173722169959235, + "grad_norm": 0.6291252374649048, + "learning_rate": 2.4749163879598663e-05, + "loss": 1.0998, + "step": 226 + }, + { + "epoch": 0.14236437754782064, + "grad_norm": 0.6068099141120911, + "learning_rate": 2.4414715719063548e-05, + "loss": 1.0264, + "step": 227 + }, + { + "epoch": 0.14299153339604892, + "grad_norm": 0.6488208770751953, + "learning_rate": 2.408026755852843e-05, + "loss": 1.0634, + "step": 228 + }, + { + "epoch": 0.1436186892442772, + "grad_norm": 0.7257867455482483, + "learning_rate": 2.3745819397993312e-05, + "loss": 1.0244, + "step": 229 + }, + { + "epoch": 0.1442458450925055, + "grad_norm": 0.675695538520813, + "learning_rate": 2.3411371237458197e-05, + "loss": 1.1089, + "step": 230 + }, + { + "epoch": 0.14487300094073377, + "grad_norm": 0.6972987651824951, + "learning_rate": 2.307692307692308e-05, + "loss": 1.0762, + "step": 231 + }, + { + "epoch": 0.14550015678896205, + "grad_norm": 0.6543252468109131, + "learning_rate": 2.274247491638796e-05, + "loss": 1.0989, + "step": 232 + }, + { + "epoch": 0.14612731263719034, + "grad_norm": 0.6622485518455505, + "learning_rate": 2.2408026755852843e-05, + "loss": 1.0458, + "step": 233 + }, + { + "epoch": 0.14675446848541862, + "grad_norm": 0.7049436569213867, + "learning_rate": 2.2073578595317725e-05, + "loss": 1.0675, + "step": 234 + }, + { + "epoch": 0.1473816243336469, + "grad_norm": 0.6808706521987915, + "learning_rate": 2.173913043478261e-05, + "loss": 1.0374, + "step": 235 + }, + { + "epoch": 0.1480087801818752, + "grad_norm": 0.6068559288978577, + "learning_rate": 2.1404682274247492e-05, + "loss": 1.0097, + "step": 236 + }, + { + "epoch": 0.14863593603010347, + "grad_norm": 0.649840235710144, + "learning_rate": 2.1070234113712374e-05, + "loss": 1.1078, + "step": 237 + }, + { + "epoch": 0.14926309187833176, + "grad_norm": 0.7002633213996887, + "learning_rate": 2.073578595317726e-05, + "loss": 1.1646, + "step": 238 + }, + { + "epoch": 0.14989024772656004, + "grad_norm": 0.6251310706138611, + "learning_rate": 2.040133779264214e-05, + "loss": 1.0147, + "step": 239 + }, + { + "epoch": 0.15051740357478832, + "grad_norm": 0.6652458906173706, + "learning_rate": 2.0066889632107023e-05, + "loss": 0.9894, + "step": 240 + }, + { + "epoch": 0.15114455942301663, + "grad_norm": 0.5750979781150818, + "learning_rate": 1.973244147157191e-05, + "loss": 0.9389, + "step": 241 + }, + { + "epoch": 0.15177171527124492, + "grad_norm": 0.5840304493904114, + "learning_rate": 1.939799331103679e-05, + "loss": 1.0467, + "step": 242 + }, + { + "epoch": 0.1523988711194732, + "grad_norm": 0.6483275294303894, + "learning_rate": 1.9063545150501676e-05, + "loss": 1.1441, + "step": 243 + }, + { + "epoch": 0.15302602696770148, + "grad_norm": 0.6356698274612427, + "learning_rate": 1.8729096989966554e-05, + "loss": 1.004, + "step": 244 + }, + { + "epoch": 0.15365318281592977, + "grad_norm": 0.6308068633079529, + "learning_rate": 1.8394648829431436e-05, + "loss": 1.0407, + "step": 245 + }, + { + "epoch": 0.15428033866415805, + "grad_norm": 0.642793595790863, + "learning_rate": 1.806020066889632e-05, + "loss": 1.0187, + "step": 246 + }, + { + "epoch": 0.15490749451238633, + "grad_norm": 0.6721625924110413, + "learning_rate": 1.7725752508361204e-05, + "loss": 1.1702, + "step": 247 + }, + { + "epoch": 0.15553465036061462, + "grad_norm": 0.5884971618652344, + "learning_rate": 1.739130434782609e-05, + "loss": 1.0835, + "step": 248 + }, + { + "epoch": 0.1561618062088429, + "grad_norm": 0.6843286752700806, + "learning_rate": 1.705685618729097e-05, + "loss": 1.0798, + "step": 249 + }, + { + "epoch": 0.15678896205707119, + "grad_norm": 0.6333372592926025, + "learning_rate": 1.6722408026755853e-05, + "loss": 1.1092, + "step": 250 + }, + { + "epoch": 0.15741611790529947, + "grad_norm": 0.577247679233551, + "learning_rate": 1.6387959866220738e-05, + "loss": 0.9962, + "step": 251 + }, + { + "epoch": 0.15804327375352775, + "grad_norm": 0.5617176294326782, + "learning_rate": 1.605351170568562e-05, + "loss": 1.0189, + "step": 252 + }, + { + "epoch": 0.15867042960175604, + "grad_norm": 0.6413646936416626, + "learning_rate": 1.5719063545150502e-05, + "loss": 0.9931, + "step": 253 + }, + { + "epoch": 0.15929758544998432, + "grad_norm": 0.5995526909828186, + "learning_rate": 1.5384615384615387e-05, + "loss": 1.0613, + "step": 254 + }, + { + "epoch": 0.1599247412982126, + "grad_norm": 0.7512606382369995, + "learning_rate": 1.5050167224080269e-05, + "loss": 1.0668, + "step": 255 + }, + { + "epoch": 0.1605518971464409, + "grad_norm": 0.6574665307998657, + "learning_rate": 1.4715719063545153e-05, + "loss": 1.0746, + "step": 256 + }, + { + "epoch": 0.16117905299466917, + "grad_norm": 0.5919622182846069, + "learning_rate": 1.4381270903010033e-05, + "loss": 0.9597, + "step": 257 + }, + { + "epoch": 0.16180620884289745, + "grad_norm": 0.5962166786193848, + "learning_rate": 1.4046822742474917e-05, + "loss": 1.023, + "step": 258 + }, + { + "epoch": 0.16243336469112574, + "grad_norm": 0.5912542939186096, + "learning_rate": 1.3712374581939799e-05, + "loss": 0.9905, + "step": 259 + }, + { + "epoch": 0.16306052053935402, + "grad_norm": 0.607428789138794, + "learning_rate": 1.3377926421404682e-05, + "loss": 1.0885, + "step": 260 + }, + { + "epoch": 0.1636876763875823, + "grad_norm": 0.5949956178665161, + "learning_rate": 1.3043478260869566e-05, + "loss": 1.0587, + "step": 261 + }, + { + "epoch": 0.1643148322358106, + "grad_norm": 0.7283132076263428, + "learning_rate": 1.270903010033445e-05, + "loss": 1.0647, + "step": 262 + }, + { + "epoch": 0.16494198808403887, + "grad_norm": 0.6001077890396118, + "learning_rate": 1.2374581939799331e-05, + "loss": 0.9614, + "step": 263 + }, + { + "epoch": 0.16556914393226718, + "grad_norm": 0.6198778748512268, + "learning_rate": 1.2040133779264215e-05, + "loss": 1.1177, + "step": 264 + }, + { + "epoch": 0.16619629978049547, + "grad_norm": 0.637858510017395, + "learning_rate": 1.1705685618729099e-05, + "loss": 1.0138, + "step": 265 + }, + { + "epoch": 0.16682345562872375, + "grad_norm": 0.6358053684234619, + "learning_rate": 1.137123745819398e-05, + "loss": 1.1069, + "step": 266 + }, + { + "epoch": 0.16745061147695203, + "grad_norm": 0.6647583246231079, + "learning_rate": 1.1036789297658862e-05, + "loss": 1.0469, + "step": 267 + }, + { + "epoch": 0.16807776732518032, + "grad_norm": 0.6197422742843628, + "learning_rate": 1.0702341137123746e-05, + "loss": 1.0558, + "step": 268 + }, + { + "epoch": 0.1687049231734086, + "grad_norm": 0.6543489098548889, + "learning_rate": 1.036789297658863e-05, + "loss": 0.9776, + "step": 269 + }, + { + "epoch": 0.16933207902163688, + "grad_norm": 0.6723306775093079, + "learning_rate": 1.0033444816053512e-05, + "loss": 1.0513, + "step": 270 + }, + { + "epoch": 0.16995923486986517, + "grad_norm": 0.693151593208313, + "learning_rate": 9.698996655518395e-06, + "loss": 0.9982, + "step": 271 + }, + { + "epoch": 0.17058639071809345, + "grad_norm": 0.6714438796043396, + "learning_rate": 9.364548494983277e-06, + "loss": 1.0118, + "step": 272 + }, + { + "epoch": 0.17121354656632173, + "grad_norm": 0.6719355583190918, + "learning_rate": 9.03010033444816e-06, + "loss": 1.0448, + "step": 273 + }, + { + "epoch": 0.17184070241455002, + "grad_norm": 0.61588054895401, + "learning_rate": 8.695652173913044e-06, + "loss": 1.0432, + "step": 274 + }, + { + "epoch": 0.1724678582627783, + "grad_norm": 0.6188952326774597, + "learning_rate": 8.361204013377926e-06, + "loss": 1.0968, + "step": 275 + }, + { + "epoch": 0.17309501411100658, + "grad_norm": 0.6287317276000977, + "learning_rate": 8.02675585284281e-06, + "loss": 1.061, + "step": 276 + }, + { + "epoch": 0.17372216995923487, + "grad_norm": 0.6071834564208984, + "learning_rate": 7.692307692307694e-06, + "loss": 1.1155, + "step": 277 + }, + { + "epoch": 0.17434932580746315, + "grad_norm": 0.6347874999046326, + "learning_rate": 7.357859531772576e-06, + "loss": 1.0195, + "step": 278 + }, + { + "epoch": 0.17497648165569143, + "grad_norm": 0.7258687019348145, + "learning_rate": 7.023411371237458e-06, + "loss": 1.1093, + "step": 279 + }, + { + "epoch": 0.17560363750391972, + "grad_norm": 0.6398435235023499, + "learning_rate": 6.688963210702341e-06, + "loss": 1.0629, + "step": 280 + }, + { + "epoch": 0.176230793352148, + "grad_norm": 0.6699943542480469, + "learning_rate": 6.354515050167225e-06, + "loss": 1.0282, + "step": 281 + }, + { + "epoch": 0.17685794920037629, + "grad_norm": 0.6543219089508057, + "learning_rate": 6.0200668896321075e-06, + "loss": 1.0528, + "step": 282 + }, + { + "epoch": 0.17748510504860457, + "grad_norm": 0.6560432314872742, + "learning_rate": 5.68561872909699e-06, + "loss": 1.042, + "step": 283 + }, + { + "epoch": 0.17811226089683285, + "grad_norm": 0.604159414768219, + "learning_rate": 5.351170568561873e-06, + "loss": 1.0298, + "step": 284 + }, + { + "epoch": 0.17873941674506114, + "grad_norm": 0.57277512550354, + "learning_rate": 5.016722408026756e-06, + "loss": 1.0291, + "step": 285 + }, + { + "epoch": 0.17936657259328942, + "grad_norm": 0.6005733609199524, + "learning_rate": 4.682274247491639e-06, + "loss": 1.066, + "step": 286 + }, + { + "epoch": 0.17999372844151773, + "grad_norm": 0.6378143429756165, + "learning_rate": 4.347826086956522e-06, + "loss": 1.1147, + "step": 287 + }, + { + "epoch": 0.18062088428974601, + "grad_norm": 0.6788357496261597, + "learning_rate": 4.013377926421405e-06, + "loss": 0.9718, + "step": 288 + }, + { + "epoch": 0.1812480401379743, + "grad_norm": 0.6375592947006226, + "learning_rate": 3.678929765886288e-06, + "loss": 0.9952, + "step": 289 + }, + { + "epoch": 0.18187519598620258, + "grad_norm": 0.7433466911315918, + "learning_rate": 3.3444816053511705e-06, + "loss": 1.0636, + "step": 290 + }, + { + "epoch": 0.18250235183443086, + "grad_norm": 0.6593033671379089, + "learning_rate": 3.0100334448160537e-06, + "loss": 1.0491, + "step": 291 + }, + { + "epoch": 0.18312950768265915, + "grad_norm": 0.6319881677627563, + "learning_rate": 2.6755852842809365e-06, + "loss": 1.0633, + "step": 292 + }, + { + "epoch": 0.18375666353088743, + "grad_norm": 0.6874313950538635, + "learning_rate": 2.3411371237458193e-06, + "loss": 1.0182, + "step": 293 + }, + { + "epoch": 0.18438381937911572, + "grad_norm": 0.6446201205253601, + "learning_rate": 2.0066889632107025e-06, + "loss": 1.0318, + "step": 294 + }, + { + "epoch": 0.185010975227344, + "grad_norm": 0.6754373908042908, + "learning_rate": 1.6722408026755853e-06, + "loss": 1.0485, + "step": 295 + }, + { + "epoch": 0.18563813107557228, + "grad_norm": 0.6547301411628723, + "learning_rate": 1.3377926421404683e-06, + "loss": 1.0043, + "step": 296 + }, + { + "epoch": 0.18626528692380057, + "grad_norm": 0.603600263595581, + "learning_rate": 1.0033444816053512e-06, + "loss": 1.0574, + "step": 297 + }, + { + "epoch": 0.18689244277202885, + "grad_norm": 0.6556864380836487, + "learning_rate": 6.688963210702341e-07, + "loss": 1.0751, + "step": 298 + }, + { + "epoch": 0.18751959862025713, + "grad_norm": 0.6452258229255676, + "learning_rate": 3.3444816053511706e-07, + "loss": 1.0567, + "step": 299 + }, + { + "epoch": 0.18814675446848542, + "grad_norm": 0.6500896215438843, + "learning_rate": 0.0, + "loss": 1.0883, + "step": 300 + } + ], + "logging_steps": 1, + "max_steps": 300, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.766382062809907e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_English_Chinese/checkpoint-300/training_args.bin b/llama_English_Chinese/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..197ef3925f1a49c82557f295b52432c5aaae2bb3 --- /dev/null +++ b/llama_English_Chinese/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8118115b79eaff2a894f4c4dd0b288d8aadc922eedb579f30793defdfa89d32 +size 5624 diff --git a/llama_English_French/checkpoint-300/README.md b/llama_English_French/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_English_French/checkpoint-300/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_English_French/checkpoint-300/adapter_config.json b/llama_English_French/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a70e77e25f3e350f82b012e6927480f3e837652a --- /dev/null +++ b/llama_English_French/checkpoint-300/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "gate_proj", + "q_proj", + "down_proj", + "o_proj", + "up_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_English_French/checkpoint-300/adapter_model.safetensors b/llama_English_French/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1aab71e36fc48eff5ed39fb035b4231e54efcd6 --- /dev/null +++ b/llama_English_French/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:793c851883ee48a3bfeb42470a6febb902891bae0fb4b878cacb04c789b1d585 +size 167832240 diff --git a/llama_English_French/checkpoint-300/optimizer.pt b/llama_English_French/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1f4b55662cc48df51ca272aaa0dd02660152700 --- /dev/null +++ b/llama_English_French/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188ba8410552ddc178adf1f7ad90c99bca5ec4d4dcf87d496473d4bd546e539d +size 85723732 diff --git a/llama_English_French/checkpoint-300/rng_state.pth b/llama_English_French/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/llama_English_French/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/llama_English_French/checkpoint-300/scheduler.pt b/llama_English_French/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e209dcc25b944d4b85ca13bea87ea706231c655 --- /dev/null +++ b/llama_English_French/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7445dd881aa0e896fdc6ef61cf9c541f4299feaa6850ba7cac238afd6649f3 +size 1064 diff --git a/llama_English_French/checkpoint-300/special_tokens_map.json b/llama_English_French/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_English_French/checkpoint-300/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_English_French/checkpoint-300/tokenizer.json b/llama_English_French/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_English_French/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_English_French/checkpoint-300/tokenizer_config.json b/llama_English_French/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_English_French/checkpoint-300/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_English_French/checkpoint-300/trainer_state.json b/llama_English_French/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..abee8c964e3fa9e9a90b2b2335c01d302384898d --- /dev/null +++ b/llama_English_French/checkpoint-300/trainer_state.json @@ -0,0 +1,2133 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.18814675446848542, + "eval_steps": 500, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0006271558482282847, + "grad_norm": 1.1241575479507446, + "learning_rate": 0.0001, + "loss": 2.371, + "step": 1 + }, + { + "epoch": 0.0012543116964565694, + "grad_norm": 1.0141234397888184, + "learning_rate": 9.966555183946489e-05, + "loss": 2.1534, + "step": 2 + }, + { + "epoch": 0.0018814675446848542, + "grad_norm": 0.9427400231361389, + "learning_rate": 9.933110367892977e-05, + "loss": 2.3618, + "step": 3 + }, + { + "epoch": 0.002508623392913139, + "grad_norm": 0.9919387698173523, + "learning_rate": 9.899665551839465e-05, + "loss": 2.1973, + "step": 4 + }, + { + "epoch": 0.0031357792411414237, + "grad_norm": 1.042110562324524, + "learning_rate": 9.866220735785953e-05, + "loss": 2.0364, + "step": 5 + }, + { + "epoch": 0.0037629350893697085, + "grad_norm": 1.1066715717315674, + "learning_rate": 9.832775919732441e-05, + "loss": 1.7375, + "step": 6 + }, + { + "epoch": 0.004390090937597993, + "grad_norm": 1.1894862651824951, + "learning_rate": 9.799331103678931e-05, + "loss": 1.6476, + "step": 7 + }, + { + "epoch": 0.005017246785826278, + "grad_norm": 0.9607447385787964, + "learning_rate": 9.765886287625419e-05, + "loss": 1.4399, + "step": 8 + }, + { + "epoch": 0.005644402634054563, + "grad_norm": 0.8756276369094849, + "learning_rate": 9.732441471571907e-05, + "loss": 1.4069, + "step": 9 + }, + { + "epoch": 0.006271558482282847, + "grad_norm": 0.8891203999519348, + "learning_rate": 9.698996655518396e-05, + "loss": 1.3944, + "step": 10 + }, + { + "epoch": 0.006898714330511132, + "grad_norm": 0.9099848866462708, + "learning_rate": 9.665551839464884e-05, + "loss": 1.2704, + "step": 11 + }, + { + "epoch": 0.007525870178739417, + "grad_norm": 0.6798804998397827, + "learning_rate": 9.632107023411372e-05, + "loss": 1.2606, + "step": 12 + }, + { + "epoch": 0.008153026026967701, + "grad_norm": 0.4697795808315277, + "learning_rate": 9.59866220735786e-05, + "loss": 1.2039, + "step": 13 + }, + { + "epoch": 0.008780181875195987, + "grad_norm": 0.49036362767219543, + "learning_rate": 9.565217391304348e-05, + "loss": 1.2249, + "step": 14 + }, + { + "epoch": 0.00940733772342427, + "grad_norm": 0.4770255982875824, + "learning_rate": 9.531772575250837e-05, + "loss": 1.1787, + "step": 15 + }, + { + "epoch": 0.010034493571652555, + "grad_norm": 0.4494694769382477, + "learning_rate": 9.498327759197325e-05, + "loss": 1.2239, + "step": 16 + }, + { + "epoch": 0.01066164941988084, + "grad_norm": 0.4780626595020294, + "learning_rate": 9.464882943143813e-05, + "loss": 1.1149, + "step": 17 + }, + { + "epoch": 0.011288805268109126, + "grad_norm": 0.4681190550327301, + "learning_rate": 9.431438127090302e-05, + "loss": 1.1354, + "step": 18 + }, + { + "epoch": 0.01191596111633741, + "grad_norm": 0.49637556076049805, + "learning_rate": 9.39799331103679e-05, + "loss": 1.1951, + "step": 19 + }, + { + "epoch": 0.012543116964565695, + "grad_norm": 0.5190874338150024, + "learning_rate": 9.364548494983279e-05, + "loss": 1.1399, + "step": 20 + }, + { + "epoch": 0.01317027281279398, + "grad_norm": 0.5862295627593994, + "learning_rate": 9.331103678929767e-05, + "loss": 1.1189, + "step": 21 + }, + { + "epoch": 0.013797428661022263, + "grad_norm": 0.5344688892364502, + "learning_rate": 9.297658862876255e-05, + "loss": 1.1553, + "step": 22 + }, + { + "epoch": 0.014424584509250549, + "grad_norm": 0.5440303683280945, + "learning_rate": 9.264214046822743e-05, + "loss": 1.2029, + "step": 23 + }, + { + "epoch": 0.015051740357478834, + "grad_norm": 0.48431307077407837, + "learning_rate": 9.230769230769232e-05, + "loss": 1.1198, + "step": 24 + }, + { + "epoch": 0.01567889620570712, + "grad_norm": 0.5911933779716492, + "learning_rate": 9.19732441471572e-05, + "loss": 1.1882, + "step": 25 + }, + { + "epoch": 0.016306052053935403, + "grad_norm": 0.6006789803504944, + "learning_rate": 9.163879598662207e-05, + "loss": 1.0898, + "step": 26 + }, + { + "epoch": 0.016933207902163686, + "grad_norm": 0.6420937180519104, + "learning_rate": 9.130434782608696e-05, + "loss": 1.1844, + "step": 27 + }, + { + "epoch": 0.017560363750391973, + "grad_norm": 0.7237846851348877, + "learning_rate": 9.096989966555184e-05, + "loss": 1.1115, + "step": 28 + }, + { + "epoch": 0.018187519598620257, + "grad_norm": 0.7362813353538513, + "learning_rate": 9.063545150501673e-05, + "loss": 1.1492, + "step": 29 + }, + { + "epoch": 0.01881467544684854, + "grad_norm": 0.7279103398323059, + "learning_rate": 9.030100334448161e-05, + "loss": 1.0738, + "step": 30 + }, + { + "epoch": 0.019441831295076827, + "grad_norm": 0.7517884969711304, + "learning_rate": 8.996655518394649e-05, + "loss": 1.0702, + "step": 31 + }, + { + "epoch": 0.02006898714330511, + "grad_norm": 0.803814172744751, + "learning_rate": 8.963210702341137e-05, + "loss": 0.9605, + "step": 32 + }, + { + "epoch": 0.020696142991533398, + "grad_norm": 0.7744300961494446, + "learning_rate": 8.929765886287625e-05, + "loss": 1.0486, + "step": 33 + }, + { + "epoch": 0.02132329883976168, + "grad_norm": 0.8029720783233643, + "learning_rate": 8.896321070234114e-05, + "loss": 1.0673, + "step": 34 + }, + { + "epoch": 0.021950454687989965, + "grad_norm": 0.7909408211708069, + "learning_rate": 8.862876254180602e-05, + "loss": 1.1001, + "step": 35 + }, + { + "epoch": 0.022577610536218252, + "grad_norm": 0.6341575980186462, + "learning_rate": 8.82943143812709e-05, + "loss": 1.1613, + "step": 36 + }, + { + "epoch": 0.023204766384446535, + "grad_norm": 0.4735874831676483, + "learning_rate": 8.795986622073578e-05, + "loss": 0.9744, + "step": 37 + }, + { + "epoch": 0.02383192223267482, + "grad_norm": 0.36456331610679626, + "learning_rate": 8.762541806020068e-05, + "loss": 1.0535, + "step": 38 + }, + { + "epoch": 0.024459078080903106, + "grad_norm": 0.43026307225227356, + "learning_rate": 8.729096989966556e-05, + "loss": 1.0974, + "step": 39 + }, + { + "epoch": 0.02508623392913139, + "grad_norm": 0.4285936653614044, + "learning_rate": 8.695652173913044e-05, + "loss": 1.2176, + "step": 40 + }, + { + "epoch": 0.025713389777359673, + "grad_norm": 0.33779293298721313, + "learning_rate": 8.662207357859532e-05, + "loss": 1.0243, + "step": 41 + }, + { + "epoch": 0.02634054562558796, + "grad_norm": 0.3825758099555969, + "learning_rate": 8.62876254180602e-05, + "loss": 1.1127, + "step": 42 + }, + { + "epoch": 0.026967701473816243, + "grad_norm": 0.38178229331970215, + "learning_rate": 8.595317725752509e-05, + "loss": 1.058, + "step": 43 + }, + { + "epoch": 0.027594857322044527, + "grad_norm": 0.3562932312488556, + "learning_rate": 8.561872909698997e-05, + "loss": 1.0776, + "step": 44 + }, + { + "epoch": 0.028222013170272814, + "grad_norm": 0.41771742701530457, + "learning_rate": 8.528428093645485e-05, + "loss": 1.1033, + "step": 45 + }, + { + "epoch": 0.028849169018501097, + "grad_norm": 0.40144434571266174, + "learning_rate": 8.494983277591973e-05, + "loss": 1.0842, + "step": 46 + }, + { + "epoch": 0.02947632486672938, + "grad_norm": 0.4194628894329071, + "learning_rate": 8.461538461538461e-05, + "loss": 1.0986, + "step": 47 + }, + { + "epoch": 0.030103480714957668, + "grad_norm": 0.3551357388496399, + "learning_rate": 8.42809364548495e-05, + "loss": 0.9806, + "step": 48 + }, + { + "epoch": 0.03073063656318595, + "grad_norm": 0.4674742519855499, + "learning_rate": 8.394648829431439e-05, + "loss": 1.1232, + "step": 49 + }, + { + "epoch": 0.03135779241141424, + "grad_norm": 0.3798663020133972, + "learning_rate": 8.361204013377927e-05, + "loss": 1.0847, + "step": 50 + }, + { + "epoch": 0.03198494825964252, + "grad_norm": 0.37322738766670227, + "learning_rate": 8.327759197324416e-05, + "loss": 1.0732, + "step": 51 + }, + { + "epoch": 0.032612104107870805, + "grad_norm": 0.39893415570259094, + "learning_rate": 8.294314381270904e-05, + "loss": 1.0823, + "step": 52 + }, + { + "epoch": 0.03323925995609909, + "grad_norm": 0.4456530809402466, + "learning_rate": 8.260869565217392e-05, + "loss": 1.0817, + "step": 53 + }, + { + "epoch": 0.03386641580432737, + "grad_norm": 0.37753430008888245, + "learning_rate": 8.22742474916388e-05, + "loss": 1.0824, + "step": 54 + }, + { + "epoch": 0.03449357165255566, + "grad_norm": 0.3684673607349396, + "learning_rate": 8.193979933110368e-05, + "loss": 1.0696, + "step": 55 + }, + { + "epoch": 0.035120727500783946, + "grad_norm": 0.3490143120288849, + "learning_rate": 8.160535117056857e-05, + "loss": 1.0141, + "step": 56 + }, + { + "epoch": 0.03574788334901223, + "grad_norm": 0.3897489309310913, + "learning_rate": 8.127090301003345e-05, + "loss": 1.0179, + "step": 57 + }, + { + "epoch": 0.036375039197240513, + "grad_norm": 0.38840609788894653, + "learning_rate": 8.093645484949833e-05, + "loss": 1.0654, + "step": 58 + }, + { + "epoch": 0.0370021950454688, + "grad_norm": 0.4170911908149719, + "learning_rate": 8.060200668896321e-05, + "loss": 1.1377, + "step": 59 + }, + { + "epoch": 0.03762935089369708, + "grad_norm": 0.45000165700912476, + "learning_rate": 8.026755852842809e-05, + "loss": 1.091, + "step": 60 + }, + { + "epoch": 0.03825650674192537, + "grad_norm": 0.36387529969215393, + "learning_rate": 7.993311036789299e-05, + "loss": 1.0408, + "step": 61 + }, + { + "epoch": 0.038883662590153655, + "grad_norm": 0.40481963753700256, + "learning_rate": 7.959866220735787e-05, + "loss": 1.0383, + "step": 62 + }, + { + "epoch": 0.03951081843838194, + "grad_norm": 0.4196191728115082, + "learning_rate": 7.926421404682275e-05, + "loss": 1.0607, + "step": 63 + }, + { + "epoch": 0.04013797428661022, + "grad_norm": 0.36332547664642334, + "learning_rate": 7.892976588628763e-05, + "loss": 1.0907, + "step": 64 + }, + { + "epoch": 0.040765130134838505, + "grad_norm": 0.3924827575683594, + "learning_rate": 7.859531772575252e-05, + "loss": 1.1283, + "step": 65 + }, + { + "epoch": 0.041392285983066796, + "grad_norm": 0.42891165614128113, + "learning_rate": 7.82608695652174e-05, + "loss": 1.0302, + "step": 66 + }, + { + "epoch": 0.04201944183129508, + "grad_norm": 0.3694935441017151, + "learning_rate": 7.792642140468228e-05, + "loss": 1.0455, + "step": 67 + }, + { + "epoch": 0.04264659767952336, + "grad_norm": 0.36729368567466736, + "learning_rate": 7.759197324414716e-05, + "loss": 0.9672, + "step": 68 + }, + { + "epoch": 0.043273753527751646, + "grad_norm": 0.4106704890727997, + "learning_rate": 7.725752508361204e-05, + "loss": 1.0755, + "step": 69 + }, + { + "epoch": 0.04390090937597993, + "grad_norm": 0.3694314956665039, + "learning_rate": 7.692307692307693e-05, + "loss": 1.0127, + "step": 70 + }, + { + "epoch": 0.04452806522420821, + "grad_norm": 0.40102723240852356, + "learning_rate": 7.658862876254181e-05, + "loss": 1.0281, + "step": 71 + }, + { + "epoch": 0.045155221072436504, + "grad_norm": 0.481668621301651, + "learning_rate": 7.62541806020067e-05, + "loss": 1.0825, + "step": 72 + }, + { + "epoch": 0.04578237692066479, + "grad_norm": 0.4356762170791626, + "learning_rate": 7.591973244147159e-05, + "loss": 1.0739, + "step": 73 + }, + { + "epoch": 0.04640953276889307, + "grad_norm": 0.3791458010673523, + "learning_rate": 7.558528428093647e-05, + "loss": 1.1232, + "step": 74 + }, + { + "epoch": 0.047036688617121354, + "grad_norm": 0.4018148183822632, + "learning_rate": 7.525083612040135e-05, + "loss": 1.1061, + "step": 75 + }, + { + "epoch": 0.04766384446534964, + "grad_norm": 0.35003310441970825, + "learning_rate": 7.491638795986622e-05, + "loss": 1.0553, + "step": 76 + }, + { + "epoch": 0.04829100031357792, + "grad_norm": 0.3296242952346802, + "learning_rate": 7.45819397993311e-05, + "loss": 0.9636, + "step": 77 + }, + { + "epoch": 0.04891815616180621, + "grad_norm": 0.3522872030735016, + "learning_rate": 7.424749163879598e-05, + "loss": 1.029, + "step": 78 + }, + { + "epoch": 0.049545312010034495, + "grad_norm": 0.3733522295951843, + "learning_rate": 7.391304347826086e-05, + "loss": 1.0913, + "step": 79 + }, + { + "epoch": 0.05017246785826278, + "grad_norm": 0.38543638586997986, + "learning_rate": 7.357859531772575e-05, + "loss": 1.0734, + "step": 80 + }, + { + "epoch": 0.05079962370649106, + "grad_norm": 0.36167678236961365, + "learning_rate": 7.324414715719064e-05, + "loss": 1.07, + "step": 81 + }, + { + "epoch": 0.051426779554719346, + "grad_norm": 0.4997354745864868, + "learning_rate": 7.290969899665552e-05, + "loss": 1.1243, + "step": 82 + }, + { + "epoch": 0.05205393540294763, + "grad_norm": 0.39237678050994873, + "learning_rate": 7.25752508361204e-05, + "loss": 1.0661, + "step": 83 + }, + { + "epoch": 0.05268109125117592, + "grad_norm": 0.47913259267807007, + "learning_rate": 7.224080267558529e-05, + "loss": 1.029, + "step": 84 + }, + { + "epoch": 0.0533082470994042, + "grad_norm": 0.4255577325820923, + "learning_rate": 7.190635451505017e-05, + "loss": 1.134, + "step": 85 + }, + { + "epoch": 0.05393540294763249, + "grad_norm": 0.398740291595459, + "learning_rate": 7.157190635451505e-05, + "loss": 1.0946, + "step": 86 + }, + { + "epoch": 0.05456255879586077, + "grad_norm": 0.44168323278427124, + "learning_rate": 7.123745819397993e-05, + "loss": 1.1035, + "step": 87 + }, + { + "epoch": 0.055189714644089054, + "grad_norm": 0.3680216372013092, + "learning_rate": 7.090301003344481e-05, + "loss": 1.0278, + "step": 88 + }, + { + "epoch": 0.055816870492317344, + "grad_norm": 0.37274274230003357, + "learning_rate": 7.05685618729097e-05, + "loss": 1.0752, + "step": 89 + }, + { + "epoch": 0.05644402634054563, + "grad_norm": 0.3773200213909149, + "learning_rate": 7.023411371237458e-05, + "loss": 1.0778, + "step": 90 + }, + { + "epoch": 0.05707118218877391, + "grad_norm": 0.3545572757720947, + "learning_rate": 6.989966555183946e-05, + "loss": 1.0186, + "step": 91 + }, + { + "epoch": 0.057698338037002195, + "grad_norm": 0.39364176988601685, + "learning_rate": 6.956521739130436e-05, + "loss": 1.0964, + "step": 92 + }, + { + "epoch": 0.05832549388523048, + "grad_norm": 0.36303600668907166, + "learning_rate": 6.923076923076924e-05, + "loss": 1.0115, + "step": 93 + }, + { + "epoch": 0.05895264973345876, + "grad_norm": 0.4130772352218628, + "learning_rate": 6.889632107023412e-05, + "loss": 1.0067, + "step": 94 + }, + { + "epoch": 0.05957980558168705, + "grad_norm": 0.32856303453445435, + "learning_rate": 6.8561872909699e-05, + "loss": 0.9777, + "step": 95 + }, + { + "epoch": 0.060206961429915336, + "grad_norm": 0.3589972257614136, + "learning_rate": 6.822742474916388e-05, + "loss": 0.9646, + "step": 96 + }, + { + "epoch": 0.06083411727814362, + "grad_norm": 0.36986011266708374, + "learning_rate": 6.789297658862876e-05, + "loss": 1.0971, + "step": 97 + }, + { + "epoch": 0.0614612731263719, + "grad_norm": 0.38670483231544495, + "learning_rate": 6.755852842809365e-05, + "loss": 1.0684, + "step": 98 + }, + { + "epoch": 0.062088428974600186, + "grad_norm": 0.37243106961250305, + "learning_rate": 6.722408026755853e-05, + "loss": 1.027, + "step": 99 + }, + { + "epoch": 0.06271558482282848, + "grad_norm": 0.3547367751598358, + "learning_rate": 6.688963210702341e-05, + "loss": 1.0162, + "step": 100 + }, + { + "epoch": 0.06334274067105676, + "grad_norm": 0.33387291431427, + "learning_rate": 6.655518394648829e-05, + "loss": 1.0316, + "step": 101 + }, + { + "epoch": 0.06396989651928504, + "grad_norm": 0.36430153250694275, + "learning_rate": 6.622073578595317e-05, + "loss": 1.0509, + "step": 102 + }, + { + "epoch": 0.06459705236751333, + "grad_norm": 0.3841400146484375, + "learning_rate": 6.588628762541807e-05, + "loss": 0.9907, + "step": 103 + }, + { + "epoch": 0.06522420821574161, + "grad_norm": 0.3880312740802765, + "learning_rate": 6.555183946488295e-05, + "loss": 1.1199, + "step": 104 + }, + { + "epoch": 0.0658513640639699, + "grad_norm": 0.40127745270729065, + "learning_rate": 6.521739130434783e-05, + "loss": 1.0608, + "step": 105 + }, + { + "epoch": 0.06647851991219818, + "grad_norm": 0.36271992325782776, + "learning_rate": 6.488294314381272e-05, + "loss": 1.0196, + "step": 106 + }, + { + "epoch": 0.06710567576042646, + "grad_norm": 0.491242378950119, + "learning_rate": 6.45484949832776e-05, + "loss": 1.0333, + "step": 107 + }, + { + "epoch": 0.06773283160865474, + "grad_norm": 0.43671101331710815, + "learning_rate": 6.421404682274248e-05, + "loss": 0.9627, + "step": 108 + }, + { + "epoch": 0.06835998745688304, + "grad_norm": 0.3669928312301636, + "learning_rate": 6.387959866220736e-05, + "loss": 1.0114, + "step": 109 + }, + { + "epoch": 0.06898714330511133, + "grad_norm": 0.36973488330841064, + "learning_rate": 6.354515050167224e-05, + "loss": 1.018, + "step": 110 + }, + { + "epoch": 0.06961429915333961, + "grad_norm": 0.4004829525947571, + "learning_rate": 6.321070234113713e-05, + "loss": 1.1436, + "step": 111 + }, + { + "epoch": 0.07024145500156789, + "grad_norm": 0.37323564291000366, + "learning_rate": 6.287625418060201e-05, + "loss": 1.0113, + "step": 112 + }, + { + "epoch": 0.07086861084979618, + "grad_norm": 0.3787195086479187, + "learning_rate": 6.254180602006689e-05, + "loss": 1.0352, + "step": 113 + }, + { + "epoch": 0.07149576669802446, + "grad_norm": 0.3870258331298828, + "learning_rate": 6.220735785953178e-05, + "loss": 1.0475, + "step": 114 + }, + { + "epoch": 0.07212292254625274, + "grad_norm": 0.4817638099193573, + "learning_rate": 6.187290969899667e-05, + "loss": 1.0326, + "step": 115 + }, + { + "epoch": 0.07275007839448103, + "grad_norm": 0.39872509241104126, + "learning_rate": 6.153846153846155e-05, + "loss": 1.024, + "step": 116 + }, + { + "epoch": 0.07337723424270931, + "grad_norm": 0.45137402415275574, + "learning_rate": 6.120401337792643e-05, + "loss": 1.1143, + "step": 117 + }, + { + "epoch": 0.0740043900909376, + "grad_norm": 0.39231353998184204, + "learning_rate": 6.086956521739131e-05, + "loss": 1.0349, + "step": 118 + }, + { + "epoch": 0.07463154593916588, + "grad_norm": 0.35880520939826965, + "learning_rate": 6.0535117056856194e-05, + "loss": 0.9345, + "step": 119 + }, + { + "epoch": 0.07525870178739416, + "grad_norm": 0.3513333797454834, + "learning_rate": 6.0200668896321076e-05, + "loss": 0.9967, + "step": 120 + }, + { + "epoch": 0.07588585763562246, + "grad_norm": 0.35939666628837585, + "learning_rate": 5.986622073578596e-05, + "loss": 1.0429, + "step": 121 + }, + { + "epoch": 0.07651301348385074, + "grad_norm": 0.4144527018070221, + "learning_rate": 5.953177257525085e-05, + "loss": 1.0793, + "step": 122 + }, + { + "epoch": 0.07714016933207903, + "grad_norm": 0.37052595615386963, + "learning_rate": 5.919732441471573e-05, + "loss": 1.0402, + "step": 123 + }, + { + "epoch": 0.07776732518030731, + "grad_norm": 0.3883453607559204, + "learning_rate": 5.886287625418061e-05, + "loss": 1.0859, + "step": 124 + }, + { + "epoch": 0.07839448102853559, + "grad_norm": 0.3768259286880493, + "learning_rate": 5.852842809364549e-05, + "loss": 1.0231, + "step": 125 + }, + { + "epoch": 0.07902163687676388, + "grad_norm": 0.3901348412036896, + "learning_rate": 5.819397993311037e-05, + "loss": 1.0889, + "step": 126 + }, + { + "epoch": 0.07964879272499216, + "grad_norm": 0.404969185590744, + "learning_rate": 5.785953177257525e-05, + "loss": 0.9751, + "step": 127 + }, + { + "epoch": 0.08027594857322044, + "grad_norm": 0.4729614555835724, + "learning_rate": 5.752508361204013e-05, + "loss": 1.1595, + "step": 128 + }, + { + "epoch": 0.08090310442144873, + "grad_norm": 0.37773218750953674, + "learning_rate": 5.7190635451505014e-05, + "loss": 1.0176, + "step": 129 + }, + { + "epoch": 0.08153026026967701, + "grad_norm": 0.3693353831768036, + "learning_rate": 5.6856187290969896e-05, + "loss": 1.0628, + "step": 130 + }, + { + "epoch": 0.0821574161179053, + "grad_norm": 0.3893721103668213, + "learning_rate": 5.652173913043478e-05, + "loss": 1.083, + "step": 131 + }, + { + "epoch": 0.08278457196613359, + "grad_norm": 0.3790439963340759, + "learning_rate": 5.6187290969899666e-05, + "loss": 1.0221, + "step": 132 + }, + { + "epoch": 0.08341172781436187, + "grad_norm": 0.37359848618507385, + "learning_rate": 5.585284280936455e-05, + "loss": 0.9618, + "step": 133 + }, + { + "epoch": 0.08403888366259016, + "grad_norm": 0.3717849552631378, + "learning_rate": 5.551839464882943e-05, + "loss": 1.0401, + "step": 134 + }, + { + "epoch": 0.08466603951081844, + "grad_norm": 0.3849802613258362, + "learning_rate": 5.518394648829431e-05, + "loss": 1.0028, + "step": 135 + }, + { + "epoch": 0.08529319535904673, + "grad_norm": 0.3668459355831146, + "learning_rate": 5.4849498327759194e-05, + "loss": 1.0282, + "step": 136 + }, + { + "epoch": 0.08592035120727501, + "grad_norm": 0.3450651466846466, + "learning_rate": 5.451505016722408e-05, + "loss": 0.953, + "step": 137 + }, + { + "epoch": 0.08654750705550329, + "grad_norm": 0.4239393472671509, + "learning_rate": 5.4180602006688965e-05, + "loss": 1.0164, + "step": 138 + }, + { + "epoch": 0.08717466290373158, + "grad_norm": 0.3869022727012634, + "learning_rate": 5.384615384615385e-05, + "loss": 1.0557, + "step": 139 + }, + { + "epoch": 0.08780181875195986, + "grad_norm": 0.3623475432395935, + "learning_rate": 5.351170568561873e-05, + "loss": 1.0586, + "step": 140 + }, + { + "epoch": 0.08842897460018814, + "grad_norm": 0.39329102635383606, + "learning_rate": 5.317725752508361e-05, + "loss": 1.0265, + "step": 141 + }, + { + "epoch": 0.08905613044841643, + "grad_norm": 0.4004840552806854, + "learning_rate": 5.284280936454849e-05, + "loss": 1.0591, + "step": 142 + }, + { + "epoch": 0.08968328629664471, + "grad_norm": 0.4108268618583679, + "learning_rate": 5.250836120401338e-05, + "loss": 1.0628, + "step": 143 + }, + { + "epoch": 0.09031044214487301, + "grad_norm": 0.4385989308357239, + "learning_rate": 5.217391304347826e-05, + "loss": 1.0755, + "step": 144 + }, + { + "epoch": 0.09093759799310129, + "grad_norm": 0.43369996547698975, + "learning_rate": 5.1839464882943145e-05, + "loss": 1.0681, + "step": 145 + }, + { + "epoch": 0.09156475384132957, + "grad_norm": 0.3623196482658386, + "learning_rate": 5.150501672240803e-05, + "loss": 1.0098, + "step": 146 + }, + { + "epoch": 0.09219190968955786, + "grad_norm": 0.37398290634155273, + "learning_rate": 5.117056856187291e-05, + "loss": 0.9872, + "step": 147 + }, + { + "epoch": 0.09281906553778614, + "grad_norm": 0.3824230134487152, + "learning_rate": 5.08361204013378e-05, + "loss": 1.0274, + "step": 148 + }, + { + "epoch": 0.09344622138601442, + "grad_norm": 0.40129098296165466, + "learning_rate": 5.050167224080268e-05, + "loss": 1.057, + "step": 149 + }, + { + "epoch": 0.09407337723424271, + "grad_norm": 0.3967929780483246, + "learning_rate": 5.016722408026756e-05, + "loss": 1.0452, + "step": 150 + }, + { + "epoch": 0.09470053308247099, + "grad_norm": 0.4444187581539154, + "learning_rate": 4.983277591973244e-05, + "loss": 1.0567, + "step": 151 + }, + { + "epoch": 0.09532768893069928, + "grad_norm": 0.44575589895248413, + "learning_rate": 4.9498327759197325e-05, + "loss": 0.9791, + "step": 152 + }, + { + "epoch": 0.09595484477892756, + "grad_norm": 0.37481924891471863, + "learning_rate": 4.916387959866221e-05, + "loss": 1.0127, + "step": 153 + }, + { + "epoch": 0.09658200062715584, + "grad_norm": 0.38331514596939087, + "learning_rate": 4.8829431438127096e-05, + "loss": 1.0009, + "step": 154 + }, + { + "epoch": 0.09720915647538414, + "grad_norm": 0.41739198565483093, + "learning_rate": 4.849498327759198e-05, + "loss": 1.0721, + "step": 155 + }, + { + "epoch": 0.09783631232361242, + "grad_norm": 0.401023805141449, + "learning_rate": 4.816053511705686e-05, + "loss": 1.0375, + "step": 156 + }, + { + "epoch": 0.0984634681718407, + "grad_norm": 0.38500455021858215, + "learning_rate": 4.782608695652174e-05, + "loss": 1.045, + "step": 157 + }, + { + "epoch": 0.09909062402006899, + "grad_norm": 0.4293578863143921, + "learning_rate": 4.7491638795986624e-05, + "loss": 1.056, + "step": 158 + }, + { + "epoch": 0.09971777986829727, + "grad_norm": 0.4320215880870819, + "learning_rate": 4.715719063545151e-05, + "loss": 0.9005, + "step": 159 + }, + { + "epoch": 0.10034493571652556, + "grad_norm": 0.4018799960613251, + "learning_rate": 4.6822742474916394e-05, + "loss": 1.0486, + "step": 160 + }, + { + "epoch": 0.10097209156475384, + "grad_norm": 0.41116300225257874, + "learning_rate": 4.6488294314381276e-05, + "loss": 1.0391, + "step": 161 + }, + { + "epoch": 0.10159924741298212, + "grad_norm": 0.40958237648010254, + "learning_rate": 4.615384615384616e-05, + "loss": 1.1217, + "step": 162 + }, + { + "epoch": 0.10222640326121041, + "grad_norm": 0.41457492113113403, + "learning_rate": 4.581939799331103e-05, + "loss": 1.031, + "step": 163 + }, + { + "epoch": 0.10285355910943869, + "grad_norm": 0.41084182262420654, + "learning_rate": 4.548494983277592e-05, + "loss": 1.0815, + "step": 164 + }, + { + "epoch": 0.10348071495766697, + "grad_norm": 0.39257150888442993, + "learning_rate": 4.5150501672240804e-05, + "loss": 1.0174, + "step": 165 + }, + { + "epoch": 0.10410787080589526, + "grad_norm": 0.38952505588531494, + "learning_rate": 4.4816053511705686e-05, + "loss": 1.0691, + "step": 166 + }, + { + "epoch": 0.10473502665412356, + "grad_norm": 0.44120845198631287, + "learning_rate": 4.448160535117057e-05, + "loss": 1.0508, + "step": 167 + }, + { + "epoch": 0.10536218250235184, + "grad_norm": 0.4178633391857147, + "learning_rate": 4.414715719063545e-05, + "loss": 1.0608, + "step": 168 + }, + { + "epoch": 0.10598933835058012, + "grad_norm": 0.3926730453968048, + "learning_rate": 4.381270903010034e-05, + "loss": 1.0186, + "step": 169 + }, + { + "epoch": 0.1066164941988084, + "grad_norm": 0.4078935980796814, + "learning_rate": 4.347826086956522e-05, + "loss": 1.0849, + "step": 170 + }, + { + "epoch": 0.10724365004703669, + "grad_norm": 0.38935065269470215, + "learning_rate": 4.31438127090301e-05, + "loss": 1.0418, + "step": 171 + }, + { + "epoch": 0.10787080589526497, + "grad_norm": 0.4126635193824768, + "learning_rate": 4.2809364548494984e-05, + "loss": 1.0002, + "step": 172 + }, + { + "epoch": 0.10849796174349326, + "grad_norm": 0.413591593503952, + "learning_rate": 4.2474916387959866e-05, + "loss": 1.0497, + "step": 173 + }, + { + "epoch": 0.10912511759172154, + "grad_norm": 0.39876362681388855, + "learning_rate": 4.214046822742475e-05, + "loss": 1.036, + "step": 174 + }, + { + "epoch": 0.10975227343994982, + "grad_norm": 0.3766686022281647, + "learning_rate": 4.180602006688964e-05, + "loss": 1.0355, + "step": 175 + }, + { + "epoch": 0.11037942928817811, + "grad_norm": 0.3662855625152588, + "learning_rate": 4.147157190635452e-05, + "loss": 0.9986, + "step": 176 + }, + { + "epoch": 0.11100658513640639, + "grad_norm": 0.3837905526161194, + "learning_rate": 4.11371237458194e-05, + "loss": 1.0735, + "step": 177 + }, + { + "epoch": 0.11163374098463469, + "grad_norm": 0.5081769824028015, + "learning_rate": 4.080267558528428e-05, + "loss": 1.0549, + "step": 178 + }, + { + "epoch": 0.11226089683286297, + "grad_norm": 0.4082931578159332, + "learning_rate": 4.0468227424749165e-05, + "loss": 1.0462, + "step": 179 + }, + { + "epoch": 0.11288805268109126, + "grad_norm": 0.4553743600845337, + "learning_rate": 4.0133779264214046e-05, + "loss": 1.0242, + "step": 180 + }, + { + "epoch": 0.11351520852931954, + "grad_norm": 0.3822484314441681, + "learning_rate": 3.9799331103678935e-05, + "loss": 1.0099, + "step": 181 + }, + { + "epoch": 0.11414236437754782, + "grad_norm": 0.3885160982608795, + "learning_rate": 3.946488294314382e-05, + "loss": 0.9953, + "step": 182 + }, + { + "epoch": 0.1147695202257761, + "grad_norm": 0.38939550518989563, + "learning_rate": 3.91304347826087e-05, + "loss": 1.033, + "step": 183 + }, + { + "epoch": 0.11539667607400439, + "grad_norm": 0.4274287819862366, + "learning_rate": 3.879598662207358e-05, + "loss": 1.0495, + "step": 184 + }, + { + "epoch": 0.11602383192223267, + "grad_norm": 0.4428479075431824, + "learning_rate": 3.846153846153846e-05, + "loss": 0.9925, + "step": 185 + }, + { + "epoch": 0.11665098777046096, + "grad_norm": 0.4075606167316437, + "learning_rate": 3.812709030100335e-05, + "loss": 1.0926, + "step": 186 + }, + { + "epoch": 0.11727814361868924, + "grad_norm": 0.3637610375881195, + "learning_rate": 3.7792642140468233e-05, + "loss": 1.033, + "step": 187 + }, + { + "epoch": 0.11790529946691752, + "grad_norm": 0.38497859239578247, + "learning_rate": 3.745819397993311e-05, + "loss": 0.9976, + "step": 188 + }, + { + "epoch": 0.11853245531514581, + "grad_norm": 0.4265378415584564, + "learning_rate": 3.712374581939799e-05, + "loss": 1.125, + "step": 189 + }, + { + "epoch": 0.1191596111633741, + "grad_norm": 0.42353934049606323, + "learning_rate": 3.678929765886287e-05, + "loss": 1.0472, + "step": 190 + }, + { + "epoch": 0.11978676701160239, + "grad_norm": 0.42161011695861816, + "learning_rate": 3.645484949832776e-05, + "loss": 1.0717, + "step": 191 + }, + { + "epoch": 0.12041392285983067, + "grad_norm": 0.414531409740448, + "learning_rate": 3.612040133779264e-05, + "loss": 1.0454, + "step": 192 + }, + { + "epoch": 0.12104107870805896, + "grad_norm": 0.41244766116142273, + "learning_rate": 3.5785953177257525e-05, + "loss": 1.0387, + "step": 193 + }, + { + "epoch": 0.12166823455628724, + "grad_norm": 0.39831194281578064, + "learning_rate": 3.545150501672241e-05, + "loss": 0.9996, + "step": 194 + }, + { + "epoch": 0.12229539040451552, + "grad_norm": 0.397178053855896, + "learning_rate": 3.511705685618729e-05, + "loss": 1.02, + "step": 195 + }, + { + "epoch": 0.1229225462527438, + "grad_norm": 0.3719254732131958, + "learning_rate": 3.478260869565218e-05, + "loss": 1.0445, + "step": 196 + }, + { + "epoch": 0.12354970210097209, + "grad_norm": 0.39827004075050354, + "learning_rate": 3.444816053511706e-05, + "loss": 1.0226, + "step": 197 + }, + { + "epoch": 0.12417685794920037, + "grad_norm": 0.4362657070159912, + "learning_rate": 3.411371237458194e-05, + "loss": 1.0073, + "step": 198 + }, + { + "epoch": 0.12480401379742866, + "grad_norm": 0.41483965516090393, + "learning_rate": 3.3779264214046823e-05, + "loss": 1.043, + "step": 199 + }, + { + "epoch": 0.12543116964565695, + "grad_norm": 0.40933161973953247, + "learning_rate": 3.3444816053511705e-05, + "loss": 1.0299, + "step": 200 + }, + { + "epoch": 0.12605832549388524, + "grad_norm": 0.3616185784339905, + "learning_rate": 3.311036789297659e-05, + "loss": 0.9555, + "step": 201 + }, + { + "epoch": 0.12668548134211352, + "grad_norm": 0.37591472268104553, + "learning_rate": 3.2775919732441476e-05, + "loss": 1.076, + "step": 202 + }, + { + "epoch": 0.1273126371903418, + "grad_norm": 0.3866356611251831, + "learning_rate": 3.244147157190636e-05, + "loss": 0.9543, + "step": 203 + }, + { + "epoch": 0.1279397930385701, + "grad_norm": 0.4237740933895111, + "learning_rate": 3.210702341137124e-05, + "loss": 1.0667, + "step": 204 + }, + { + "epoch": 0.12856694888679837, + "grad_norm": 0.4242074489593506, + "learning_rate": 3.177257525083612e-05, + "loss": 0.9898, + "step": 205 + }, + { + "epoch": 0.12919410473502665, + "grad_norm": 0.39934250712394714, + "learning_rate": 3.1438127090301004e-05, + "loss": 1.0489, + "step": 206 + }, + { + "epoch": 0.12982126058325494, + "grad_norm": 0.42593199014663696, + "learning_rate": 3.110367892976589e-05, + "loss": 1.0614, + "step": 207 + }, + { + "epoch": 0.13044841643148322, + "grad_norm": 0.4550575315952301, + "learning_rate": 3.0769230769230774e-05, + "loss": 0.9835, + "step": 208 + }, + { + "epoch": 0.1310755722797115, + "grad_norm": 0.4025239944458008, + "learning_rate": 3.0434782608695656e-05, + "loss": 0.9985, + "step": 209 + }, + { + "epoch": 0.1317027281279398, + "grad_norm": 0.41113126277923584, + "learning_rate": 3.0100334448160538e-05, + "loss": 1.04, + "step": 210 + }, + { + "epoch": 0.13232988397616807, + "grad_norm": 0.39338940382003784, + "learning_rate": 2.9765886287625424e-05, + "loss": 1.0446, + "step": 211 + }, + { + "epoch": 0.13295703982439636, + "grad_norm": 0.4386296272277832, + "learning_rate": 2.9431438127090305e-05, + "loss": 1.0588, + "step": 212 + }, + { + "epoch": 0.13358419567262464, + "grad_norm": 0.454953134059906, + "learning_rate": 2.9096989966555184e-05, + "loss": 1.0658, + "step": 213 + }, + { + "epoch": 0.13421135152085292, + "grad_norm": 0.549017071723938, + "learning_rate": 2.8762541806020066e-05, + "loss": 1.0808, + "step": 214 + }, + { + "epoch": 0.1348385073690812, + "grad_norm": 0.4158805012702942, + "learning_rate": 2.8428093645484948e-05, + "loss": 1.0682, + "step": 215 + }, + { + "epoch": 0.1354656632173095, + "grad_norm": 0.37862929701805115, + "learning_rate": 2.8093645484949833e-05, + "loss": 1.057, + "step": 216 + }, + { + "epoch": 0.13609281906553777, + "grad_norm": 0.4133341610431671, + "learning_rate": 2.7759197324414715e-05, + "loss": 1.0314, + "step": 217 + }, + { + "epoch": 0.13671997491376608, + "grad_norm": 0.43372365832328796, + "learning_rate": 2.7424749163879597e-05, + "loss": 1.0514, + "step": 218 + }, + { + "epoch": 0.13734713076199437, + "grad_norm": 0.37303996086120605, + "learning_rate": 2.7090301003344482e-05, + "loss": 0.9123, + "step": 219 + }, + { + "epoch": 0.13797428661022265, + "grad_norm": 0.3717896342277527, + "learning_rate": 2.6755852842809364e-05, + "loss": 0.9934, + "step": 220 + }, + { + "epoch": 0.13860144245845094, + "grad_norm": 0.3860597610473633, + "learning_rate": 2.6421404682274246e-05, + "loss": 1.0206, + "step": 221 + }, + { + "epoch": 0.13922859830667922, + "grad_norm": 0.5044668912887573, + "learning_rate": 2.608695652173913e-05, + "loss": 1.0132, + "step": 222 + }, + { + "epoch": 0.1398557541549075, + "grad_norm": 0.4312911331653595, + "learning_rate": 2.5752508361204013e-05, + "loss": 1.0147, + "step": 223 + }, + { + "epoch": 0.14048291000313579, + "grad_norm": 0.4386849105358124, + "learning_rate": 2.54180602006689e-05, + "loss": 1.0569, + "step": 224 + }, + { + "epoch": 0.14111006585136407, + "grad_norm": 0.42580652236938477, + "learning_rate": 2.508361204013378e-05, + "loss": 1.0415, + "step": 225 + }, + { + "epoch": 0.14173722169959235, + "grad_norm": 0.4034588634967804, + "learning_rate": 2.4749163879598663e-05, + "loss": 1.0761, + "step": 226 + }, + { + "epoch": 0.14236437754782064, + "grad_norm": 0.42452365159988403, + "learning_rate": 2.4414715719063548e-05, + "loss": 1.015, + "step": 227 + }, + { + "epoch": 0.14299153339604892, + "grad_norm": 0.44657889008522034, + "learning_rate": 2.408026755852843e-05, + "loss": 1.0705, + "step": 228 + }, + { + "epoch": 0.1436186892442772, + "grad_norm": 0.4117855131626129, + "learning_rate": 2.3745819397993312e-05, + "loss": 1.0086, + "step": 229 + }, + { + "epoch": 0.1442458450925055, + "grad_norm": 0.44331252574920654, + "learning_rate": 2.3411371237458197e-05, + "loss": 1.0749, + "step": 230 + }, + { + "epoch": 0.14487300094073377, + "grad_norm": 0.43930211663246155, + "learning_rate": 2.307692307692308e-05, + "loss": 1.0463, + "step": 231 + }, + { + "epoch": 0.14550015678896205, + "grad_norm": 0.40405145287513733, + "learning_rate": 2.274247491638796e-05, + "loss": 1.0351, + "step": 232 + }, + { + "epoch": 0.14612731263719034, + "grad_norm": 0.42328453063964844, + "learning_rate": 2.2408026755852843e-05, + "loss": 0.9787, + "step": 233 + }, + { + "epoch": 0.14675446848541862, + "grad_norm": 0.40019333362579346, + "learning_rate": 2.2073578595317725e-05, + "loss": 0.9817, + "step": 234 + }, + { + "epoch": 0.1473816243336469, + "grad_norm": 0.39503028988838196, + "learning_rate": 2.173913043478261e-05, + "loss": 0.9632, + "step": 235 + }, + { + "epoch": 0.1480087801818752, + "grad_norm": 0.36922141909599304, + "learning_rate": 2.1404682274247492e-05, + "loss": 0.9626, + "step": 236 + }, + { + "epoch": 0.14863593603010347, + "grad_norm": 0.40530282258987427, + "learning_rate": 2.1070234113712374e-05, + "loss": 1.0501, + "step": 237 + }, + { + "epoch": 0.14926309187833176, + "grad_norm": 0.4161907136440277, + "learning_rate": 2.073578595317726e-05, + "loss": 1.0479, + "step": 238 + }, + { + "epoch": 0.14989024772656004, + "grad_norm": 0.4405962824821472, + "learning_rate": 2.040133779264214e-05, + "loss": 1.0068, + "step": 239 + }, + { + "epoch": 0.15051740357478832, + "grad_norm": 0.4103190004825592, + "learning_rate": 2.0066889632107023e-05, + "loss": 0.9647, + "step": 240 + }, + { + "epoch": 0.15114455942301663, + "grad_norm": 0.3906611502170563, + "learning_rate": 1.973244147157191e-05, + "loss": 0.9434, + "step": 241 + }, + { + "epoch": 0.15177171527124492, + "grad_norm": 0.4198826849460602, + "learning_rate": 1.939799331103679e-05, + "loss": 1.0264, + "step": 242 + }, + { + "epoch": 0.1523988711194732, + "grad_norm": 0.43589988350868225, + "learning_rate": 1.9063545150501676e-05, + "loss": 1.0957, + "step": 243 + }, + { + "epoch": 0.15302602696770148, + "grad_norm": 0.38247135281562805, + "learning_rate": 1.8729096989966554e-05, + "loss": 0.9668, + "step": 244 + }, + { + "epoch": 0.15365318281592977, + "grad_norm": 0.3897751271724701, + "learning_rate": 1.8394648829431436e-05, + "loss": 0.9465, + "step": 245 + }, + { + "epoch": 0.15428033866415805, + "grad_norm": 0.39580726623535156, + "learning_rate": 1.806020066889632e-05, + "loss": 0.9801, + "step": 246 + }, + { + "epoch": 0.15490749451238633, + "grad_norm": 0.4514491856098175, + "learning_rate": 1.7725752508361204e-05, + "loss": 1.1401, + "step": 247 + }, + { + "epoch": 0.15553465036061462, + "grad_norm": 0.40299785137176514, + "learning_rate": 1.739130434782609e-05, + "loss": 1.063, + "step": 248 + }, + { + "epoch": 0.1561618062088429, + "grad_norm": 0.41476714611053467, + "learning_rate": 1.705685618729097e-05, + "loss": 1.0065, + "step": 249 + }, + { + "epoch": 0.15678896205707119, + "grad_norm": 0.4274609088897705, + "learning_rate": 1.6722408026755853e-05, + "loss": 1.0558, + "step": 250 + }, + { + "epoch": 0.15741611790529947, + "grad_norm": 0.4935191869735718, + "learning_rate": 1.6387959866220738e-05, + "loss": 1.0056, + "step": 251 + }, + { + "epoch": 0.15804327375352775, + "grad_norm": 0.38647282123565674, + "learning_rate": 1.605351170568562e-05, + "loss": 0.9921, + "step": 252 + }, + { + "epoch": 0.15867042960175604, + "grad_norm": 0.41485288739204407, + "learning_rate": 1.5719063545150502e-05, + "loss": 0.9997, + "step": 253 + }, + { + "epoch": 0.15929758544998432, + "grad_norm": 0.41094207763671875, + "learning_rate": 1.5384615384615387e-05, + "loss": 1.026, + "step": 254 + }, + { + "epoch": 0.1599247412982126, + "grad_norm": 0.5010769367218018, + "learning_rate": 1.5050167224080269e-05, + "loss": 1.0301, + "step": 255 + }, + { + "epoch": 0.1605518971464409, + "grad_norm": 0.40588176250457764, + "learning_rate": 1.4715719063545153e-05, + "loss": 1.0085, + "step": 256 + }, + { + "epoch": 0.16117905299466917, + "grad_norm": 0.4186544120311737, + "learning_rate": 1.4381270903010033e-05, + "loss": 0.9676, + "step": 257 + }, + { + "epoch": 0.16180620884289745, + "grad_norm": 0.38908740878105164, + "learning_rate": 1.4046822742474917e-05, + "loss": 1.0011, + "step": 258 + }, + { + "epoch": 0.16243336469112574, + "grad_norm": 0.4013504385948181, + "learning_rate": 1.3712374581939799e-05, + "loss": 0.9683, + "step": 259 + }, + { + "epoch": 0.16306052053935402, + "grad_norm": 0.3967900276184082, + "learning_rate": 1.3377926421404682e-05, + "loss": 1.0273, + "step": 260 + }, + { + "epoch": 0.1636876763875823, + "grad_norm": 0.4046870172023773, + "learning_rate": 1.3043478260869566e-05, + "loss": 1.0192, + "step": 261 + }, + { + "epoch": 0.1643148322358106, + "grad_norm": 0.4919883608818054, + "learning_rate": 1.270903010033445e-05, + "loss": 1.0134, + "step": 262 + }, + { + "epoch": 0.16494198808403887, + "grad_norm": 0.39341261982917786, + "learning_rate": 1.2374581939799331e-05, + "loss": 0.9644, + "step": 263 + }, + { + "epoch": 0.16556914393226718, + "grad_norm": 0.44106414914131165, + "learning_rate": 1.2040133779264215e-05, + "loss": 1.0905, + "step": 264 + }, + { + "epoch": 0.16619629978049547, + "grad_norm": 0.4262993037700653, + "learning_rate": 1.1705685618729099e-05, + "loss": 0.9678, + "step": 265 + }, + { + "epoch": 0.16682345562872375, + "grad_norm": 0.43883177638053894, + "learning_rate": 1.137123745819398e-05, + "loss": 1.0759, + "step": 266 + }, + { + "epoch": 0.16745061147695203, + "grad_norm": 0.4418894052505493, + "learning_rate": 1.1036789297658862e-05, + "loss": 1.0093, + "step": 267 + }, + { + "epoch": 0.16807776732518032, + "grad_norm": 0.40942269563674927, + "learning_rate": 1.0702341137123746e-05, + "loss": 1.0525, + "step": 268 + }, + { + "epoch": 0.1687049231734086, + "grad_norm": 0.3933064639568329, + "learning_rate": 1.036789297658863e-05, + "loss": 0.9859, + "step": 269 + }, + { + "epoch": 0.16933207902163688, + "grad_norm": 0.43355491757392883, + "learning_rate": 1.0033444816053512e-05, + "loss": 1.0071, + "step": 270 + }, + { + "epoch": 0.16995923486986517, + "grad_norm": 0.4011549949645996, + "learning_rate": 9.698996655518395e-06, + "loss": 0.9301, + "step": 271 + }, + { + "epoch": 0.17058639071809345, + "grad_norm": 0.41236254572868347, + "learning_rate": 9.364548494983277e-06, + "loss": 0.9722, + "step": 272 + }, + { + "epoch": 0.17121354656632173, + "grad_norm": 0.4208712577819824, + "learning_rate": 9.03010033444816e-06, + "loss": 1.016, + "step": 273 + }, + { + "epoch": 0.17184070241455002, + "grad_norm": 0.43124738335609436, + "learning_rate": 8.695652173913044e-06, + "loss": 1.042, + "step": 274 + }, + { + "epoch": 0.1724678582627783, + "grad_norm": 0.43700850009918213, + "learning_rate": 8.361204013377926e-06, + "loss": 1.096, + "step": 275 + }, + { + "epoch": 0.17309501411100658, + "grad_norm": 0.41486409306526184, + "learning_rate": 8.02675585284281e-06, + "loss": 1.0385, + "step": 276 + }, + { + "epoch": 0.17372216995923487, + "grad_norm": 0.41307199001312256, + "learning_rate": 7.692307692307694e-06, + "loss": 1.0535, + "step": 277 + }, + { + "epoch": 0.17434932580746315, + "grad_norm": 0.4321426749229431, + "learning_rate": 7.357859531772576e-06, + "loss": 0.9939, + "step": 278 + }, + { + "epoch": 0.17497648165569143, + "grad_norm": 0.43575429916381836, + "learning_rate": 7.023411371237458e-06, + "loss": 1.0395, + "step": 279 + }, + { + "epoch": 0.17560363750391972, + "grad_norm": 0.40828558802604675, + "learning_rate": 6.688963210702341e-06, + "loss": 1.0325, + "step": 280 + }, + { + "epoch": 0.176230793352148, + "grad_norm": 0.39350196719169617, + "learning_rate": 6.354515050167225e-06, + "loss": 0.9613, + "step": 281 + }, + { + "epoch": 0.17685794920037629, + "grad_norm": 0.46433594822883606, + "learning_rate": 6.0200668896321075e-06, + "loss": 0.9917, + "step": 282 + }, + { + "epoch": 0.17748510504860457, + "grad_norm": 0.451623797416687, + "learning_rate": 5.68561872909699e-06, + "loss": 0.995, + "step": 283 + }, + { + "epoch": 0.17811226089683285, + "grad_norm": 0.4262632727622986, + "learning_rate": 5.351170568561873e-06, + "loss": 1.0278, + "step": 284 + }, + { + "epoch": 0.17873941674506114, + "grad_norm": 0.4028262197971344, + "learning_rate": 5.016722408026756e-06, + "loss": 1.0407, + "step": 285 + }, + { + "epoch": 0.17936657259328942, + "grad_norm": 0.4666818082332611, + "learning_rate": 4.682274247491639e-06, + "loss": 1.0857, + "step": 286 + }, + { + "epoch": 0.17999372844151773, + "grad_norm": 0.4177907705307007, + "learning_rate": 4.347826086956522e-06, + "loss": 1.1049, + "step": 287 + }, + { + "epoch": 0.18062088428974601, + "grad_norm": 0.4109366238117218, + "learning_rate": 4.013377926421405e-06, + "loss": 0.9684, + "step": 288 + }, + { + "epoch": 0.1812480401379743, + "grad_norm": 0.3886430263519287, + "learning_rate": 3.678929765886288e-06, + "loss": 0.9639, + "step": 289 + }, + { + "epoch": 0.18187519598620258, + "grad_norm": 0.3857450783252716, + "learning_rate": 3.3444816053511705e-06, + "loss": 1.0563, + "step": 290 + }, + { + "epoch": 0.18250235183443086, + "grad_norm": 0.4161767363548279, + "learning_rate": 3.0100334448160537e-06, + "loss": 0.9995, + "step": 291 + }, + { + "epoch": 0.18312950768265915, + "grad_norm": 0.4179271459579468, + "learning_rate": 2.6755852842809365e-06, + "loss": 1.0718, + "step": 292 + }, + { + "epoch": 0.18375666353088743, + "grad_norm": 0.36055099964141846, + "learning_rate": 2.3411371237458193e-06, + "loss": 0.9857, + "step": 293 + }, + { + "epoch": 0.18438381937911572, + "grad_norm": 0.40733909606933594, + "learning_rate": 2.0066889632107025e-06, + "loss": 0.9945, + "step": 294 + }, + { + "epoch": 0.185010975227344, + "grad_norm": 0.3714075982570648, + "learning_rate": 1.6722408026755853e-06, + "loss": 1.0026, + "step": 295 + }, + { + "epoch": 0.18563813107557228, + "grad_norm": 0.4596184492111206, + "learning_rate": 1.3377926421404683e-06, + "loss": 0.9711, + "step": 296 + }, + { + "epoch": 0.18626528692380057, + "grad_norm": 0.38908660411834717, + "learning_rate": 1.0033444816053512e-06, + "loss": 1.0009, + "step": 297 + }, + { + "epoch": 0.18689244277202885, + "grad_norm": 0.43956777453422546, + "learning_rate": 6.688963210702341e-07, + "loss": 1.0317, + "step": 298 + }, + { + "epoch": 0.18751959862025713, + "grad_norm": 0.4340553879737854, + "learning_rate": 3.3444816053511706e-07, + "loss": 1.0152, + "step": 299 + }, + { + "epoch": 0.18814675446848542, + "grad_norm": 0.4342001676559448, + "learning_rate": 0.0, + "loss": 1.0882, + "step": 300 + } + ], + "logging_steps": 1, + "max_steps": 300, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.314602411592909e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_English_French/checkpoint-300/training_args.bin b/llama_English_French/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7707275df602c1f37c1460dd4565908a45fccd8 --- /dev/null +++ b/llama_English_French/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b33964b95d0d57198b98fbeffaa4784f637b939f5e9f628b40d42ce51952e02 +size 5624 diff --git a/llama_English_German/checkpoint-300/README.md b/llama_English_German/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_English_German/checkpoint-300/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_English_German/checkpoint-300/adapter_config.json b/llama_English_German/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7b91487d24d7c60da9fa474dd66ea5ee6745eeff --- /dev/null +++ b/llama_English_German/checkpoint-300/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "down_proj", + "gate_proj", + "o_proj", + "q_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_English_German/checkpoint-300/adapter_model.safetensors b/llama_English_German/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0385287b86f2cdeb5e84da800f8561c8ab5039da --- /dev/null +++ b/llama_English_German/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b5667d1462e7bccef124db4c390f0399d917772ebdd06cac9b661250f3717f2 +size 167832240 diff --git a/llama_English_German/checkpoint-300/optimizer.pt b/llama_English_German/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bca703b9912f65b080f269b0b5f762759fcd0e40 --- /dev/null +++ b/llama_English_German/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c662668d0811732a27ed0312db08458969357f6c740479cb1e708fa1a227bdf4 +size 85723732 diff --git a/llama_English_German/checkpoint-300/rng_state.pth b/llama_English_German/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/llama_English_German/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/llama_English_German/checkpoint-300/scheduler.pt b/llama_English_German/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e209dcc25b944d4b85ca13bea87ea706231c655 --- /dev/null +++ b/llama_English_German/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7445dd881aa0e896fdc6ef61cf9c541f4299feaa6850ba7cac238afd6649f3 +size 1064 diff --git a/llama_English_German/checkpoint-300/special_tokens_map.json b/llama_English_German/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_English_German/checkpoint-300/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_English_German/checkpoint-300/tokenizer.json b/llama_English_German/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_English_German/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_English_German/checkpoint-300/tokenizer_config.json b/llama_English_German/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_English_German/checkpoint-300/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_English_German/checkpoint-300/trainer_state.json b/llama_English_German/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8eb37504aac170286a4172e8db0085a7bf17233d --- /dev/null +++ b/llama_English_German/checkpoint-300/trainer_state.json @@ -0,0 +1,2133 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.18814675446848542, + "eval_steps": 500, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0006271558482282847, + "grad_norm": 1.1711084842681885, + "learning_rate": 0.0001, + "loss": 2.4046, + "step": 1 + }, + { + "epoch": 0.0012543116964565694, + "grad_norm": 1.008390188217163, + "learning_rate": 9.966555183946489e-05, + "loss": 2.2223, + "step": 2 + }, + { + "epoch": 0.0018814675446848542, + "grad_norm": 0.9596850872039795, + "learning_rate": 9.933110367892977e-05, + "loss": 2.4222, + "step": 3 + }, + { + "epoch": 0.002508623392913139, + "grad_norm": 0.9983300566673279, + "learning_rate": 9.899665551839465e-05, + "loss": 2.2476, + "step": 4 + }, + { + "epoch": 0.0031357792411414237, + "grad_norm": 1.08811354637146, + "learning_rate": 9.866220735785953e-05, + "loss": 2.1187, + "step": 5 + }, + { + "epoch": 0.0037629350893697085, + "grad_norm": 1.1631696224212646, + "learning_rate": 9.832775919732441e-05, + "loss": 1.768, + "step": 6 + }, + { + "epoch": 0.004390090937597993, + "grad_norm": 1.5843448638916016, + "learning_rate": 9.799331103678931e-05, + "loss": 1.6732, + "step": 7 + }, + { + "epoch": 0.005017246785826278, + "grad_norm": 0.9309654235839844, + "learning_rate": 9.765886287625419e-05, + "loss": 1.4838, + "step": 8 + }, + { + "epoch": 0.005644402634054563, + "grad_norm": 0.809363603591919, + "learning_rate": 9.732441471571907e-05, + "loss": 1.4857, + "step": 9 + }, + { + "epoch": 0.006271558482282847, + "grad_norm": 0.8475091457366943, + "learning_rate": 9.698996655518396e-05, + "loss": 1.4753, + "step": 10 + }, + { + "epoch": 0.006898714330511132, + "grad_norm": 0.8010616898536682, + "learning_rate": 9.665551839464884e-05, + "loss": 1.2778, + "step": 11 + }, + { + "epoch": 0.007525870178739417, + "grad_norm": 0.6085690259933472, + "learning_rate": 9.632107023411372e-05, + "loss": 1.3194, + "step": 12 + }, + { + "epoch": 0.008153026026967701, + "grad_norm": 0.4717109799385071, + "learning_rate": 9.59866220735786e-05, + "loss": 1.275, + "step": 13 + }, + { + "epoch": 0.008780181875195987, + "grad_norm": 0.4746397137641907, + "learning_rate": 9.565217391304348e-05, + "loss": 1.2539, + "step": 14 + }, + { + "epoch": 0.00940733772342427, + "grad_norm": 0.48207026720046997, + "learning_rate": 9.531772575250837e-05, + "loss": 1.2033, + "step": 15 + }, + { + "epoch": 0.010034493571652555, + "grad_norm": 0.4692087769508362, + "learning_rate": 9.498327759197325e-05, + "loss": 1.2711, + "step": 16 + }, + { + "epoch": 0.01066164941988084, + "grad_norm": 0.4813322126865387, + "learning_rate": 9.464882943143813e-05, + "loss": 1.1544, + "step": 17 + }, + { + "epoch": 0.011288805268109126, + "grad_norm": 0.4796231985092163, + "learning_rate": 9.431438127090302e-05, + "loss": 1.1802, + "step": 18 + }, + { + "epoch": 0.01191596111633741, + "grad_norm": 0.5106775760650635, + "learning_rate": 9.39799331103679e-05, + "loss": 1.2377, + "step": 19 + }, + { + "epoch": 0.012543116964565695, + "grad_norm": 0.5248191356658936, + "learning_rate": 9.364548494983279e-05, + "loss": 1.1643, + "step": 20 + }, + { + "epoch": 0.01317027281279398, + "grad_norm": 0.5244407653808594, + "learning_rate": 9.331103678929767e-05, + "loss": 1.1315, + "step": 21 + }, + { + "epoch": 0.013797428661022263, + "grad_norm": 0.5456350445747375, + "learning_rate": 9.297658862876255e-05, + "loss": 1.1715, + "step": 22 + }, + { + "epoch": 0.014424584509250549, + "grad_norm": 0.5740377902984619, + "learning_rate": 9.264214046822743e-05, + "loss": 1.2334, + "step": 23 + }, + { + "epoch": 0.015051740357478834, + "grad_norm": 0.5430876016616821, + "learning_rate": 9.230769230769232e-05, + "loss": 1.1988, + "step": 24 + }, + { + "epoch": 0.01567889620570712, + "grad_norm": 0.6482923626899719, + "learning_rate": 9.19732441471572e-05, + "loss": 1.2454, + "step": 25 + }, + { + "epoch": 0.016306052053935403, + "grad_norm": 0.6234032511711121, + "learning_rate": 9.163879598662207e-05, + "loss": 1.1488, + "step": 26 + }, + { + "epoch": 0.016933207902163686, + "grad_norm": 0.6938403248786926, + "learning_rate": 9.130434782608696e-05, + "loss": 1.228, + "step": 27 + }, + { + "epoch": 0.017560363750391973, + "grad_norm": 0.7055364847183228, + "learning_rate": 9.096989966555184e-05, + "loss": 1.1577, + "step": 28 + }, + { + "epoch": 0.018187519598620257, + "grad_norm": 0.7398049235343933, + "learning_rate": 9.063545150501673e-05, + "loss": 1.1863, + "step": 29 + }, + { + "epoch": 0.01881467544684854, + "grad_norm": 0.7258826494216919, + "learning_rate": 9.030100334448161e-05, + "loss": 1.124, + "step": 30 + }, + { + "epoch": 0.019441831295076827, + "grad_norm": 0.7851470708847046, + "learning_rate": 8.996655518394649e-05, + "loss": 1.1256, + "step": 31 + }, + { + "epoch": 0.02006898714330511, + "grad_norm": 0.8108616471290588, + "learning_rate": 8.963210702341137e-05, + "loss": 1.0102, + "step": 32 + }, + { + "epoch": 0.020696142991533398, + "grad_norm": 0.7878923416137695, + "learning_rate": 8.929765886287625e-05, + "loss": 1.1043, + "step": 33 + }, + { + "epoch": 0.02132329883976168, + "grad_norm": 0.8210941553115845, + "learning_rate": 8.896321070234114e-05, + "loss": 1.0992, + "step": 34 + }, + { + "epoch": 0.021950454687989965, + "grad_norm": 0.8405901193618774, + "learning_rate": 8.862876254180602e-05, + "loss": 1.1528, + "step": 35 + }, + { + "epoch": 0.022577610536218252, + "grad_norm": 0.6356008052825928, + "learning_rate": 8.82943143812709e-05, + "loss": 1.1983, + "step": 36 + }, + { + "epoch": 0.023204766384446535, + "grad_norm": 0.4946132302284241, + "learning_rate": 8.795986622073578e-05, + "loss": 1.0066, + "step": 37 + }, + { + "epoch": 0.02383192223267482, + "grad_norm": 0.3963969051837921, + "learning_rate": 8.762541806020068e-05, + "loss": 1.1063, + "step": 38 + }, + { + "epoch": 0.024459078080903106, + "grad_norm": 0.42465490102767944, + "learning_rate": 8.729096989966556e-05, + "loss": 1.1582, + "step": 39 + }, + { + "epoch": 0.02508623392913139, + "grad_norm": 0.43699175119400024, + "learning_rate": 8.695652173913044e-05, + "loss": 1.2481, + "step": 40 + }, + { + "epoch": 0.025713389777359673, + "grad_norm": 0.35003581643104553, + "learning_rate": 8.662207357859532e-05, + "loss": 1.0657, + "step": 41 + }, + { + "epoch": 0.02634054562558796, + "grad_norm": 0.3598668873310089, + "learning_rate": 8.62876254180602e-05, + "loss": 1.1521, + "step": 42 + }, + { + "epoch": 0.026967701473816243, + "grad_norm": 0.3630351126194, + "learning_rate": 8.595317725752509e-05, + "loss": 1.0817, + "step": 43 + }, + { + "epoch": 0.027594857322044527, + "grad_norm": 0.40215277671813965, + "learning_rate": 8.561872909698997e-05, + "loss": 1.129, + "step": 44 + }, + { + "epoch": 0.028222013170272814, + "grad_norm": 0.4377795457839966, + "learning_rate": 8.528428093645485e-05, + "loss": 1.1326, + "step": 45 + }, + { + "epoch": 0.028849169018501097, + "grad_norm": 0.48775389790534973, + "learning_rate": 8.494983277591973e-05, + "loss": 1.1442, + "step": 46 + }, + { + "epoch": 0.02947632486672938, + "grad_norm": 0.4295575022697449, + "learning_rate": 8.461538461538461e-05, + "loss": 1.1551, + "step": 47 + }, + { + "epoch": 0.030103480714957668, + "grad_norm": 0.3610740900039673, + "learning_rate": 8.42809364548495e-05, + "loss": 1.019, + "step": 48 + }, + { + "epoch": 0.03073063656318595, + "grad_norm": 0.43356069922447205, + "learning_rate": 8.394648829431439e-05, + "loss": 1.1686, + "step": 49 + }, + { + "epoch": 0.03135779241141424, + "grad_norm": 0.4673689305782318, + "learning_rate": 8.361204013377927e-05, + "loss": 1.1223, + "step": 50 + }, + { + "epoch": 0.03198494825964252, + "grad_norm": 0.3878093659877777, + "learning_rate": 8.327759197324416e-05, + "loss": 1.1221, + "step": 51 + }, + { + "epoch": 0.032612104107870805, + "grad_norm": 0.40353527665138245, + "learning_rate": 8.294314381270904e-05, + "loss": 1.1203, + "step": 52 + }, + { + "epoch": 0.03323925995609909, + "grad_norm": 0.4405611753463745, + "learning_rate": 8.260869565217392e-05, + "loss": 1.121, + "step": 53 + }, + { + "epoch": 0.03386641580432737, + "grad_norm": 0.43877699971199036, + "learning_rate": 8.22742474916388e-05, + "loss": 1.1487, + "step": 54 + }, + { + "epoch": 0.03449357165255566, + "grad_norm": 0.38161155581474304, + "learning_rate": 8.193979933110368e-05, + "loss": 1.0979, + "step": 55 + }, + { + "epoch": 0.035120727500783946, + "grad_norm": 0.3485104441642761, + "learning_rate": 8.160535117056857e-05, + "loss": 1.0555, + "step": 56 + }, + { + "epoch": 0.03574788334901223, + "grad_norm": 0.4038199186325073, + "learning_rate": 8.127090301003345e-05, + "loss": 1.108, + "step": 57 + }, + { + "epoch": 0.036375039197240513, + "grad_norm": 0.4036957621574402, + "learning_rate": 8.093645484949833e-05, + "loss": 1.0979, + "step": 58 + }, + { + "epoch": 0.0370021950454688, + "grad_norm": 0.433468222618103, + "learning_rate": 8.060200668896321e-05, + "loss": 1.1714, + "step": 59 + }, + { + "epoch": 0.03762935089369708, + "grad_norm": 0.42450040578842163, + "learning_rate": 8.026755852842809e-05, + "loss": 1.0953, + "step": 60 + }, + { + "epoch": 0.03825650674192537, + "grad_norm": 0.43583229184150696, + "learning_rate": 7.993311036789299e-05, + "loss": 1.0818, + "step": 61 + }, + { + "epoch": 0.038883662590153655, + "grad_norm": 0.3825129270553589, + "learning_rate": 7.959866220735787e-05, + "loss": 1.0982, + "step": 62 + }, + { + "epoch": 0.03951081843838194, + "grad_norm": 0.4264048933982849, + "learning_rate": 7.926421404682275e-05, + "loss": 1.0857, + "step": 63 + }, + { + "epoch": 0.04013797428661022, + "grad_norm": 0.4036998152732849, + "learning_rate": 7.892976588628763e-05, + "loss": 1.1427, + "step": 64 + }, + { + "epoch": 0.040765130134838505, + "grad_norm": 0.40438225865364075, + "learning_rate": 7.859531772575252e-05, + "loss": 1.1452, + "step": 65 + }, + { + "epoch": 0.041392285983066796, + "grad_norm": 0.42490822076797485, + "learning_rate": 7.82608695652174e-05, + "loss": 1.0982, + "step": 66 + }, + { + "epoch": 0.04201944183129508, + "grad_norm": 0.395109087228775, + "learning_rate": 7.792642140468228e-05, + "loss": 1.0857, + "step": 67 + }, + { + "epoch": 0.04264659767952336, + "grad_norm": 0.36866042017936707, + "learning_rate": 7.759197324414716e-05, + "loss": 1.0181, + "step": 68 + }, + { + "epoch": 0.043273753527751646, + "grad_norm": 0.4392179846763611, + "learning_rate": 7.725752508361204e-05, + "loss": 1.1206, + "step": 69 + }, + { + "epoch": 0.04390090937597993, + "grad_norm": 0.44415852427482605, + "learning_rate": 7.692307692307693e-05, + "loss": 1.0366, + "step": 70 + }, + { + "epoch": 0.04452806522420821, + "grad_norm": 0.40733855962753296, + "learning_rate": 7.658862876254181e-05, + "loss": 1.0553, + "step": 71 + }, + { + "epoch": 0.045155221072436504, + "grad_norm": 0.44080695509910583, + "learning_rate": 7.62541806020067e-05, + "loss": 1.105, + "step": 72 + }, + { + "epoch": 0.04578237692066479, + "grad_norm": 0.4520654082298279, + "learning_rate": 7.591973244147159e-05, + "loss": 1.1086, + "step": 73 + }, + { + "epoch": 0.04640953276889307, + "grad_norm": 0.39047616720199585, + "learning_rate": 7.558528428093647e-05, + "loss": 1.17, + "step": 74 + }, + { + "epoch": 0.047036688617121354, + "grad_norm": 0.3795330822467804, + "learning_rate": 7.525083612040135e-05, + "loss": 1.1285, + "step": 75 + }, + { + "epoch": 0.04766384446534964, + "grad_norm": 0.3575787842273712, + "learning_rate": 7.491638795986622e-05, + "loss": 1.0705, + "step": 76 + }, + { + "epoch": 0.04829100031357792, + "grad_norm": 0.3704710304737091, + "learning_rate": 7.45819397993311e-05, + "loss": 1.0276, + "step": 77 + }, + { + "epoch": 0.04891815616180621, + "grad_norm": 0.3675391674041748, + "learning_rate": 7.424749163879598e-05, + "loss": 1.0486, + "step": 78 + }, + { + "epoch": 0.049545312010034495, + "grad_norm": 0.39707186818122864, + "learning_rate": 7.391304347826086e-05, + "loss": 1.1237, + "step": 79 + }, + { + "epoch": 0.05017246785826278, + "grad_norm": 0.4096493124961853, + "learning_rate": 7.357859531772575e-05, + "loss": 1.1009, + "step": 80 + }, + { + "epoch": 0.05079962370649106, + "grad_norm": 0.3946245014667511, + "learning_rate": 7.324414715719064e-05, + "loss": 1.1206, + "step": 81 + }, + { + "epoch": 0.051426779554719346, + "grad_norm": 0.4242815375328064, + "learning_rate": 7.290969899665552e-05, + "loss": 1.1361, + "step": 82 + }, + { + "epoch": 0.05205393540294763, + "grad_norm": 0.41571012139320374, + "learning_rate": 7.25752508361204e-05, + "loss": 1.0891, + "step": 83 + }, + { + "epoch": 0.05268109125117592, + "grad_norm": 0.4722791314125061, + "learning_rate": 7.224080267558529e-05, + "loss": 1.0776, + "step": 84 + }, + { + "epoch": 0.0533082470994042, + "grad_norm": 0.40548330545425415, + "learning_rate": 7.190635451505017e-05, + "loss": 1.1499, + "step": 85 + }, + { + "epoch": 0.05393540294763249, + "grad_norm": 0.4097810983657837, + "learning_rate": 7.157190635451505e-05, + "loss": 1.1157, + "step": 86 + }, + { + "epoch": 0.05456255879586077, + "grad_norm": 0.4443519711494446, + "learning_rate": 7.123745819397993e-05, + "loss": 1.1399, + "step": 87 + }, + { + "epoch": 0.055189714644089054, + "grad_norm": 0.3862561285495758, + "learning_rate": 7.090301003344481e-05, + "loss": 1.0257, + "step": 88 + }, + { + "epoch": 0.055816870492317344, + "grad_norm": 0.4226873219013214, + "learning_rate": 7.05685618729097e-05, + "loss": 1.157, + "step": 89 + }, + { + "epoch": 0.05644402634054563, + "grad_norm": 0.39252427220344543, + "learning_rate": 7.023411371237458e-05, + "loss": 1.0942, + "step": 90 + }, + { + "epoch": 0.05707118218877391, + "grad_norm": 0.4015486240386963, + "learning_rate": 6.989966555183946e-05, + "loss": 1.0752, + "step": 91 + }, + { + "epoch": 0.057698338037002195, + "grad_norm": 0.5048426985740662, + "learning_rate": 6.956521739130436e-05, + "loss": 1.1434, + "step": 92 + }, + { + "epoch": 0.05832549388523048, + "grad_norm": 0.4149401783943176, + "learning_rate": 6.923076923076924e-05, + "loss": 1.0733, + "step": 93 + }, + { + "epoch": 0.05895264973345876, + "grad_norm": 0.38355541229248047, + "learning_rate": 6.889632107023412e-05, + "loss": 1.021, + "step": 94 + }, + { + "epoch": 0.05957980558168705, + "grad_norm": 0.3577008545398712, + "learning_rate": 6.8561872909699e-05, + "loss": 1.0236, + "step": 95 + }, + { + "epoch": 0.060206961429915336, + "grad_norm": 0.3427559435367584, + "learning_rate": 6.822742474916388e-05, + "loss": 0.9913, + "step": 96 + }, + { + "epoch": 0.06083411727814362, + "grad_norm": 0.3767299950122833, + "learning_rate": 6.789297658862876e-05, + "loss": 1.1362, + "step": 97 + }, + { + "epoch": 0.0614612731263719, + "grad_norm": 0.39204639196395874, + "learning_rate": 6.755852842809365e-05, + "loss": 1.12, + "step": 98 + }, + { + "epoch": 0.062088428974600186, + "grad_norm": 0.39349111914634705, + "learning_rate": 6.722408026755853e-05, + "loss": 1.0915, + "step": 99 + }, + { + "epoch": 0.06271558482282848, + "grad_norm": 0.35664206743240356, + "learning_rate": 6.688963210702341e-05, + "loss": 1.0661, + "step": 100 + }, + { + "epoch": 0.06334274067105676, + "grad_norm": 0.4107705056667328, + "learning_rate": 6.655518394648829e-05, + "loss": 1.0927, + "step": 101 + }, + { + "epoch": 0.06396989651928504, + "grad_norm": 0.3697938621044159, + "learning_rate": 6.622073578595317e-05, + "loss": 1.0881, + "step": 102 + }, + { + "epoch": 0.06459705236751333, + "grad_norm": 0.39495396614074707, + "learning_rate": 6.588628762541807e-05, + "loss": 1.0044, + "step": 103 + }, + { + "epoch": 0.06522420821574161, + "grad_norm": 0.39368346333503723, + "learning_rate": 6.555183946488295e-05, + "loss": 1.1562, + "step": 104 + }, + { + "epoch": 0.0658513640639699, + "grad_norm": 0.42191728949546814, + "learning_rate": 6.521739130434783e-05, + "loss": 1.0978, + "step": 105 + }, + { + "epoch": 0.06647851991219818, + "grad_norm": 0.3669389486312866, + "learning_rate": 6.488294314381272e-05, + "loss": 1.0559, + "step": 106 + }, + { + "epoch": 0.06710567576042646, + "grad_norm": 0.4310162663459778, + "learning_rate": 6.45484949832776e-05, + "loss": 1.0318, + "step": 107 + }, + { + "epoch": 0.06773283160865474, + "grad_norm": 0.38448938727378845, + "learning_rate": 6.421404682274248e-05, + "loss": 0.9811, + "step": 108 + }, + { + "epoch": 0.06835998745688304, + "grad_norm": 0.4004499912261963, + "learning_rate": 6.387959866220736e-05, + "loss": 1.0724, + "step": 109 + }, + { + "epoch": 0.06898714330511133, + "grad_norm": 0.3796185851097107, + "learning_rate": 6.354515050167224e-05, + "loss": 1.0357, + "step": 110 + }, + { + "epoch": 0.06961429915333961, + "grad_norm": 0.40571820735931396, + "learning_rate": 6.321070234113713e-05, + "loss": 1.2145, + "step": 111 + }, + { + "epoch": 0.07024145500156789, + "grad_norm": 0.391155868768692, + "learning_rate": 6.287625418060201e-05, + "loss": 1.0646, + "step": 112 + }, + { + "epoch": 0.07086861084979618, + "grad_norm": 0.40619781613349915, + "learning_rate": 6.254180602006689e-05, + "loss": 1.0744, + "step": 113 + }, + { + "epoch": 0.07149576669802446, + "grad_norm": 0.41316911578178406, + "learning_rate": 6.220735785953178e-05, + "loss": 1.068, + "step": 114 + }, + { + "epoch": 0.07212292254625274, + "grad_norm": 0.4110977053642273, + "learning_rate": 6.187290969899667e-05, + "loss": 1.0571, + "step": 115 + }, + { + "epoch": 0.07275007839448103, + "grad_norm": 0.44076740741729736, + "learning_rate": 6.153846153846155e-05, + "loss": 1.0955, + "step": 116 + }, + { + "epoch": 0.07337723424270931, + "grad_norm": 0.4696763753890991, + "learning_rate": 6.120401337792643e-05, + "loss": 1.1225, + "step": 117 + }, + { + "epoch": 0.0740043900909376, + "grad_norm": 0.39804211258888245, + "learning_rate": 6.086956521739131e-05, + "loss": 1.058, + "step": 118 + }, + { + "epoch": 0.07463154593916588, + "grad_norm": 0.373542845249176, + "learning_rate": 6.0535117056856194e-05, + "loss": 0.9764, + "step": 119 + }, + { + "epoch": 0.07525870178739416, + "grad_norm": 0.39055049419403076, + "learning_rate": 6.0200668896321076e-05, + "loss": 1.0179, + "step": 120 + }, + { + "epoch": 0.07588585763562246, + "grad_norm": 0.38459017872810364, + "learning_rate": 5.986622073578596e-05, + "loss": 1.0748, + "step": 121 + }, + { + "epoch": 0.07651301348385074, + "grad_norm": 0.44538697600364685, + "learning_rate": 5.953177257525085e-05, + "loss": 1.1458, + "step": 122 + }, + { + "epoch": 0.07714016933207903, + "grad_norm": 0.39173659682273865, + "learning_rate": 5.919732441471573e-05, + "loss": 1.0784, + "step": 123 + }, + { + "epoch": 0.07776732518030731, + "grad_norm": 0.42208802700042725, + "learning_rate": 5.886287625418061e-05, + "loss": 1.1294, + "step": 124 + }, + { + "epoch": 0.07839448102853559, + "grad_norm": 0.40436652302742004, + "learning_rate": 5.852842809364549e-05, + "loss": 1.053, + "step": 125 + }, + { + "epoch": 0.07902163687676388, + "grad_norm": 0.41719841957092285, + "learning_rate": 5.819397993311037e-05, + "loss": 1.1382, + "step": 126 + }, + { + "epoch": 0.07964879272499216, + "grad_norm": 0.4149632751941681, + "learning_rate": 5.785953177257525e-05, + "loss": 1.0128, + "step": 127 + }, + { + "epoch": 0.08027594857322044, + "grad_norm": 0.504405677318573, + "learning_rate": 5.752508361204013e-05, + "loss": 1.1933, + "step": 128 + }, + { + "epoch": 0.08090310442144873, + "grad_norm": 0.4094352126121521, + "learning_rate": 5.7190635451505014e-05, + "loss": 1.0802, + "step": 129 + }, + { + "epoch": 0.08153026026967701, + "grad_norm": 0.37789756059646606, + "learning_rate": 5.6856187290969896e-05, + "loss": 1.082, + "step": 130 + }, + { + "epoch": 0.0821574161179053, + "grad_norm": 0.4045063257217407, + "learning_rate": 5.652173913043478e-05, + "loss": 1.1129, + "step": 131 + }, + { + "epoch": 0.08278457196613359, + "grad_norm": 0.40179678797721863, + "learning_rate": 5.6187290969899666e-05, + "loss": 1.0833, + "step": 132 + }, + { + "epoch": 0.08341172781436187, + "grad_norm": 0.403834730386734, + "learning_rate": 5.585284280936455e-05, + "loss": 0.9957, + "step": 133 + }, + { + "epoch": 0.08403888366259016, + "grad_norm": 0.38336220383644104, + "learning_rate": 5.551839464882943e-05, + "loss": 1.079, + "step": 134 + }, + { + "epoch": 0.08466603951081844, + "grad_norm": 0.44965264201164246, + "learning_rate": 5.518394648829431e-05, + "loss": 1.0677, + "step": 135 + }, + { + "epoch": 0.08529319535904673, + "grad_norm": 0.4083324074745178, + "learning_rate": 5.4849498327759194e-05, + "loss": 1.0375, + "step": 136 + }, + { + "epoch": 0.08592035120727501, + "grad_norm": 0.37549543380737305, + "learning_rate": 5.451505016722408e-05, + "loss": 0.9811, + "step": 137 + }, + { + "epoch": 0.08654750705550329, + "grad_norm": 0.41478872299194336, + "learning_rate": 5.4180602006688965e-05, + "loss": 1.0698, + "step": 138 + }, + { + "epoch": 0.08717466290373158, + "grad_norm": 0.4115673005580902, + "learning_rate": 5.384615384615385e-05, + "loss": 1.1094, + "step": 139 + }, + { + "epoch": 0.08780181875195986, + "grad_norm": 0.3878139555454254, + "learning_rate": 5.351170568561873e-05, + "loss": 1.0792, + "step": 140 + }, + { + "epoch": 0.08842897460018814, + "grad_norm": 0.43197542428970337, + "learning_rate": 5.317725752508361e-05, + "loss": 1.0418, + "step": 141 + }, + { + "epoch": 0.08905613044841643, + "grad_norm": 0.4401797652244568, + "learning_rate": 5.284280936454849e-05, + "loss": 1.09, + "step": 142 + }, + { + "epoch": 0.08968328629664471, + "grad_norm": 0.3953765630722046, + "learning_rate": 5.250836120401338e-05, + "loss": 1.0994, + "step": 143 + }, + { + "epoch": 0.09031044214487301, + "grad_norm": 0.5845592617988586, + "learning_rate": 5.217391304347826e-05, + "loss": 1.0931, + "step": 144 + }, + { + "epoch": 0.09093759799310129, + "grad_norm": 0.4614081084728241, + "learning_rate": 5.1839464882943145e-05, + "loss": 1.1122, + "step": 145 + }, + { + "epoch": 0.09156475384132957, + "grad_norm": 0.3829100430011749, + "learning_rate": 5.150501672240803e-05, + "loss": 1.0379, + "step": 146 + }, + { + "epoch": 0.09219190968955786, + "grad_norm": 0.3920990824699402, + "learning_rate": 5.117056856187291e-05, + "loss": 1.0251, + "step": 147 + }, + { + "epoch": 0.09281906553778614, + "grad_norm": 0.40057647228240967, + "learning_rate": 5.08361204013378e-05, + "loss": 1.075, + "step": 148 + }, + { + "epoch": 0.09344622138601442, + "grad_norm": 0.4224538505077362, + "learning_rate": 5.050167224080268e-05, + "loss": 1.0747, + "step": 149 + }, + { + "epoch": 0.09407337723424271, + "grad_norm": 0.495451956987381, + "learning_rate": 5.016722408026756e-05, + "loss": 1.113, + "step": 150 + }, + { + "epoch": 0.09470053308247099, + "grad_norm": 0.4393994212150574, + "learning_rate": 4.983277591973244e-05, + "loss": 1.0819, + "step": 151 + }, + { + "epoch": 0.09532768893069928, + "grad_norm": 0.41883373260498047, + "learning_rate": 4.9498327759197325e-05, + "loss": 1.0081, + "step": 152 + }, + { + "epoch": 0.09595484477892756, + "grad_norm": 0.4273068904876709, + "learning_rate": 4.916387959866221e-05, + "loss": 1.0865, + "step": 153 + }, + { + "epoch": 0.09658200062715584, + "grad_norm": 0.42094531655311584, + "learning_rate": 4.8829431438127096e-05, + "loss": 1.0419, + "step": 154 + }, + { + "epoch": 0.09720915647538414, + "grad_norm": 0.42659589648246765, + "learning_rate": 4.849498327759198e-05, + "loss": 1.1113, + "step": 155 + }, + { + "epoch": 0.09783631232361242, + "grad_norm": 0.38099798560142517, + "learning_rate": 4.816053511705686e-05, + "loss": 1.0508, + "step": 156 + }, + { + "epoch": 0.0984634681718407, + "grad_norm": 0.46048882603645325, + "learning_rate": 4.782608695652174e-05, + "loss": 1.0956, + "step": 157 + }, + { + "epoch": 0.09909062402006899, + "grad_norm": 0.4822726845741272, + "learning_rate": 4.7491638795986624e-05, + "loss": 1.0993, + "step": 158 + }, + { + "epoch": 0.09971777986829727, + "grad_norm": 0.42813563346862793, + "learning_rate": 4.715719063545151e-05, + "loss": 0.953, + "step": 159 + }, + { + "epoch": 0.10034493571652556, + "grad_norm": 0.4069565534591675, + "learning_rate": 4.6822742474916394e-05, + "loss": 1.0803, + "step": 160 + }, + { + "epoch": 0.10097209156475384, + "grad_norm": 0.4259192645549774, + "learning_rate": 4.6488294314381276e-05, + "loss": 1.082, + "step": 161 + }, + { + "epoch": 0.10159924741298212, + "grad_norm": 0.4321853220462799, + "learning_rate": 4.615384615384616e-05, + "loss": 1.1825, + "step": 162 + }, + { + "epoch": 0.10222640326121041, + "grad_norm": 0.42676714062690735, + "learning_rate": 4.581939799331103e-05, + "loss": 1.0423, + "step": 163 + }, + { + "epoch": 0.10285355910943869, + "grad_norm": 0.3924862742424011, + "learning_rate": 4.548494983277592e-05, + "loss": 1.1112, + "step": 164 + }, + { + "epoch": 0.10348071495766697, + "grad_norm": 0.38594508171081543, + "learning_rate": 4.5150501672240804e-05, + "loss": 1.0284, + "step": 165 + }, + { + "epoch": 0.10410787080589526, + "grad_norm": 0.4287392497062683, + "learning_rate": 4.4816053511705686e-05, + "loss": 1.11, + "step": 166 + }, + { + "epoch": 0.10473502665412356, + "grad_norm": 0.43313270807266235, + "learning_rate": 4.448160535117057e-05, + "loss": 1.0954, + "step": 167 + }, + { + "epoch": 0.10536218250235184, + "grad_norm": 0.44197893142700195, + "learning_rate": 4.414715719063545e-05, + "loss": 1.111, + "step": 168 + }, + { + "epoch": 0.10598933835058012, + "grad_norm": 0.3909394443035126, + "learning_rate": 4.381270903010034e-05, + "loss": 1.0555, + "step": 169 + }, + { + "epoch": 0.1066164941988084, + "grad_norm": 0.4243182837963104, + "learning_rate": 4.347826086956522e-05, + "loss": 1.1004, + "step": 170 + }, + { + "epoch": 0.10724365004703669, + "grad_norm": 0.4609302878379822, + "learning_rate": 4.31438127090301e-05, + "loss": 1.0873, + "step": 171 + }, + { + "epoch": 0.10787080589526497, + "grad_norm": 0.4273108243942261, + "learning_rate": 4.2809364548494984e-05, + "loss": 1.0637, + "step": 172 + }, + { + "epoch": 0.10849796174349326, + "grad_norm": 0.43937408924102783, + "learning_rate": 4.2474916387959866e-05, + "loss": 1.0548, + "step": 173 + }, + { + "epoch": 0.10912511759172154, + "grad_norm": 0.42941388487815857, + "learning_rate": 4.214046822742475e-05, + "loss": 1.0855, + "step": 174 + }, + { + "epoch": 0.10975227343994982, + "grad_norm": 0.4183155298233032, + "learning_rate": 4.180602006688964e-05, + "loss": 1.0658, + "step": 175 + }, + { + "epoch": 0.11037942928817811, + "grad_norm": 0.3873193860054016, + "learning_rate": 4.147157190635452e-05, + "loss": 1.0439, + "step": 176 + }, + { + "epoch": 0.11100658513640639, + "grad_norm": 0.4220661520957947, + "learning_rate": 4.11371237458194e-05, + "loss": 1.0994, + "step": 177 + }, + { + "epoch": 0.11163374098463469, + "grad_norm": 0.5796094536781311, + "learning_rate": 4.080267558528428e-05, + "loss": 1.1111, + "step": 178 + }, + { + "epoch": 0.11226089683286297, + "grad_norm": 0.4496459662914276, + "learning_rate": 4.0468227424749165e-05, + "loss": 1.1081, + "step": 179 + }, + { + "epoch": 0.11288805268109126, + "grad_norm": 0.4354843199253082, + "learning_rate": 4.0133779264214046e-05, + "loss": 1.0617, + "step": 180 + }, + { + "epoch": 0.11351520852931954, + "grad_norm": 0.4018702805042267, + "learning_rate": 3.9799331103678935e-05, + "loss": 1.041, + "step": 181 + }, + { + "epoch": 0.11414236437754782, + "grad_norm": 0.4189532697200775, + "learning_rate": 3.946488294314382e-05, + "loss": 1.0214, + "step": 182 + }, + { + "epoch": 0.1147695202257761, + "grad_norm": 0.43697431683540344, + "learning_rate": 3.91304347826087e-05, + "loss": 1.0886, + "step": 183 + }, + { + "epoch": 0.11539667607400439, + "grad_norm": 0.45214566588401794, + "learning_rate": 3.879598662207358e-05, + "loss": 1.0817, + "step": 184 + }, + { + "epoch": 0.11602383192223267, + "grad_norm": 0.5171282887458801, + "learning_rate": 3.846153846153846e-05, + "loss": 1.07, + "step": 185 + }, + { + "epoch": 0.11665098777046096, + "grad_norm": 0.47615286707878113, + "learning_rate": 3.812709030100335e-05, + "loss": 1.139, + "step": 186 + }, + { + "epoch": 0.11727814361868924, + "grad_norm": 0.41021615266799927, + "learning_rate": 3.7792642140468233e-05, + "loss": 1.0551, + "step": 187 + }, + { + "epoch": 0.11790529946691752, + "grad_norm": 0.4250172972679138, + "learning_rate": 3.745819397993311e-05, + "loss": 1.0366, + "step": 188 + }, + { + "epoch": 0.11853245531514581, + "grad_norm": 0.4063580334186554, + "learning_rate": 3.712374581939799e-05, + "loss": 1.1209, + "step": 189 + }, + { + "epoch": 0.1191596111633741, + "grad_norm": 0.4396308362483978, + "learning_rate": 3.678929765886287e-05, + "loss": 1.1082, + "step": 190 + }, + { + "epoch": 0.11978676701160239, + "grad_norm": 0.4504964351654053, + "learning_rate": 3.645484949832776e-05, + "loss": 1.1178, + "step": 191 + }, + { + "epoch": 0.12041392285983067, + "grad_norm": 0.4479026794433594, + "learning_rate": 3.612040133779264e-05, + "loss": 1.0619, + "step": 192 + }, + { + "epoch": 0.12104107870805896, + "grad_norm": 0.4287208020687103, + "learning_rate": 3.5785953177257525e-05, + "loss": 1.0619, + "step": 193 + }, + { + "epoch": 0.12166823455628724, + "grad_norm": 0.40154144167900085, + "learning_rate": 3.545150501672241e-05, + "loss": 1.0142, + "step": 194 + }, + { + "epoch": 0.12229539040451552, + "grad_norm": 0.4535212814807892, + "learning_rate": 3.511705685618729e-05, + "loss": 1.0802, + "step": 195 + }, + { + "epoch": 0.1229225462527438, + "grad_norm": 0.44110241532325745, + "learning_rate": 3.478260869565218e-05, + "loss": 1.0918, + "step": 196 + }, + { + "epoch": 0.12354970210097209, + "grad_norm": 0.42719611525535583, + "learning_rate": 3.444816053511706e-05, + "loss": 1.0392, + "step": 197 + }, + { + "epoch": 0.12417685794920037, + "grad_norm": 0.47314372658729553, + "learning_rate": 3.411371237458194e-05, + "loss": 1.0826, + "step": 198 + }, + { + "epoch": 0.12480401379742866, + "grad_norm": 0.4626355767250061, + "learning_rate": 3.3779264214046823e-05, + "loss": 1.0953, + "step": 199 + }, + { + "epoch": 0.12543116964565695, + "grad_norm": 0.4239532947540283, + "learning_rate": 3.3444816053511705e-05, + "loss": 1.0525, + "step": 200 + }, + { + "epoch": 0.12605832549388524, + "grad_norm": 0.4138485789299011, + "learning_rate": 3.311036789297659e-05, + "loss": 1.0174, + "step": 201 + }, + { + "epoch": 0.12668548134211352, + "grad_norm": 0.42386671900749207, + "learning_rate": 3.2775919732441476e-05, + "loss": 1.1395, + "step": 202 + }, + { + "epoch": 0.1273126371903418, + "grad_norm": 0.43196991086006165, + "learning_rate": 3.244147157190636e-05, + "loss": 0.9895, + "step": 203 + }, + { + "epoch": 0.1279397930385701, + "grad_norm": 0.433040052652359, + "learning_rate": 3.210702341137124e-05, + "loss": 1.1044, + "step": 204 + }, + { + "epoch": 0.12856694888679837, + "grad_norm": 0.45214182138442993, + "learning_rate": 3.177257525083612e-05, + "loss": 1.0114, + "step": 205 + }, + { + "epoch": 0.12919410473502665, + "grad_norm": 0.42340749502182007, + "learning_rate": 3.1438127090301004e-05, + "loss": 1.0759, + "step": 206 + }, + { + "epoch": 0.12982126058325494, + "grad_norm": 0.4078756868839264, + "learning_rate": 3.110367892976589e-05, + "loss": 1.0794, + "step": 207 + }, + { + "epoch": 0.13044841643148322, + "grad_norm": 0.4599223732948303, + "learning_rate": 3.0769230769230774e-05, + "loss": 1.0031, + "step": 208 + }, + { + "epoch": 0.1310755722797115, + "grad_norm": 0.4522516429424286, + "learning_rate": 3.0434782608695656e-05, + "loss": 1.0573, + "step": 209 + }, + { + "epoch": 0.1317027281279398, + "grad_norm": 0.4725417494773865, + "learning_rate": 3.0100334448160538e-05, + "loss": 1.0747, + "step": 210 + }, + { + "epoch": 0.13232988397616807, + "grad_norm": 0.3871685862541199, + "learning_rate": 2.9765886287625424e-05, + "loss": 1.0604, + "step": 211 + }, + { + "epoch": 0.13295703982439636, + "grad_norm": 0.4436711370944977, + "learning_rate": 2.9431438127090305e-05, + "loss": 1.0968, + "step": 212 + }, + { + "epoch": 0.13358419567262464, + "grad_norm": 0.469163179397583, + "learning_rate": 2.9096989966555184e-05, + "loss": 1.0837, + "step": 213 + }, + { + "epoch": 0.13421135152085292, + "grad_norm": 0.5687686800956726, + "learning_rate": 2.8762541806020066e-05, + "loss": 1.1365, + "step": 214 + }, + { + "epoch": 0.1348385073690812, + "grad_norm": 0.4138805866241455, + "learning_rate": 2.8428093645484948e-05, + "loss": 1.1286, + "step": 215 + }, + { + "epoch": 0.1354656632173095, + "grad_norm": 0.41999486088752747, + "learning_rate": 2.8093645484949833e-05, + "loss": 1.0997, + "step": 216 + }, + { + "epoch": 0.13609281906553777, + "grad_norm": 0.477196604013443, + "learning_rate": 2.7759197324414715e-05, + "loss": 1.0981, + "step": 217 + }, + { + "epoch": 0.13671997491376608, + "grad_norm": 0.42687904834747314, + "learning_rate": 2.7424749163879597e-05, + "loss": 1.0995, + "step": 218 + }, + { + "epoch": 0.13734713076199437, + "grad_norm": 0.3768168091773987, + "learning_rate": 2.7090301003344482e-05, + "loss": 0.9547, + "step": 219 + }, + { + "epoch": 0.13797428661022265, + "grad_norm": 0.44866129755973816, + "learning_rate": 2.6755852842809364e-05, + "loss": 1.0523, + "step": 220 + }, + { + "epoch": 0.13860144245845094, + "grad_norm": 0.40763071179389954, + "learning_rate": 2.6421404682274246e-05, + "loss": 1.0735, + "step": 221 + }, + { + "epoch": 0.13922859830667922, + "grad_norm": 0.46890097856521606, + "learning_rate": 2.608695652173913e-05, + "loss": 1.0587, + "step": 222 + }, + { + "epoch": 0.1398557541549075, + "grad_norm": 0.415772408246994, + "learning_rate": 2.5752508361204013e-05, + "loss": 1.0704, + "step": 223 + }, + { + "epoch": 0.14048291000313579, + "grad_norm": 0.4222339987754822, + "learning_rate": 2.54180602006689e-05, + "loss": 1.0769, + "step": 224 + }, + { + "epoch": 0.14111006585136407, + "grad_norm": 0.4234933853149414, + "learning_rate": 2.508361204013378e-05, + "loss": 1.0896, + "step": 225 + }, + { + "epoch": 0.14173722169959235, + "grad_norm": 0.41926753520965576, + "learning_rate": 2.4749163879598663e-05, + "loss": 1.1036, + "step": 226 + }, + { + "epoch": 0.14236437754782064, + "grad_norm": 0.42077013850212097, + "learning_rate": 2.4414715719063548e-05, + "loss": 1.0231, + "step": 227 + }, + { + "epoch": 0.14299153339604892, + "grad_norm": 0.43312421441078186, + "learning_rate": 2.408026755852843e-05, + "loss": 1.0829, + "step": 228 + }, + { + "epoch": 0.1436186892442772, + "grad_norm": 0.42710229754447937, + "learning_rate": 2.3745819397993312e-05, + "loss": 1.0389, + "step": 229 + }, + { + "epoch": 0.1442458450925055, + "grad_norm": 0.4713851511478424, + "learning_rate": 2.3411371237458197e-05, + "loss": 1.1172, + "step": 230 + }, + { + "epoch": 0.14487300094073377, + "grad_norm": 0.44223445653915405, + "learning_rate": 2.307692307692308e-05, + "loss": 1.0541, + "step": 231 + }, + { + "epoch": 0.14550015678896205, + "grad_norm": 0.41953906416893005, + "learning_rate": 2.274247491638796e-05, + "loss": 1.0799, + "step": 232 + }, + { + "epoch": 0.14612731263719034, + "grad_norm": 0.4237317144870758, + "learning_rate": 2.2408026755852843e-05, + "loss": 1.0088, + "step": 233 + }, + { + "epoch": 0.14675446848541862, + "grad_norm": 0.46339884400367737, + "learning_rate": 2.2073578595317725e-05, + "loss": 1.0181, + "step": 234 + }, + { + "epoch": 0.1473816243336469, + "grad_norm": 0.41633766889572144, + "learning_rate": 2.173913043478261e-05, + "loss": 1.0265, + "step": 235 + }, + { + "epoch": 0.1480087801818752, + "grad_norm": 0.407678484916687, + "learning_rate": 2.1404682274247492e-05, + "loss": 0.9992, + "step": 236 + }, + { + "epoch": 0.14863593603010347, + "grad_norm": 0.44348976016044617, + "learning_rate": 2.1070234113712374e-05, + "loss": 1.0769, + "step": 237 + }, + { + "epoch": 0.14926309187833176, + "grad_norm": 0.42968854308128357, + "learning_rate": 2.073578595317726e-05, + "loss": 1.0618, + "step": 238 + }, + { + "epoch": 0.14989024772656004, + "grad_norm": 0.43641427159309387, + "learning_rate": 2.040133779264214e-05, + "loss": 1.0511, + "step": 239 + }, + { + "epoch": 0.15051740357478832, + "grad_norm": 0.4331563413143158, + "learning_rate": 2.0066889632107023e-05, + "loss": 0.9729, + "step": 240 + }, + { + "epoch": 0.15114455942301663, + "grad_norm": 0.3954283893108368, + "learning_rate": 1.973244147157191e-05, + "loss": 0.9756, + "step": 241 + }, + { + "epoch": 0.15177171527124492, + "grad_norm": 0.44188499450683594, + "learning_rate": 1.939799331103679e-05, + "loss": 1.0855, + "step": 242 + }, + { + "epoch": 0.1523988711194732, + "grad_norm": 0.46987831592559814, + "learning_rate": 1.9063545150501676e-05, + "loss": 1.1184, + "step": 243 + }, + { + "epoch": 0.15302602696770148, + "grad_norm": 0.4148559868335724, + "learning_rate": 1.8729096989966554e-05, + "loss": 0.9811, + "step": 244 + }, + { + "epoch": 0.15365318281592977, + "grad_norm": 0.439791738986969, + "learning_rate": 1.8394648829431436e-05, + "loss": 0.9971, + "step": 245 + }, + { + "epoch": 0.15428033866415805, + "grad_norm": 0.4216610789299011, + "learning_rate": 1.806020066889632e-05, + "loss": 1.0084, + "step": 246 + }, + { + "epoch": 0.15490749451238633, + "grad_norm": 0.5165313482284546, + "learning_rate": 1.7725752508361204e-05, + "loss": 1.224, + "step": 247 + }, + { + "epoch": 0.15553465036061462, + "grad_norm": 0.4595968425273895, + "learning_rate": 1.739130434782609e-05, + "loss": 1.1169, + "step": 248 + }, + { + "epoch": 0.1561618062088429, + "grad_norm": 0.40049493312835693, + "learning_rate": 1.705685618729097e-05, + "loss": 1.0533, + "step": 249 + }, + { + "epoch": 0.15678896205707119, + "grad_norm": 0.4440176486968994, + "learning_rate": 1.6722408026755853e-05, + "loss": 1.1044, + "step": 250 + }, + { + "epoch": 0.15741611790529947, + "grad_norm": 0.42966777086257935, + "learning_rate": 1.6387959866220738e-05, + "loss": 0.9887, + "step": 251 + }, + { + "epoch": 0.15804327375352775, + "grad_norm": 0.3809144198894501, + "learning_rate": 1.605351170568562e-05, + "loss": 1.0007, + "step": 252 + }, + { + "epoch": 0.15867042960175604, + "grad_norm": 0.42232662439346313, + "learning_rate": 1.5719063545150502e-05, + "loss": 1.0061, + "step": 253 + }, + { + "epoch": 0.15929758544998432, + "grad_norm": 0.43439656496047974, + "learning_rate": 1.5384615384615387e-05, + "loss": 1.0307, + "step": 254 + }, + { + "epoch": 0.1599247412982126, + "grad_norm": 0.5382441282272339, + "learning_rate": 1.5050167224080269e-05, + "loss": 1.0649, + "step": 255 + }, + { + "epoch": 0.1605518971464409, + "grad_norm": 0.44557082653045654, + "learning_rate": 1.4715719063545153e-05, + "loss": 1.0415, + "step": 256 + }, + { + "epoch": 0.16117905299466917, + "grad_norm": 0.41304898262023926, + "learning_rate": 1.4381270903010033e-05, + "loss": 1.0008, + "step": 257 + }, + { + "epoch": 0.16180620884289745, + "grad_norm": 0.3917330503463745, + "learning_rate": 1.4046822742474917e-05, + "loss": 1.0225, + "step": 258 + }, + { + "epoch": 0.16243336469112574, + "grad_norm": 0.43536174297332764, + "learning_rate": 1.3712374581939799e-05, + "loss": 1.0143, + "step": 259 + }, + { + "epoch": 0.16306052053935402, + "grad_norm": 0.4426629841327667, + "learning_rate": 1.3377926421404682e-05, + "loss": 1.0585, + "step": 260 + }, + { + "epoch": 0.1636876763875823, + "grad_norm": 0.4304388463497162, + "learning_rate": 1.3043478260869566e-05, + "loss": 1.0495, + "step": 261 + }, + { + "epoch": 0.1643148322358106, + "grad_norm": 0.5129019021987915, + "learning_rate": 1.270903010033445e-05, + "loss": 1.0578, + "step": 262 + }, + { + "epoch": 0.16494198808403887, + "grad_norm": 0.44399893283843994, + "learning_rate": 1.2374581939799331e-05, + "loss": 0.9971, + "step": 263 + }, + { + "epoch": 0.16556914393226718, + "grad_norm": 0.4635840356349945, + "learning_rate": 1.2040133779264215e-05, + "loss": 1.1255, + "step": 264 + }, + { + "epoch": 0.16619629978049547, + "grad_norm": 0.4606925845146179, + "learning_rate": 1.1705685618729099e-05, + "loss": 1.0031, + "step": 265 + }, + { + "epoch": 0.16682345562872375, + "grad_norm": 0.4620150923728943, + "learning_rate": 1.137123745819398e-05, + "loss": 1.0969, + "step": 266 + }, + { + "epoch": 0.16745061147695203, + "grad_norm": 0.47829005122184753, + "learning_rate": 1.1036789297658862e-05, + "loss": 1.0382, + "step": 267 + }, + { + "epoch": 0.16807776732518032, + "grad_norm": 0.41557204723358154, + "learning_rate": 1.0702341137123746e-05, + "loss": 1.0763, + "step": 268 + }, + { + "epoch": 0.1687049231734086, + "grad_norm": 0.4283234775066376, + "learning_rate": 1.036789297658863e-05, + "loss": 1.0327, + "step": 269 + }, + { + "epoch": 0.16933207902163688, + "grad_norm": 0.4353739619255066, + "learning_rate": 1.0033444816053512e-05, + "loss": 1.0306, + "step": 270 + }, + { + "epoch": 0.16995923486986517, + "grad_norm": 0.4430319666862488, + "learning_rate": 9.698996655518395e-06, + "loss": 0.9921, + "step": 271 + }, + { + "epoch": 0.17058639071809345, + "grad_norm": 0.40864184498786926, + "learning_rate": 9.364548494983277e-06, + "loss": 0.9795, + "step": 272 + }, + { + "epoch": 0.17121354656632173, + "grad_norm": 0.5019369721412659, + "learning_rate": 9.03010033444816e-06, + "loss": 1.039, + "step": 273 + }, + { + "epoch": 0.17184070241455002, + "grad_norm": 0.47258105874061584, + "learning_rate": 8.695652173913044e-06, + "loss": 1.0749, + "step": 274 + }, + { + "epoch": 0.1724678582627783, + "grad_norm": 0.4339354634284973, + "learning_rate": 8.361204013377926e-06, + "loss": 1.1275, + "step": 275 + }, + { + "epoch": 0.17309501411100658, + "grad_norm": 0.4404523968696594, + "learning_rate": 8.02675585284281e-06, + "loss": 1.073, + "step": 276 + }, + { + "epoch": 0.17372216995923487, + "grad_norm": 0.4684053659439087, + "learning_rate": 7.692307692307694e-06, + "loss": 1.1251, + "step": 277 + }, + { + "epoch": 0.17434932580746315, + "grad_norm": 0.47471368312835693, + "learning_rate": 7.357859531772576e-06, + "loss": 1.0472, + "step": 278 + }, + { + "epoch": 0.17497648165569143, + "grad_norm": 0.4690254330635071, + "learning_rate": 7.023411371237458e-06, + "loss": 1.0877, + "step": 279 + }, + { + "epoch": 0.17560363750391972, + "grad_norm": 0.4029237627983093, + "learning_rate": 6.688963210702341e-06, + "loss": 1.0563, + "step": 280 + }, + { + "epoch": 0.176230793352148, + "grad_norm": 0.39394280314445496, + "learning_rate": 6.354515050167225e-06, + "loss": 0.9945, + "step": 281 + }, + { + "epoch": 0.17685794920037629, + "grad_norm": 0.45974263548851013, + "learning_rate": 6.0200668896321075e-06, + "loss": 1.014, + "step": 282 + }, + { + "epoch": 0.17748510504860457, + "grad_norm": 0.46561309695243835, + "learning_rate": 5.68561872909699e-06, + "loss": 1.0119, + "step": 283 + }, + { + "epoch": 0.17811226089683285, + "grad_norm": 0.4529838562011719, + "learning_rate": 5.351170568561873e-06, + "loss": 1.0669, + "step": 284 + }, + { + "epoch": 0.17873941674506114, + "grad_norm": 0.4240736961364746, + "learning_rate": 5.016722408026756e-06, + "loss": 1.066, + "step": 285 + }, + { + "epoch": 0.17936657259328942, + "grad_norm": 0.43232661485671997, + "learning_rate": 4.682274247491639e-06, + "loss": 1.1118, + "step": 286 + }, + { + "epoch": 0.17999372844151773, + "grad_norm": 0.42727506160736084, + "learning_rate": 4.347826086956522e-06, + "loss": 1.1266, + "step": 287 + }, + { + "epoch": 0.18062088428974601, + "grad_norm": 0.44371864199638367, + "learning_rate": 4.013377926421405e-06, + "loss": 1.0074, + "step": 288 + }, + { + "epoch": 0.1812480401379743, + "grad_norm": 0.4051826596260071, + "learning_rate": 3.678929765886288e-06, + "loss": 0.9974, + "step": 289 + }, + { + "epoch": 0.18187519598620258, + "grad_norm": 0.4324273467063904, + "learning_rate": 3.3444816053511705e-06, + "loss": 1.0931, + "step": 290 + }, + { + "epoch": 0.18250235183443086, + "grad_norm": 0.45636269450187683, + "learning_rate": 3.0100334448160537e-06, + "loss": 1.0332, + "step": 291 + }, + { + "epoch": 0.18312950768265915, + "grad_norm": 0.43998774886131287, + "learning_rate": 2.6755852842809365e-06, + "loss": 1.0825, + "step": 292 + }, + { + "epoch": 0.18375666353088743, + "grad_norm": 0.38203734159469604, + "learning_rate": 2.3411371237458193e-06, + "loss": 1.02, + "step": 293 + }, + { + "epoch": 0.18438381937911572, + "grad_norm": 0.44293034076690674, + "learning_rate": 2.0066889632107025e-06, + "loss": 1.0437, + "step": 294 + }, + { + "epoch": 0.185010975227344, + "grad_norm": 0.45386576652526855, + "learning_rate": 1.6722408026755853e-06, + "loss": 1.0495, + "step": 295 + }, + { + "epoch": 0.18563813107557228, + "grad_norm": 0.43592047691345215, + "learning_rate": 1.3377926421404683e-06, + "loss": 1.0225, + "step": 296 + }, + { + "epoch": 0.18626528692380057, + "grad_norm": 0.4214257299900055, + "learning_rate": 1.0033444816053512e-06, + "loss": 1.0383, + "step": 297 + }, + { + "epoch": 0.18689244277202885, + "grad_norm": 0.4367026388645172, + "learning_rate": 6.688963210702341e-07, + "loss": 1.0466, + "step": 298 + }, + { + "epoch": 0.18751959862025713, + "grad_norm": 0.4570874273777008, + "learning_rate": 3.3444816053511706e-07, + "loss": 1.0191, + "step": 299 + }, + { + "epoch": 0.18814675446848542, + "grad_norm": 0.43601682782173157, + "learning_rate": 0.0, + "loss": 1.1164, + "step": 300 + } + ], + "logging_steps": 1, + "max_steps": 300, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.280261144756224e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_English_German/checkpoint-300/training_args.bin b/llama_English_German/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..89435c7abffd2f1d2361506be73943321fb12891 --- /dev/null +++ b/llama_English_German/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5cadd0641017aeabf8a408ad72988c6a1f74f149c9ba61616ef6a2796ba097 +size 5624 diff --git a/llama_English_Vietnamese/checkpoint-300/README.md b/llama_English_Vietnamese/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_English_Vietnamese/checkpoint-300/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_English_Vietnamese/checkpoint-300/adapter_config.json b/llama_English_Vietnamese/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3bca6e7bc6542bd07125c7f2738b405c9f4c55b4 --- /dev/null +++ b/llama_English_Vietnamese/checkpoint-300/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "v_proj", + "k_proj", + "up_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_English_Vietnamese/checkpoint-300/adapter_model.safetensors b/llama_English_Vietnamese/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..806fa1237e99d607642a1ca31447b7a5969d2777 --- /dev/null +++ b/llama_English_Vietnamese/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70e86afa107bbb958972b9df9b1e220fbddee54acd6c39aa725e1595370011a7 +size 167832240 diff --git a/llama_English_Vietnamese/checkpoint-300/optimizer.pt b/llama_English_Vietnamese/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..eaebe9e60e6ae0ed6332c6e63e97ccc4f8576d66 --- /dev/null +++ b/llama_English_Vietnamese/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cc54da49bf17d47345f30701814df29bce58434fd7ecc5c41e6655d60f9c5fd +size 85723732 diff --git a/llama_English_Vietnamese/checkpoint-300/rng_state.pth b/llama_English_Vietnamese/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/llama_English_Vietnamese/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/llama_English_Vietnamese/checkpoint-300/scheduler.pt b/llama_English_Vietnamese/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e209dcc25b944d4b85ca13bea87ea706231c655 --- /dev/null +++ b/llama_English_Vietnamese/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7445dd881aa0e896fdc6ef61cf9c541f4299feaa6850ba7cac238afd6649f3 +size 1064 diff --git a/llama_English_Vietnamese/checkpoint-300/special_tokens_map.json b/llama_English_Vietnamese/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_English_Vietnamese/checkpoint-300/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_English_Vietnamese/checkpoint-300/tokenizer.json b/llama_English_Vietnamese/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_English_Vietnamese/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_English_Vietnamese/checkpoint-300/tokenizer_config.json b/llama_English_Vietnamese/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_English_Vietnamese/checkpoint-300/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_English_Vietnamese/checkpoint-300/trainer_state.json b/llama_English_Vietnamese/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4666e07dccb24e9dfe8b551ed3187a356d40cf97 --- /dev/null +++ b/llama_English_Vietnamese/checkpoint-300/trainer_state.json @@ -0,0 +1,2133 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.18814675446848542, + "eval_steps": 500, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0006271558482282847, + "grad_norm": 1.1329874992370605, + "learning_rate": 0.0001, + "loss": 2.5314, + "step": 1 + }, + { + "epoch": 0.0012543116964565694, + "grad_norm": 1.0171780586242676, + "learning_rate": 9.966555183946489e-05, + "loss": 2.291, + "step": 2 + }, + { + "epoch": 0.0018814675446848542, + "grad_norm": 0.9779753088951111, + "learning_rate": 9.933110367892977e-05, + "loss": 2.5455, + "step": 3 + }, + { + "epoch": 0.002508623392913139, + "grad_norm": 1.0696159601211548, + "learning_rate": 9.899665551839465e-05, + "loss": 2.385, + "step": 4 + }, + { + "epoch": 0.0031357792411414237, + "grad_norm": 1.1393558979034424, + "learning_rate": 9.866220735785953e-05, + "loss": 2.22, + "step": 5 + }, + { + "epoch": 0.0037629350893697085, + "grad_norm": 1.214498519897461, + "learning_rate": 9.832775919732441e-05, + "loss": 1.8638, + "step": 6 + }, + { + "epoch": 0.004390090937597993, + "grad_norm": 1.7331534624099731, + "learning_rate": 9.799331103678931e-05, + "loss": 1.7735, + "step": 7 + }, + { + "epoch": 0.005017246785826278, + "grad_norm": 1.0437425374984741, + "learning_rate": 9.765886287625419e-05, + "loss": 1.5449, + "step": 8 + }, + { + "epoch": 0.005644402634054563, + "grad_norm": 0.9133126139640808, + "learning_rate": 9.732441471571907e-05, + "loss": 1.5503, + "step": 9 + }, + { + "epoch": 0.006271558482282847, + "grad_norm": 0.8929418325424194, + "learning_rate": 9.698996655518396e-05, + "loss": 1.5382, + "step": 10 + }, + { + "epoch": 0.006898714330511132, + "grad_norm": 0.7974631190299988, + "learning_rate": 9.665551839464884e-05, + "loss": 1.3685, + "step": 11 + }, + { + "epoch": 0.007525870178739417, + "grad_norm": 0.6889926791191101, + "learning_rate": 9.632107023411372e-05, + "loss": 1.4076, + "step": 12 + }, + { + "epoch": 0.008153026026967701, + "grad_norm": 0.463778555393219, + "learning_rate": 9.59866220735786e-05, + "loss": 1.3502, + "step": 13 + }, + { + "epoch": 0.008780181875195987, + "grad_norm": 0.4665314853191376, + "learning_rate": 9.565217391304348e-05, + "loss": 1.3215, + "step": 14 + }, + { + "epoch": 0.00940733772342427, + "grad_norm": 0.49210086464881897, + "learning_rate": 9.531772575250837e-05, + "loss": 1.2521, + "step": 15 + }, + { + "epoch": 0.010034493571652555, + "grad_norm": 0.5524080395698547, + "learning_rate": 9.498327759197325e-05, + "loss": 1.3825, + "step": 16 + }, + { + "epoch": 0.01066164941988084, + "grad_norm": 0.5872707366943359, + "learning_rate": 9.464882943143813e-05, + "loss": 1.2788, + "step": 17 + }, + { + "epoch": 0.011288805268109126, + "grad_norm": 0.6367695331573486, + "learning_rate": 9.431438127090302e-05, + "loss": 1.2561, + "step": 18 + }, + { + "epoch": 0.01191596111633741, + "grad_norm": 0.5402896404266357, + "learning_rate": 9.39799331103679e-05, + "loss": 1.3212, + "step": 19 + }, + { + "epoch": 0.012543116964565695, + "grad_norm": 0.5474902987480164, + "learning_rate": 9.364548494983279e-05, + "loss": 1.2272, + "step": 20 + }, + { + "epoch": 0.01317027281279398, + "grad_norm": 0.5391371846199036, + "learning_rate": 9.331103678929767e-05, + "loss": 1.2131, + "step": 21 + }, + { + "epoch": 0.013797428661022263, + "grad_norm": 0.6083835959434509, + "learning_rate": 9.297658862876255e-05, + "loss": 1.2583, + "step": 22 + }, + { + "epoch": 0.014424584509250549, + "grad_norm": 0.625644326210022, + "learning_rate": 9.264214046822743e-05, + "loss": 1.2724, + "step": 23 + }, + { + "epoch": 0.015051740357478834, + "grad_norm": 0.537401020526886, + "learning_rate": 9.230769230769232e-05, + "loss": 1.2605, + "step": 24 + }, + { + "epoch": 0.01567889620570712, + "grad_norm": 0.6273273825645447, + "learning_rate": 9.19732441471572e-05, + "loss": 1.3261, + "step": 25 + }, + { + "epoch": 0.016306052053935403, + "grad_norm": 0.637491762638092, + "learning_rate": 9.163879598662207e-05, + "loss": 1.1808, + "step": 26 + }, + { + "epoch": 0.016933207902163686, + "grad_norm": 0.6607061624526978, + "learning_rate": 9.130434782608696e-05, + "loss": 1.2826, + "step": 27 + }, + { + "epoch": 0.017560363750391973, + "grad_norm": 0.7268609404563904, + "learning_rate": 9.096989966555184e-05, + "loss": 1.2573, + "step": 28 + }, + { + "epoch": 0.018187519598620257, + "grad_norm": 0.7763082385063171, + "learning_rate": 9.063545150501673e-05, + "loss": 1.266, + "step": 29 + }, + { + "epoch": 0.01881467544684854, + "grad_norm": 0.72670978307724, + "learning_rate": 9.030100334448161e-05, + "loss": 1.1736, + "step": 30 + }, + { + "epoch": 0.019441831295076827, + "grad_norm": 0.7848783731460571, + "learning_rate": 8.996655518394649e-05, + "loss": 1.1741, + "step": 31 + }, + { + "epoch": 0.02006898714330511, + "grad_norm": 0.8250067234039307, + "learning_rate": 8.963210702341137e-05, + "loss": 1.0689, + "step": 32 + }, + { + "epoch": 0.020696142991533398, + "grad_norm": 0.8322198390960693, + "learning_rate": 8.929765886287625e-05, + "loss": 1.1708, + "step": 33 + }, + { + "epoch": 0.02132329883976168, + "grad_norm": 0.8828574419021606, + "learning_rate": 8.896321070234114e-05, + "loss": 1.1479, + "step": 34 + }, + { + "epoch": 0.021950454687989965, + "grad_norm": 0.8523293137550354, + "learning_rate": 8.862876254180602e-05, + "loss": 1.1957, + "step": 35 + }, + { + "epoch": 0.022577610536218252, + "grad_norm": 0.8329194784164429, + "learning_rate": 8.82943143812709e-05, + "loss": 1.2659, + "step": 36 + }, + { + "epoch": 0.023204766384446535, + "grad_norm": 0.5968635082244873, + "learning_rate": 8.795986622073578e-05, + "loss": 1.0579, + "step": 37 + }, + { + "epoch": 0.02383192223267482, + "grad_norm": 0.4709320068359375, + "learning_rate": 8.762541806020068e-05, + "loss": 1.1495, + "step": 38 + }, + { + "epoch": 0.024459078080903106, + "grad_norm": 0.4913414418697357, + "learning_rate": 8.729096989966556e-05, + "loss": 1.2192, + "step": 39 + }, + { + "epoch": 0.02508623392913139, + "grad_norm": 0.4877021014690399, + "learning_rate": 8.695652173913044e-05, + "loss": 1.3065, + "step": 40 + }, + { + "epoch": 0.025713389777359673, + "grad_norm": 0.3754381835460663, + "learning_rate": 8.662207357859532e-05, + "loss": 1.1311, + "step": 41 + }, + { + "epoch": 0.02634054562558796, + "grad_norm": 0.4108287990093231, + "learning_rate": 8.62876254180602e-05, + "loss": 1.2202, + "step": 42 + }, + { + "epoch": 0.026967701473816243, + "grad_norm": 0.40012553334236145, + "learning_rate": 8.595317725752509e-05, + "loss": 1.1415, + "step": 43 + }, + { + "epoch": 0.027594857322044527, + "grad_norm": 0.39889755845069885, + "learning_rate": 8.561872909698997e-05, + "loss": 1.1504, + "step": 44 + }, + { + "epoch": 0.028222013170272814, + "grad_norm": 0.46676573157310486, + "learning_rate": 8.528428093645485e-05, + "loss": 1.1806, + "step": 45 + }, + { + "epoch": 0.028849169018501097, + "grad_norm": 0.4341486692428589, + "learning_rate": 8.494983277591973e-05, + "loss": 1.1706, + "step": 46 + }, + { + "epoch": 0.02947632486672938, + "grad_norm": 0.5124387145042419, + "learning_rate": 8.461538461538461e-05, + "loss": 1.1873, + "step": 47 + }, + { + "epoch": 0.030103480714957668, + "grad_norm": 0.40826505422592163, + "learning_rate": 8.42809364548495e-05, + "loss": 1.0895, + "step": 48 + }, + { + "epoch": 0.03073063656318595, + "grad_norm": 0.4583950936794281, + "learning_rate": 8.394648829431439e-05, + "loss": 1.2264, + "step": 49 + }, + { + "epoch": 0.03135779241141424, + "grad_norm": 0.4081198275089264, + "learning_rate": 8.361204013377927e-05, + "loss": 1.1446, + "step": 50 + }, + { + "epoch": 0.03198494825964252, + "grad_norm": 0.41521233320236206, + "learning_rate": 8.327759197324416e-05, + "loss": 1.1491, + "step": 51 + }, + { + "epoch": 0.032612104107870805, + "grad_norm": 0.40511155128479004, + "learning_rate": 8.294314381270904e-05, + "loss": 1.1759, + "step": 52 + }, + { + "epoch": 0.03323925995609909, + "grad_norm": 0.5162859559059143, + "learning_rate": 8.260869565217392e-05, + "loss": 1.1583, + "step": 53 + }, + { + "epoch": 0.03386641580432737, + "grad_norm": 0.465262770652771, + "learning_rate": 8.22742474916388e-05, + "loss": 1.2034, + "step": 54 + }, + { + "epoch": 0.03449357165255566, + "grad_norm": 0.42902040481567383, + "learning_rate": 8.193979933110368e-05, + "loss": 1.172, + "step": 55 + }, + { + "epoch": 0.035120727500783946, + "grad_norm": 0.42888331413269043, + "learning_rate": 8.160535117056857e-05, + "loss": 1.082, + "step": 56 + }, + { + "epoch": 0.03574788334901223, + "grad_norm": 0.49385911226272583, + "learning_rate": 8.127090301003345e-05, + "loss": 1.1224, + "step": 57 + }, + { + "epoch": 0.036375039197240513, + "grad_norm": 0.47527334094047546, + "learning_rate": 8.093645484949833e-05, + "loss": 1.1926, + "step": 58 + }, + { + "epoch": 0.0370021950454688, + "grad_norm": 0.5191435813903809, + "learning_rate": 8.060200668896321e-05, + "loss": 1.2253, + "step": 59 + }, + { + "epoch": 0.03762935089369708, + "grad_norm": 0.48137006163597107, + "learning_rate": 8.026755852842809e-05, + "loss": 1.17, + "step": 60 + }, + { + "epoch": 0.03825650674192537, + "grad_norm": 0.44135233759880066, + "learning_rate": 7.993311036789299e-05, + "loss": 1.1388, + "step": 61 + }, + { + "epoch": 0.038883662590153655, + "grad_norm": 0.46292147040367126, + "learning_rate": 7.959866220735787e-05, + "loss": 1.1531, + "step": 62 + }, + { + "epoch": 0.03951081843838194, + "grad_norm": 0.4255847930908203, + "learning_rate": 7.926421404682275e-05, + "loss": 1.1398, + "step": 63 + }, + { + "epoch": 0.04013797428661022, + "grad_norm": 0.4323138892650604, + "learning_rate": 7.892976588628763e-05, + "loss": 1.1635, + "step": 64 + }, + { + "epoch": 0.040765130134838505, + "grad_norm": 0.4316229522228241, + "learning_rate": 7.859531772575252e-05, + "loss": 1.1673, + "step": 65 + }, + { + "epoch": 0.041392285983066796, + "grad_norm": 0.42433494329452515, + "learning_rate": 7.82608695652174e-05, + "loss": 1.085, + "step": 66 + }, + { + "epoch": 0.04201944183129508, + "grad_norm": 0.42199933528900146, + "learning_rate": 7.792642140468228e-05, + "loss": 1.1098, + "step": 67 + }, + { + "epoch": 0.04264659767952336, + "grad_norm": 0.4090598523616791, + "learning_rate": 7.759197324414716e-05, + "loss": 1.0722, + "step": 68 + }, + { + "epoch": 0.043273753527751646, + "grad_norm": 0.46552446484565735, + "learning_rate": 7.725752508361204e-05, + "loss": 1.1798, + "step": 69 + }, + { + "epoch": 0.04390090937597993, + "grad_norm": 0.4066402018070221, + "learning_rate": 7.692307692307693e-05, + "loss": 1.0683, + "step": 70 + }, + { + "epoch": 0.04452806522420821, + "grad_norm": 0.46165624260902405, + "learning_rate": 7.658862876254181e-05, + "loss": 1.0879, + "step": 71 + }, + { + "epoch": 0.045155221072436504, + "grad_norm": 0.4765890836715698, + "learning_rate": 7.62541806020067e-05, + "loss": 1.17, + "step": 72 + }, + { + "epoch": 0.04578237692066479, + "grad_norm": 0.46825796365737915, + "learning_rate": 7.591973244147159e-05, + "loss": 1.1548, + "step": 73 + }, + { + "epoch": 0.04640953276889307, + "grad_norm": 0.42665427923202515, + "learning_rate": 7.558528428093647e-05, + "loss": 1.2517, + "step": 74 + }, + { + "epoch": 0.047036688617121354, + "grad_norm": 0.4694182574748993, + "learning_rate": 7.525083612040135e-05, + "loss": 1.1806, + "step": 75 + }, + { + "epoch": 0.04766384446534964, + "grad_norm": 0.45235225558280945, + "learning_rate": 7.491638795986622e-05, + "loss": 1.1515, + "step": 76 + }, + { + "epoch": 0.04829100031357792, + "grad_norm": 0.4057731628417969, + "learning_rate": 7.45819397993311e-05, + "loss": 1.0757, + "step": 77 + }, + { + "epoch": 0.04891815616180621, + "grad_norm": 0.44630780816078186, + "learning_rate": 7.424749163879598e-05, + "loss": 1.1323, + "step": 78 + }, + { + "epoch": 0.049545312010034495, + "grad_norm": 0.41524800658226013, + "learning_rate": 7.391304347826086e-05, + "loss": 1.167, + "step": 79 + }, + { + "epoch": 0.05017246785826278, + "grad_norm": 0.46300187706947327, + "learning_rate": 7.357859531772575e-05, + "loss": 1.1519, + "step": 80 + }, + { + "epoch": 0.05079962370649106, + "grad_norm": 0.4147876799106598, + "learning_rate": 7.324414715719064e-05, + "loss": 1.1566, + "step": 81 + }, + { + "epoch": 0.051426779554719346, + "grad_norm": 0.44954177737236023, + "learning_rate": 7.290969899665552e-05, + "loss": 1.1737, + "step": 82 + }, + { + "epoch": 0.05205393540294763, + "grad_norm": 0.45056983828544617, + "learning_rate": 7.25752508361204e-05, + "loss": 1.1597, + "step": 83 + }, + { + "epoch": 0.05268109125117592, + "grad_norm": 0.5045411586761475, + "learning_rate": 7.224080267558529e-05, + "loss": 1.1294, + "step": 84 + }, + { + "epoch": 0.0533082470994042, + "grad_norm": 0.5283576846122742, + "learning_rate": 7.190635451505017e-05, + "loss": 1.2078, + "step": 85 + }, + { + "epoch": 0.05393540294763249, + "grad_norm": 0.434832900762558, + "learning_rate": 7.157190635451505e-05, + "loss": 1.1519, + "step": 86 + }, + { + "epoch": 0.05456255879586077, + "grad_norm": 0.4940570294857025, + "learning_rate": 7.123745819397993e-05, + "loss": 1.2029, + "step": 87 + }, + { + "epoch": 0.055189714644089054, + "grad_norm": 0.47406020760536194, + "learning_rate": 7.090301003344481e-05, + "loss": 1.095, + "step": 88 + }, + { + "epoch": 0.055816870492317344, + "grad_norm": 0.46511971950531006, + "learning_rate": 7.05685618729097e-05, + "loss": 1.1762, + "step": 89 + }, + { + "epoch": 0.05644402634054563, + "grad_norm": 0.47649258375167847, + "learning_rate": 7.023411371237458e-05, + "loss": 1.1546, + "step": 90 + }, + { + "epoch": 0.05707118218877391, + "grad_norm": 0.4064464867115021, + "learning_rate": 6.989966555183946e-05, + "loss": 1.0969, + "step": 91 + }, + { + "epoch": 0.057698338037002195, + "grad_norm": 0.4659188389778137, + "learning_rate": 6.956521739130436e-05, + "loss": 1.1868, + "step": 92 + }, + { + "epoch": 0.05832549388523048, + "grad_norm": 0.4586854577064514, + "learning_rate": 6.923076923076924e-05, + "loss": 1.1075, + "step": 93 + }, + { + "epoch": 0.05895264973345876, + "grad_norm": 0.4805111289024353, + "learning_rate": 6.889632107023412e-05, + "loss": 1.1154, + "step": 94 + }, + { + "epoch": 0.05957980558168705, + "grad_norm": 0.38674503564834595, + "learning_rate": 6.8561872909699e-05, + "loss": 1.0882, + "step": 95 + }, + { + "epoch": 0.060206961429915336, + "grad_norm": 0.4246669411659241, + "learning_rate": 6.822742474916388e-05, + "loss": 1.0373, + "step": 96 + }, + { + "epoch": 0.06083411727814362, + "grad_norm": 0.4012146592140198, + "learning_rate": 6.789297658862876e-05, + "loss": 1.1813, + "step": 97 + }, + { + "epoch": 0.0614612731263719, + "grad_norm": 0.4579727053642273, + "learning_rate": 6.755852842809365e-05, + "loss": 1.1539, + "step": 98 + }, + { + "epoch": 0.062088428974600186, + "grad_norm": 0.44659194350242615, + "learning_rate": 6.722408026755853e-05, + "loss": 1.104, + "step": 99 + }, + { + "epoch": 0.06271558482282848, + "grad_norm": 0.43122145533561707, + "learning_rate": 6.688963210702341e-05, + "loss": 1.1289, + "step": 100 + }, + { + "epoch": 0.06334274067105676, + "grad_norm": 0.40788164734840393, + "learning_rate": 6.655518394648829e-05, + "loss": 1.1314, + "step": 101 + }, + { + "epoch": 0.06396989651928504, + "grad_norm": 0.45048585534095764, + "learning_rate": 6.622073578595317e-05, + "loss": 1.0983, + "step": 102 + }, + { + "epoch": 0.06459705236751333, + "grad_norm": 0.4325787425041199, + "learning_rate": 6.588628762541807e-05, + "loss": 1.0949, + "step": 103 + }, + { + "epoch": 0.06522420821574161, + "grad_norm": 0.5192950367927551, + "learning_rate": 6.555183946488295e-05, + "loss": 1.2203, + "step": 104 + }, + { + "epoch": 0.0658513640639699, + "grad_norm": 0.46830520033836365, + "learning_rate": 6.521739130434783e-05, + "loss": 1.1678, + "step": 105 + }, + { + "epoch": 0.06647851991219818, + "grad_norm": 0.4136311709880829, + "learning_rate": 6.488294314381272e-05, + "loss": 1.0903, + "step": 106 + }, + { + "epoch": 0.06710567576042646, + "grad_norm": 0.5483879446983337, + "learning_rate": 6.45484949832776e-05, + "loss": 1.0997, + "step": 107 + }, + { + "epoch": 0.06773283160865474, + "grad_norm": 0.4444606602191925, + "learning_rate": 6.421404682274248e-05, + "loss": 1.0104, + "step": 108 + }, + { + "epoch": 0.06835998745688304, + "grad_norm": 0.41186246275901794, + "learning_rate": 6.387959866220736e-05, + "loss": 1.0611, + "step": 109 + }, + { + "epoch": 0.06898714330511133, + "grad_norm": 0.47986000776290894, + "learning_rate": 6.354515050167224e-05, + "loss": 1.1197, + "step": 110 + }, + { + "epoch": 0.06961429915333961, + "grad_norm": 0.4698418080806732, + "learning_rate": 6.321070234113713e-05, + "loss": 1.2814, + "step": 111 + }, + { + "epoch": 0.07024145500156789, + "grad_norm": 0.4690100848674774, + "learning_rate": 6.287625418060201e-05, + "loss": 1.1208, + "step": 112 + }, + { + "epoch": 0.07086861084979618, + "grad_norm": 0.5585668683052063, + "learning_rate": 6.254180602006689e-05, + "loss": 1.1289, + "step": 113 + }, + { + "epoch": 0.07149576669802446, + "grad_norm": 0.45809587836265564, + "learning_rate": 6.220735785953178e-05, + "loss": 1.1284, + "step": 114 + }, + { + "epoch": 0.07212292254625274, + "grad_norm": 0.48366600275039673, + "learning_rate": 6.187290969899667e-05, + "loss": 1.145, + "step": 115 + }, + { + "epoch": 0.07275007839448103, + "grad_norm": 0.4505147337913513, + "learning_rate": 6.153846153846155e-05, + "loss": 1.1071, + "step": 116 + }, + { + "epoch": 0.07337723424270931, + "grad_norm": 0.5526301264762878, + "learning_rate": 6.120401337792643e-05, + "loss": 1.2021, + "step": 117 + }, + { + "epoch": 0.0740043900909376, + "grad_norm": 0.44302597641944885, + "learning_rate": 6.086956521739131e-05, + "loss": 1.0973, + "step": 118 + }, + { + "epoch": 0.07463154593916588, + "grad_norm": 0.38735851645469666, + "learning_rate": 6.0535117056856194e-05, + "loss": 0.9937, + "step": 119 + }, + { + "epoch": 0.07525870178739416, + "grad_norm": 0.45709213614463806, + "learning_rate": 6.0200668896321076e-05, + "loss": 1.0586, + "step": 120 + }, + { + "epoch": 0.07588585763562246, + "grad_norm": 0.4670064151287079, + "learning_rate": 5.986622073578596e-05, + "loss": 1.1423, + "step": 121 + }, + { + "epoch": 0.07651301348385074, + "grad_norm": 0.4870319664478302, + "learning_rate": 5.953177257525085e-05, + "loss": 1.149, + "step": 122 + }, + { + "epoch": 0.07714016933207903, + "grad_norm": 0.43948817253112793, + "learning_rate": 5.919732441471573e-05, + "loss": 1.1102, + "step": 123 + }, + { + "epoch": 0.07776732518030731, + "grad_norm": 0.5276496410369873, + "learning_rate": 5.886287625418061e-05, + "loss": 1.179, + "step": 124 + }, + { + "epoch": 0.07839448102853559, + "grad_norm": 0.456938773393631, + "learning_rate": 5.852842809364549e-05, + "loss": 1.0614, + "step": 125 + }, + { + "epoch": 0.07902163687676388, + "grad_norm": 0.48240378499031067, + "learning_rate": 5.819397993311037e-05, + "loss": 1.1562, + "step": 126 + }, + { + "epoch": 0.07964879272499216, + "grad_norm": 0.48209676146507263, + "learning_rate": 5.785953177257525e-05, + "loss": 1.0598, + "step": 127 + }, + { + "epoch": 0.08027594857322044, + "grad_norm": 0.5053116679191589, + "learning_rate": 5.752508361204013e-05, + "loss": 1.2358, + "step": 128 + }, + { + "epoch": 0.08090310442144873, + "grad_norm": 0.48982295393943787, + "learning_rate": 5.7190635451505014e-05, + "loss": 1.1236, + "step": 129 + }, + { + "epoch": 0.08153026026967701, + "grad_norm": 0.473768025636673, + "learning_rate": 5.6856187290969896e-05, + "loss": 1.1223, + "step": 130 + }, + { + "epoch": 0.0821574161179053, + "grad_norm": 0.47272294759750366, + "learning_rate": 5.652173913043478e-05, + "loss": 1.1766, + "step": 131 + }, + { + "epoch": 0.08278457196613359, + "grad_norm": 0.42775315046310425, + "learning_rate": 5.6187290969899666e-05, + "loss": 1.0777, + "step": 132 + }, + { + "epoch": 0.08341172781436187, + "grad_norm": 0.43032482266426086, + "learning_rate": 5.585284280936455e-05, + "loss": 1.0509, + "step": 133 + }, + { + "epoch": 0.08403888366259016, + "grad_norm": 0.4302811324596405, + "learning_rate": 5.551839464882943e-05, + "loss": 1.1031, + "step": 134 + }, + { + "epoch": 0.08466603951081844, + "grad_norm": 0.48152413964271545, + "learning_rate": 5.518394648829431e-05, + "loss": 1.0831, + "step": 135 + }, + { + "epoch": 0.08529319535904673, + "grad_norm": 0.44327837228775024, + "learning_rate": 5.4849498327759194e-05, + "loss": 1.0824, + "step": 136 + }, + { + "epoch": 0.08592035120727501, + "grad_norm": 0.4980611801147461, + "learning_rate": 5.451505016722408e-05, + "loss": 1.0317, + "step": 137 + }, + { + "epoch": 0.08654750705550329, + "grad_norm": 0.5285961031913757, + "learning_rate": 5.4180602006688965e-05, + "loss": 1.0671, + "step": 138 + }, + { + "epoch": 0.08717466290373158, + "grad_norm": 0.47221964597702026, + "learning_rate": 5.384615384615385e-05, + "loss": 1.1313, + "step": 139 + }, + { + "epoch": 0.08780181875195986, + "grad_norm": 0.4523843228816986, + "learning_rate": 5.351170568561873e-05, + "loss": 1.1363, + "step": 140 + }, + { + "epoch": 0.08842897460018814, + "grad_norm": 0.44657716155052185, + "learning_rate": 5.317725752508361e-05, + "loss": 1.0926, + "step": 141 + }, + { + "epoch": 0.08905613044841643, + "grad_norm": 0.4858958423137665, + "learning_rate": 5.284280936454849e-05, + "loss": 1.142, + "step": 142 + }, + { + "epoch": 0.08968328629664471, + "grad_norm": 0.5070914626121521, + "learning_rate": 5.250836120401338e-05, + "loss": 1.1487, + "step": 143 + }, + { + "epoch": 0.09031044214487301, + "grad_norm": 0.5086584687232971, + "learning_rate": 5.217391304347826e-05, + "loss": 1.1316, + "step": 144 + }, + { + "epoch": 0.09093759799310129, + "grad_norm": 0.5099964737892151, + "learning_rate": 5.1839464882943145e-05, + "loss": 1.1523, + "step": 145 + }, + { + "epoch": 0.09156475384132957, + "grad_norm": 0.42420920729637146, + "learning_rate": 5.150501672240803e-05, + "loss": 1.0709, + "step": 146 + }, + { + "epoch": 0.09219190968955786, + "grad_norm": 0.4656766355037689, + "learning_rate": 5.117056856187291e-05, + "loss": 1.0626, + "step": 147 + }, + { + "epoch": 0.09281906553778614, + "grad_norm": 0.44983309507369995, + "learning_rate": 5.08361204013378e-05, + "loss": 1.1076, + "step": 148 + }, + { + "epoch": 0.09344622138601442, + "grad_norm": 0.49652299284935, + "learning_rate": 5.050167224080268e-05, + "loss": 1.158, + "step": 149 + }, + { + "epoch": 0.09407337723424271, + "grad_norm": 0.5079907774925232, + "learning_rate": 5.016722408026756e-05, + "loss": 1.1208, + "step": 150 + }, + { + "epoch": 0.09470053308247099, + "grad_norm": 0.5696375966072083, + "learning_rate": 4.983277591973244e-05, + "loss": 1.1515, + "step": 151 + }, + { + "epoch": 0.09532768893069928, + "grad_norm": 0.44712352752685547, + "learning_rate": 4.9498327759197325e-05, + "loss": 1.045, + "step": 152 + }, + { + "epoch": 0.09595484477892756, + "grad_norm": 0.41328972578048706, + "learning_rate": 4.916387959866221e-05, + "loss": 1.0785, + "step": 153 + }, + { + "epoch": 0.09658200062715584, + "grad_norm": 0.4514102637767792, + "learning_rate": 4.8829431438127096e-05, + "loss": 1.0989, + "step": 154 + }, + { + "epoch": 0.09720915647538414, + "grad_norm": 0.5154950618743896, + "learning_rate": 4.849498327759198e-05, + "loss": 1.1652, + "step": 155 + }, + { + "epoch": 0.09783631232361242, + "grad_norm": 0.4378693997859955, + "learning_rate": 4.816053511705686e-05, + "loss": 1.1153, + "step": 156 + }, + { + "epoch": 0.0984634681718407, + "grad_norm": 0.5264151096343994, + "learning_rate": 4.782608695652174e-05, + "loss": 1.1305, + "step": 157 + }, + { + "epoch": 0.09909062402006899, + "grad_norm": 0.5044124126434326, + "learning_rate": 4.7491638795986624e-05, + "loss": 1.1603, + "step": 158 + }, + { + "epoch": 0.09971777986829727, + "grad_norm": 0.5369151830673218, + "learning_rate": 4.715719063545151e-05, + "loss": 1.0011, + "step": 159 + }, + { + "epoch": 0.10034493571652556, + "grad_norm": 0.4663112163543701, + "learning_rate": 4.6822742474916394e-05, + "loss": 1.1319, + "step": 160 + }, + { + "epoch": 0.10097209156475384, + "grad_norm": 0.497859388589859, + "learning_rate": 4.6488294314381276e-05, + "loss": 1.117, + "step": 161 + }, + { + "epoch": 0.10159924741298212, + "grad_norm": 0.5183404684066772, + "learning_rate": 4.615384615384616e-05, + "loss": 1.1978, + "step": 162 + }, + { + "epoch": 0.10222640326121041, + "grad_norm": 0.45794522762298584, + "learning_rate": 4.581939799331103e-05, + "loss": 1.1036, + "step": 163 + }, + { + "epoch": 0.10285355910943869, + "grad_norm": 0.5128252506256104, + "learning_rate": 4.548494983277592e-05, + "loss": 1.1711, + "step": 164 + }, + { + "epoch": 0.10348071495766697, + "grad_norm": 0.481505423784256, + "learning_rate": 4.5150501672240804e-05, + "loss": 1.1143, + "step": 165 + }, + { + "epoch": 0.10410787080589526, + "grad_norm": 0.48832452297210693, + "learning_rate": 4.4816053511705686e-05, + "loss": 1.1723, + "step": 166 + }, + { + "epoch": 0.10473502665412356, + "grad_norm": 0.4828014373779297, + "learning_rate": 4.448160535117057e-05, + "loss": 1.1016, + "step": 167 + }, + { + "epoch": 0.10536218250235184, + "grad_norm": 0.5178865790367126, + "learning_rate": 4.414715719063545e-05, + "loss": 1.1412, + "step": 168 + }, + { + "epoch": 0.10598933835058012, + "grad_norm": 0.4863995313644409, + "learning_rate": 4.381270903010034e-05, + "loss": 1.1386, + "step": 169 + }, + { + "epoch": 0.1066164941988084, + "grad_norm": 0.48305442929267883, + "learning_rate": 4.347826086956522e-05, + "loss": 1.1376, + "step": 170 + }, + { + "epoch": 0.10724365004703669, + "grad_norm": 0.46624070405960083, + "learning_rate": 4.31438127090301e-05, + "loss": 1.1082, + "step": 171 + }, + { + "epoch": 0.10787080589526497, + "grad_norm": 0.49723443388938904, + "learning_rate": 4.2809364548494984e-05, + "loss": 1.087, + "step": 172 + }, + { + "epoch": 0.10849796174349326, + "grad_norm": 0.47998717427253723, + "learning_rate": 4.2474916387959866e-05, + "loss": 1.1228, + "step": 173 + }, + { + "epoch": 0.10912511759172154, + "grad_norm": 0.4895256459712982, + "learning_rate": 4.214046822742475e-05, + "loss": 1.1387, + "step": 174 + }, + { + "epoch": 0.10975227343994982, + "grad_norm": 0.4830639660358429, + "learning_rate": 4.180602006688964e-05, + "loss": 1.1239, + "step": 175 + }, + { + "epoch": 0.11037942928817811, + "grad_norm": 0.43242347240448, + "learning_rate": 4.147157190635452e-05, + "loss": 1.0437, + "step": 176 + }, + { + "epoch": 0.11100658513640639, + "grad_norm": 0.4875499904155731, + "learning_rate": 4.11371237458194e-05, + "loss": 1.1805, + "step": 177 + }, + { + "epoch": 0.11163374098463469, + "grad_norm": 0.5745497941970825, + "learning_rate": 4.080267558528428e-05, + "loss": 1.1453, + "step": 178 + }, + { + "epoch": 0.11226089683286297, + "grad_norm": 0.5793723464012146, + "learning_rate": 4.0468227424749165e-05, + "loss": 1.1364, + "step": 179 + }, + { + "epoch": 0.11288805268109126, + "grad_norm": 0.48804497718811035, + "learning_rate": 4.0133779264214046e-05, + "loss": 1.0968, + "step": 180 + }, + { + "epoch": 0.11351520852931954, + "grad_norm": 0.46328264474868774, + "learning_rate": 3.9799331103678935e-05, + "loss": 1.0965, + "step": 181 + }, + { + "epoch": 0.11414236437754782, + "grad_norm": 0.4533807039260864, + "learning_rate": 3.946488294314382e-05, + "loss": 1.0968, + "step": 182 + }, + { + "epoch": 0.1147695202257761, + "grad_norm": 0.4990158975124359, + "learning_rate": 3.91304347826087e-05, + "loss": 1.133, + "step": 183 + }, + { + "epoch": 0.11539667607400439, + "grad_norm": 0.5183166265487671, + "learning_rate": 3.879598662207358e-05, + "loss": 1.1555, + "step": 184 + }, + { + "epoch": 0.11602383192223267, + "grad_norm": 0.5148091912269592, + "learning_rate": 3.846153846153846e-05, + "loss": 1.0681, + "step": 185 + }, + { + "epoch": 0.11665098777046096, + "grad_norm": 0.4919118285179138, + "learning_rate": 3.812709030100335e-05, + "loss": 1.184, + "step": 186 + }, + { + "epoch": 0.11727814361868924, + "grad_norm": 0.5206012725830078, + "learning_rate": 3.7792642140468233e-05, + "loss": 1.1401, + "step": 187 + }, + { + "epoch": 0.11790529946691752, + "grad_norm": 0.4834771156311035, + "learning_rate": 3.745819397993311e-05, + "loss": 1.088, + "step": 188 + }, + { + "epoch": 0.11853245531514581, + "grad_norm": 0.48845043778419495, + "learning_rate": 3.712374581939799e-05, + "loss": 1.1958, + "step": 189 + }, + { + "epoch": 0.1191596111633741, + "grad_norm": 0.49845704436302185, + "learning_rate": 3.678929765886287e-05, + "loss": 1.1188, + "step": 190 + }, + { + "epoch": 0.11978676701160239, + "grad_norm": 0.4738442003726959, + "learning_rate": 3.645484949832776e-05, + "loss": 1.153, + "step": 191 + }, + { + "epoch": 0.12041392285983067, + "grad_norm": 0.4959152042865753, + "learning_rate": 3.612040133779264e-05, + "loss": 1.125, + "step": 192 + }, + { + "epoch": 0.12104107870805896, + "grad_norm": 0.5036417245864868, + "learning_rate": 3.5785953177257525e-05, + "loss": 1.1273, + "step": 193 + }, + { + "epoch": 0.12166823455628724, + "grad_norm": 0.5091941952705383, + "learning_rate": 3.545150501672241e-05, + "loss": 1.0774, + "step": 194 + }, + { + "epoch": 0.12229539040451552, + "grad_norm": 0.4901737570762634, + "learning_rate": 3.511705685618729e-05, + "loss": 1.1179, + "step": 195 + }, + { + "epoch": 0.1229225462527438, + "grad_norm": 0.45219141244888306, + "learning_rate": 3.478260869565218e-05, + "loss": 1.1085, + "step": 196 + }, + { + "epoch": 0.12354970210097209, + "grad_norm": 0.4779893159866333, + "learning_rate": 3.444816053511706e-05, + "loss": 1.093, + "step": 197 + }, + { + "epoch": 0.12417685794920037, + "grad_norm": 0.4967818260192871, + "learning_rate": 3.411371237458194e-05, + "loss": 1.1042, + "step": 198 + }, + { + "epoch": 0.12480401379742866, + "grad_norm": 0.5056282877922058, + "learning_rate": 3.3779264214046823e-05, + "loss": 1.1222, + "step": 199 + }, + { + "epoch": 0.12543116964565695, + "grad_norm": 0.4826442003250122, + "learning_rate": 3.3444816053511705e-05, + "loss": 1.0909, + "step": 200 + }, + { + "epoch": 0.12605832549388524, + "grad_norm": 0.4724636673927307, + "learning_rate": 3.311036789297659e-05, + "loss": 1.0632, + "step": 201 + }, + { + "epoch": 0.12668548134211352, + "grad_norm": 0.48456090688705444, + "learning_rate": 3.2775919732441476e-05, + "loss": 1.1662, + "step": 202 + }, + { + "epoch": 0.1273126371903418, + "grad_norm": 0.43576645851135254, + "learning_rate": 3.244147157190636e-05, + "loss": 1.0106, + "step": 203 + }, + { + "epoch": 0.1279397930385701, + "grad_norm": 0.5297894477844238, + "learning_rate": 3.210702341137124e-05, + "loss": 1.1668, + "step": 204 + }, + { + "epoch": 0.12856694888679837, + "grad_norm": 0.5179861783981323, + "learning_rate": 3.177257525083612e-05, + "loss": 1.0566, + "step": 205 + }, + { + "epoch": 0.12919410473502665, + "grad_norm": 0.46285516023635864, + "learning_rate": 3.1438127090301004e-05, + "loss": 1.1157, + "step": 206 + }, + { + "epoch": 0.12982126058325494, + "grad_norm": 0.511254072189331, + "learning_rate": 3.110367892976589e-05, + "loss": 1.1425, + "step": 207 + }, + { + "epoch": 0.13044841643148322, + "grad_norm": 0.5081919431686401, + "learning_rate": 3.0769230769230774e-05, + "loss": 1.0436, + "step": 208 + }, + { + "epoch": 0.1310755722797115, + "grad_norm": 0.4743541479110718, + "learning_rate": 3.0434782608695656e-05, + "loss": 1.0811, + "step": 209 + }, + { + "epoch": 0.1317027281279398, + "grad_norm": 0.5982806086540222, + "learning_rate": 3.0100334448160538e-05, + "loss": 1.1674, + "step": 210 + }, + { + "epoch": 0.13232988397616807, + "grad_norm": 0.47165796160697937, + "learning_rate": 2.9765886287625424e-05, + "loss": 1.1274, + "step": 211 + }, + { + "epoch": 0.13295703982439636, + "grad_norm": 0.5538997054100037, + "learning_rate": 2.9431438127090305e-05, + "loss": 1.1679, + "step": 212 + }, + { + "epoch": 0.13358419567262464, + "grad_norm": 0.5390966534614563, + "learning_rate": 2.9096989966555184e-05, + "loss": 1.1613, + "step": 213 + }, + { + "epoch": 0.13421135152085292, + "grad_norm": 0.6614457368850708, + "learning_rate": 2.8762541806020066e-05, + "loss": 1.2044, + "step": 214 + }, + { + "epoch": 0.1348385073690812, + "grad_norm": 0.4833295941352844, + "learning_rate": 2.8428093645484948e-05, + "loss": 1.1787, + "step": 215 + }, + { + "epoch": 0.1354656632173095, + "grad_norm": 0.4769502282142639, + "learning_rate": 2.8093645484949833e-05, + "loss": 1.1303, + "step": 216 + }, + { + "epoch": 0.13609281906553777, + "grad_norm": 0.5252038240432739, + "learning_rate": 2.7759197324414715e-05, + "loss": 1.1373, + "step": 217 + }, + { + "epoch": 0.13671997491376608, + "grad_norm": 0.5152972936630249, + "learning_rate": 2.7424749163879597e-05, + "loss": 1.1386, + "step": 218 + }, + { + "epoch": 0.13734713076199437, + "grad_norm": 0.45123931765556335, + "learning_rate": 2.7090301003344482e-05, + "loss": 1.0077, + "step": 219 + }, + { + "epoch": 0.13797428661022265, + "grad_norm": 0.45800599455833435, + "learning_rate": 2.6755852842809364e-05, + "loss": 1.1186, + "step": 220 + }, + { + "epoch": 0.13860144245845094, + "grad_norm": 0.4793985188007355, + "learning_rate": 2.6421404682274246e-05, + "loss": 1.1138, + "step": 221 + }, + { + "epoch": 0.13922859830667922, + "grad_norm": 0.5087202787399292, + "learning_rate": 2.608695652173913e-05, + "loss": 1.0913, + "step": 222 + }, + { + "epoch": 0.1398557541549075, + "grad_norm": 0.519314169883728, + "learning_rate": 2.5752508361204013e-05, + "loss": 1.1132, + "step": 223 + }, + { + "epoch": 0.14048291000313579, + "grad_norm": 0.4722985625267029, + "learning_rate": 2.54180602006689e-05, + "loss": 1.1185, + "step": 224 + }, + { + "epoch": 0.14111006585136407, + "grad_norm": 0.5009178519248962, + "learning_rate": 2.508361204013378e-05, + "loss": 1.1063, + "step": 225 + }, + { + "epoch": 0.14173722169959235, + "grad_norm": 0.5109993815422058, + "learning_rate": 2.4749163879598663e-05, + "loss": 1.1522, + "step": 226 + }, + { + "epoch": 0.14236437754782064, + "grad_norm": 0.46906983852386475, + "learning_rate": 2.4414715719063548e-05, + "loss": 1.0815, + "step": 227 + }, + { + "epoch": 0.14299153339604892, + "grad_norm": 0.48216167092323303, + "learning_rate": 2.408026755852843e-05, + "loss": 1.1103, + "step": 228 + }, + { + "epoch": 0.1436186892442772, + "grad_norm": 0.4617651402950287, + "learning_rate": 2.3745819397993312e-05, + "loss": 1.0704, + "step": 229 + }, + { + "epoch": 0.1442458450925055, + "grad_norm": 0.5504018664360046, + "learning_rate": 2.3411371237458197e-05, + "loss": 1.1774, + "step": 230 + }, + { + "epoch": 0.14487300094073377, + "grad_norm": 0.48636704683303833, + "learning_rate": 2.307692307692308e-05, + "loss": 1.1166, + "step": 231 + }, + { + "epoch": 0.14550015678896205, + "grad_norm": 0.46996790170669556, + "learning_rate": 2.274247491638796e-05, + "loss": 1.1043, + "step": 232 + }, + { + "epoch": 0.14612731263719034, + "grad_norm": 0.4818289577960968, + "learning_rate": 2.2408026755852843e-05, + "loss": 1.0713, + "step": 233 + }, + { + "epoch": 0.14675446848541862, + "grad_norm": 0.48282650113105774, + "learning_rate": 2.2073578595317725e-05, + "loss": 1.0679, + "step": 234 + }, + { + "epoch": 0.1473816243336469, + "grad_norm": 0.4748537242412567, + "learning_rate": 2.173913043478261e-05, + "loss": 1.0467, + "step": 235 + }, + { + "epoch": 0.1480087801818752, + "grad_norm": 0.4510209560394287, + "learning_rate": 2.1404682274247492e-05, + "loss": 1.0525, + "step": 236 + }, + { + "epoch": 0.14863593603010347, + "grad_norm": 0.5060710310935974, + "learning_rate": 2.1070234113712374e-05, + "loss": 1.1351, + "step": 237 + }, + { + "epoch": 0.14926309187833176, + "grad_norm": 0.5170108675956726, + "learning_rate": 2.073578595317726e-05, + "loss": 1.1409, + "step": 238 + }, + { + "epoch": 0.14989024772656004, + "grad_norm": 0.5064777731895447, + "learning_rate": 2.040133779264214e-05, + "loss": 1.0937, + "step": 239 + }, + { + "epoch": 0.15051740357478832, + "grad_norm": 0.5006868243217468, + "learning_rate": 2.0066889632107023e-05, + "loss": 1.0439, + "step": 240 + }, + { + "epoch": 0.15114455942301663, + "grad_norm": 0.44411641359329224, + "learning_rate": 1.973244147157191e-05, + "loss": 0.9983, + "step": 241 + }, + { + "epoch": 0.15177171527124492, + "grad_norm": 0.4972614645957947, + "learning_rate": 1.939799331103679e-05, + "loss": 1.1376, + "step": 242 + }, + { + "epoch": 0.1523988711194732, + "grad_norm": 0.5033297538757324, + "learning_rate": 1.9063545150501676e-05, + "loss": 1.1652, + "step": 243 + }, + { + "epoch": 0.15302602696770148, + "grad_norm": 0.4621533751487732, + "learning_rate": 1.8729096989966554e-05, + "loss": 1.0376, + "step": 244 + }, + { + "epoch": 0.15365318281592977, + "grad_norm": 0.4779740273952484, + "learning_rate": 1.8394648829431436e-05, + "loss": 1.0468, + "step": 245 + }, + { + "epoch": 0.15428033866415805, + "grad_norm": 0.4619259536266327, + "learning_rate": 1.806020066889632e-05, + "loss": 1.0634, + "step": 246 + }, + { + "epoch": 0.15490749451238633, + "grad_norm": 0.5598598718643188, + "learning_rate": 1.7725752508361204e-05, + "loss": 1.2436, + "step": 247 + }, + { + "epoch": 0.15553465036061462, + "grad_norm": 0.4703880250453949, + "learning_rate": 1.739130434782609e-05, + "loss": 1.1322, + "step": 248 + }, + { + "epoch": 0.1561618062088429, + "grad_norm": 0.4648728668689728, + "learning_rate": 1.705685618729097e-05, + "loss": 1.0827, + "step": 249 + }, + { + "epoch": 0.15678896205707119, + "grad_norm": 0.5145319700241089, + "learning_rate": 1.6722408026755853e-05, + "loss": 1.1761, + "step": 250 + }, + { + "epoch": 0.15741611790529947, + "grad_norm": 0.5132664442062378, + "learning_rate": 1.6387959866220738e-05, + "loss": 1.0624, + "step": 251 + }, + { + "epoch": 0.15804327375352775, + "grad_norm": 0.4508794844150543, + "learning_rate": 1.605351170568562e-05, + "loss": 1.0546, + "step": 252 + }, + { + "epoch": 0.15867042960175604, + "grad_norm": 0.4499465823173523, + "learning_rate": 1.5719063545150502e-05, + "loss": 1.0157, + "step": 253 + }, + { + "epoch": 0.15929758544998432, + "grad_norm": 0.458778977394104, + "learning_rate": 1.5384615384615387e-05, + "loss": 1.0634, + "step": 254 + }, + { + "epoch": 0.1599247412982126, + "grad_norm": 0.5598222017288208, + "learning_rate": 1.5050167224080269e-05, + "loss": 1.1146, + "step": 255 + }, + { + "epoch": 0.1605518971464409, + "grad_norm": 0.5084378123283386, + "learning_rate": 1.4715719063545153e-05, + "loss": 1.075, + "step": 256 + }, + { + "epoch": 0.16117905299466917, + "grad_norm": 0.4847070276737213, + "learning_rate": 1.4381270903010033e-05, + "loss": 1.0363, + "step": 257 + }, + { + "epoch": 0.16180620884289745, + "grad_norm": 0.4656808376312256, + "learning_rate": 1.4046822742474917e-05, + "loss": 1.0491, + "step": 258 + }, + { + "epoch": 0.16243336469112574, + "grad_norm": 0.5068191289901733, + "learning_rate": 1.3712374581939799e-05, + "loss": 1.0642, + "step": 259 + }, + { + "epoch": 0.16306052053935402, + "grad_norm": 0.48283588886260986, + "learning_rate": 1.3377926421404682e-05, + "loss": 1.1085, + "step": 260 + }, + { + "epoch": 0.1636876763875823, + "grad_norm": 0.5512163043022156, + "learning_rate": 1.3043478260869566e-05, + "loss": 1.1055, + "step": 261 + }, + { + "epoch": 0.1643148322358106, + "grad_norm": 0.5653620362281799, + "learning_rate": 1.270903010033445e-05, + "loss": 1.0846, + "step": 262 + }, + { + "epoch": 0.16494198808403887, + "grad_norm": 0.48929548263549805, + "learning_rate": 1.2374581939799331e-05, + "loss": 1.0273, + "step": 263 + }, + { + "epoch": 0.16556914393226718, + "grad_norm": 0.5095831155776978, + "learning_rate": 1.2040133779264215e-05, + "loss": 1.1798, + "step": 264 + }, + { + "epoch": 0.16619629978049547, + "grad_norm": 0.5254250168800354, + "learning_rate": 1.1705685618729099e-05, + "loss": 1.0542, + "step": 265 + }, + { + "epoch": 0.16682345562872375, + "grad_norm": 0.505216658115387, + "learning_rate": 1.137123745819398e-05, + "loss": 1.1344, + "step": 266 + }, + { + "epoch": 0.16745061147695203, + "grad_norm": 0.5194329023361206, + "learning_rate": 1.1036789297658862e-05, + "loss": 1.0841, + "step": 267 + }, + { + "epoch": 0.16807776732518032, + "grad_norm": 0.5093269944190979, + "learning_rate": 1.0702341137123746e-05, + "loss": 1.1222, + "step": 268 + }, + { + "epoch": 0.1687049231734086, + "grad_norm": 0.5413921475410461, + "learning_rate": 1.036789297658863e-05, + "loss": 1.063, + "step": 269 + }, + { + "epoch": 0.16933207902163688, + "grad_norm": 0.49521705508232117, + "learning_rate": 1.0033444816053512e-05, + "loss": 1.0666, + "step": 270 + }, + { + "epoch": 0.16995923486986517, + "grad_norm": 0.479844868183136, + "learning_rate": 9.698996655518395e-06, + "loss": 1.0231, + "step": 271 + }, + { + "epoch": 0.17058639071809345, + "grad_norm": 0.5199151635169983, + "learning_rate": 9.364548494983277e-06, + "loss": 1.0276, + "step": 272 + }, + { + "epoch": 0.17121354656632173, + "grad_norm": 0.48625025153160095, + "learning_rate": 9.03010033444816e-06, + "loss": 1.0848, + "step": 273 + }, + { + "epoch": 0.17184070241455002, + "grad_norm": 0.4806617498397827, + "learning_rate": 8.695652173913044e-06, + "loss": 1.1002, + "step": 274 + }, + { + "epoch": 0.1724678582627783, + "grad_norm": 0.4978601336479187, + "learning_rate": 8.361204013377926e-06, + "loss": 1.1587, + "step": 275 + }, + { + "epoch": 0.17309501411100658, + "grad_norm": 0.4522283375263214, + "learning_rate": 8.02675585284281e-06, + "loss": 1.079, + "step": 276 + }, + { + "epoch": 0.17372216995923487, + "grad_norm": 0.5176008939743042, + "learning_rate": 7.692307692307694e-06, + "loss": 1.137, + "step": 277 + }, + { + "epoch": 0.17434932580746315, + "grad_norm": 0.5522940158843994, + "learning_rate": 7.357859531772576e-06, + "loss": 1.0736, + "step": 278 + }, + { + "epoch": 0.17497648165569143, + "grad_norm": 0.5581282377243042, + "learning_rate": 7.023411371237458e-06, + "loss": 1.1654, + "step": 279 + }, + { + "epoch": 0.17560363750391972, + "grad_norm": 0.48599082231521606, + "learning_rate": 6.688963210702341e-06, + "loss": 1.103, + "step": 280 + }, + { + "epoch": 0.176230793352148, + "grad_norm": 0.43440407514572144, + "learning_rate": 6.354515050167225e-06, + "loss": 0.9973, + "step": 281 + }, + { + "epoch": 0.17685794920037629, + "grad_norm": 0.5466330051422119, + "learning_rate": 6.0200668896321075e-06, + "loss": 1.0821, + "step": 282 + }, + { + "epoch": 0.17748510504860457, + "grad_norm": 0.5091434717178345, + "learning_rate": 5.68561872909699e-06, + "loss": 1.0652, + "step": 283 + }, + { + "epoch": 0.17811226089683285, + "grad_norm": 0.5003758072853088, + "learning_rate": 5.351170568561873e-06, + "loss": 1.0895, + "step": 284 + }, + { + "epoch": 0.17873941674506114, + "grad_norm": 0.47549617290496826, + "learning_rate": 5.016722408026756e-06, + "loss": 1.1288, + "step": 285 + }, + { + "epoch": 0.17936657259328942, + "grad_norm": 0.6036182641983032, + "learning_rate": 4.682274247491639e-06, + "loss": 1.1673, + "step": 286 + }, + { + "epoch": 0.17999372844151773, + "grad_norm": 0.4893932044506073, + "learning_rate": 4.347826086956522e-06, + "loss": 1.1532, + "step": 287 + }, + { + "epoch": 0.18062088428974601, + "grad_norm": 0.5435315370559692, + "learning_rate": 4.013377926421405e-06, + "loss": 1.0154, + "step": 288 + }, + { + "epoch": 0.1812480401379743, + "grad_norm": 0.4814181625843048, + "learning_rate": 3.678929765886288e-06, + "loss": 1.0401, + "step": 289 + }, + { + "epoch": 0.18187519598620258, + "grad_norm": 0.4699884057044983, + "learning_rate": 3.3444816053511705e-06, + "loss": 1.0851, + "step": 290 + }, + { + "epoch": 0.18250235183443086, + "grad_norm": 0.5204551815986633, + "learning_rate": 3.0100334448160537e-06, + "loss": 1.0937, + "step": 291 + }, + { + "epoch": 0.18312950768265915, + "grad_norm": 0.506284773349762, + "learning_rate": 2.6755852842809365e-06, + "loss": 1.1464, + "step": 292 + }, + { + "epoch": 0.18375666353088743, + "grad_norm": 0.4346812963485718, + "learning_rate": 2.3411371237458193e-06, + "loss": 1.054, + "step": 293 + }, + { + "epoch": 0.18438381937911572, + "grad_norm": 0.5776488780975342, + "learning_rate": 2.0066889632107025e-06, + "loss": 1.1026, + "step": 294 + }, + { + "epoch": 0.185010975227344, + "grad_norm": 0.49304401874542236, + "learning_rate": 1.6722408026755853e-06, + "loss": 1.0577, + "step": 295 + }, + { + "epoch": 0.18563813107557228, + "grad_norm": 0.5319218039512634, + "learning_rate": 1.3377926421404683e-06, + "loss": 1.0623, + "step": 296 + }, + { + "epoch": 0.18626528692380057, + "grad_norm": 0.45602694153785706, + "learning_rate": 1.0033444816053512e-06, + "loss": 1.0911, + "step": 297 + }, + { + "epoch": 0.18689244277202885, + "grad_norm": 0.5336869955062866, + "learning_rate": 6.688963210702341e-07, + "loss": 1.111, + "step": 298 + }, + { + "epoch": 0.18751959862025713, + "grad_norm": 0.4785906970500946, + "learning_rate": 3.3444816053511706e-07, + "loss": 1.0679, + "step": 299 + }, + { + "epoch": 0.18814675446848542, + "grad_norm": 0.5171682834625244, + "learning_rate": 0.0, + "loss": 1.1548, + "step": 300 + } + ], + "logging_steps": 1, + "max_steps": 300, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.181837914803405e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_English_Vietnamese/checkpoint-300/training_args.bin b/llama_English_Vietnamese/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..984a84386c6ae4cc8edda29a7b99c16abc5e8bcc --- /dev/null +++ b/llama_English_Vietnamese/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:747a7c15dfe44f00ef01aa5460a06c96d8853464a473d8b5701d974e74dd0f9c +size 5624 diff --git a/llama_French_Chinese/checkpoint-200/README.md b/llama_French_Chinese/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_French_Chinese/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_French_Chinese/checkpoint-200/adapter_config.json b/llama_French_Chinese/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..535d7d0a481d711e4571545184475f2cbe4f4225 --- /dev/null +++ b/llama_French_Chinese/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "down_proj", + "up_proj", + "gate_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_French_Chinese/checkpoint-200/adapter_model.safetensors b/llama_French_Chinese/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..875274676b4e827fac2b59d6b58d8b5ec1d12982 --- /dev/null +++ b/llama_French_Chinese/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c640c506d4af8c0a22a446df138e24297b1023f48d39908ce4280a59c65950d0 +size 167832240 diff --git a/llama_French_Chinese/checkpoint-200/optimizer.pt b/llama_French_Chinese/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c14229c87a2801b2c4acddcd43636b8de8c0341 --- /dev/null +++ b/llama_French_Chinese/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af277d207f16dc0ff8b482440fe209ed825b50e026c8e3747e82a5adc2130138 +size 85723284 diff --git a/llama_French_Chinese/checkpoint-200/rng_state.pth b/llama_French_Chinese/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21f2ff1c1a816463781d51760f8156e041f5979 --- /dev/null +++ b/llama_French_Chinese/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4981123ff3cf7bd5b7f76839e90e4776f747ca4c38dcb41876fa010c0dea8b23 +size 14244 diff --git a/llama_French_Chinese/checkpoint-200/scheduler.pt b/llama_French_Chinese/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_French_Chinese/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_French_Chinese/checkpoint-200/special_tokens_map.json b/llama_French_Chinese/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_French_Chinese/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_French_Chinese/checkpoint-200/tokenizer.json b/llama_French_Chinese/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_French_Chinese/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_French_Chinese/checkpoint-200/tokenizer_config.json b/llama_French_Chinese/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_French_Chinese/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_French_Chinese/checkpoint-200/trainer_state.json b/llama_French_Chinese/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ddc633b1bf6931588be72e2c3db2e02a371e592f --- /dev/null +++ b/llama_French_Chinese/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2792022792022792, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.011396011396011397, + "grad_norm": 0.9871039986610413, + "learning_rate": 0.0001, + "loss": 2.6668, + "step": 1 + }, + { + "epoch": 0.022792022792022793, + "grad_norm": 0.9231429100036621, + "learning_rate": 9.949748743718594e-05, + "loss": 2.505, + "step": 2 + }, + { + "epoch": 0.03418803418803419, + "grad_norm": 0.8871465921401978, + "learning_rate": 9.899497487437186e-05, + "loss": 2.5749, + "step": 3 + }, + { + "epoch": 0.045584045584045586, + "grad_norm": 0.9523301124572754, + "learning_rate": 9.84924623115578e-05, + "loss": 2.3674, + "step": 4 + }, + { + "epoch": 0.05698005698005698, + "grad_norm": 0.9899984002113342, + "learning_rate": 9.798994974874372e-05, + "loss": 2.2957, + "step": 5 + }, + { + "epoch": 0.06837606837606838, + "grad_norm": 1.0576900243759155, + "learning_rate": 9.748743718592965e-05, + "loss": 2.181, + "step": 6 + }, + { + "epoch": 0.07977207977207977, + "grad_norm": 0.9251910448074341, + "learning_rate": 9.698492462311559e-05, + "loss": 1.9938, + "step": 7 + }, + { + "epoch": 0.09116809116809117, + "grad_norm": 0.9626227617263794, + "learning_rate": 9.64824120603015e-05, + "loss": 1.9808, + "step": 8 + }, + { + "epoch": 0.10256410256410256, + "grad_norm": 0.8051118850708008, + "learning_rate": 9.597989949748745e-05, + "loss": 1.7974, + "step": 9 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 0.77226722240448, + "learning_rate": 9.547738693467337e-05, + "loss": 1.6734, + "step": 10 + }, + { + "epoch": 0.12535612535612536, + "grad_norm": 0.8098772764205933, + "learning_rate": 9.49748743718593e-05, + "loss": 1.6016, + "step": 11 + }, + { + "epoch": 0.13675213675213677, + "grad_norm": 0.8129666447639465, + "learning_rate": 9.447236180904523e-05, + "loss": 1.6037, + "step": 12 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 0.7679610848426819, + "learning_rate": 9.396984924623115e-05, + "loss": 1.5289, + "step": 13 + }, + { + "epoch": 0.15954415954415954, + "grad_norm": 0.6708409786224365, + "learning_rate": 9.34673366834171e-05, + "loss": 1.5366, + "step": 14 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 0.5417850613594055, + "learning_rate": 9.296482412060302e-05, + "loss": 1.4358, + "step": 15 + }, + { + "epoch": 0.18233618233618235, + "grad_norm": 0.6034852266311646, + "learning_rate": 9.246231155778895e-05, + "loss": 1.4843, + "step": 16 + }, + { + "epoch": 0.19373219373219372, + "grad_norm": 0.5972656011581421, + "learning_rate": 9.195979899497488e-05, + "loss": 1.5441, + "step": 17 + }, + { + "epoch": 0.20512820512820512, + "grad_norm": 0.7616991996765137, + "learning_rate": 9.14572864321608e-05, + "loss": 1.4856, + "step": 18 + }, + { + "epoch": 0.21652421652421652, + "grad_norm": 0.5629099011421204, + "learning_rate": 9.095477386934675e-05, + "loss": 1.4172, + "step": 19 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 0.5825836658477783, + "learning_rate": 9.045226130653267e-05, + "loss": 1.426, + "step": 20 + }, + { + "epoch": 0.23931623931623933, + "grad_norm": 0.565010130405426, + "learning_rate": 8.99497487437186e-05, + "loss": 1.3778, + "step": 21 + }, + { + "epoch": 0.25071225071225073, + "grad_norm": 0.556542158126831, + "learning_rate": 8.944723618090453e-05, + "loss": 1.4464, + "step": 22 + }, + { + "epoch": 0.2621082621082621, + "grad_norm": 0.584949254989624, + "learning_rate": 8.894472361809045e-05, + "loss": 1.3704, + "step": 23 + }, + { + "epoch": 0.27350427350427353, + "grad_norm": 0.6332094073295593, + "learning_rate": 8.84422110552764e-05, + "loss": 1.3247, + "step": 24 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 0.6917906403541565, + "learning_rate": 8.793969849246232e-05, + "loss": 1.4498, + "step": 25 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.5642353296279907, + "learning_rate": 8.743718592964825e-05, + "loss": 1.3229, + "step": 26 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 0.6337741017341614, + "learning_rate": 8.693467336683418e-05, + "loss": 1.3227, + "step": 27 + }, + { + "epoch": 0.3190883190883191, + "grad_norm": 0.7496374249458313, + "learning_rate": 8.64321608040201e-05, + "loss": 1.353, + "step": 28 + }, + { + "epoch": 0.33048433048433046, + "grad_norm": 0.6813539862632751, + "learning_rate": 8.592964824120603e-05, + "loss": 1.428, + "step": 29 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.6388533711433411, + "learning_rate": 8.542713567839196e-05, + "loss": 1.385, + "step": 30 + }, + { + "epoch": 0.35327635327635326, + "grad_norm": 0.7283644080162048, + "learning_rate": 8.49246231155779e-05, + "loss": 1.354, + "step": 31 + }, + { + "epoch": 0.3646723646723647, + "grad_norm": 0.7372009754180908, + "learning_rate": 8.442211055276383e-05, + "loss": 1.2992, + "step": 32 + }, + { + "epoch": 0.37606837606837606, + "grad_norm": 0.7183334827423096, + "learning_rate": 8.391959798994975e-05, + "loss": 1.3532, + "step": 33 + }, + { + "epoch": 0.38746438746438744, + "grad_norm": 0.7960866093635559, + "learning_rate": 8.341708542713568e-05, + "loss": 1.2791, + "step": 34 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 0.8270393013954163, + "learning_rate": 8.291457286432161e-05, + "loss": 1.2487, + "step": 35 + }, + { + "epoch": 0.41025641025641024, + "grad_norm": 0.8280441761016846, + "learning_rate": 8.241206030150754e-05, + "loss": 1.342, + "step": 36 + }, + { + "epoch": 0.42165242165242167, + "grad_norm": 0.8388327360153198, + "learning_rate": 8.190954773869348e-05, + "loss": 1.3211, + "step": 37 + }, + { + "epoch": 0.43304843304843305, + "grad_norm": 0.806606650352478, + "learning_rate": 8.14070351758794e-05, + "loss": 1.2908, + "step": 38 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.892876386642456, + "learning_rate": 8.090452261306533e-05, + "loss": 1.2993, + "step": 39 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 0.8041790127754211, + "learning_rate": 8.040201005025126e-05, + "loss": 1.3272, + "step": 40 + }, + { + "epoch": 0.4672364672364672, + "grad_norm": 0.8192927837371826, + "learning_rate": 7.989949748743719e-05, + "loss": 1.248, + "step": 41 + }, + { + "epoch": 0.47863247863247865, + "grad_norm": 0.7512325048446655, + "learning_rate": 7.939698492462313e-05, + "loss": 1.2338, + "step": 42 + }, + { + "epoch": 0.49002849002849, + "grad_norm": 0.6297069787979126, + "learning_rate": 7.889447236180904e-05, + "loss": 1.2802, + "step": 43 + }, + { + "epoch": 0.5014245014245015, + "grad_norm": 0.6053838133811951, + "learning_rate": 7.839195979899498e-05, + "loss": 1.2129, + "step": 44 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.5925951600074768, + "learning_rate": 7.788944723618091e-05, + "loss": 1.2667, + "step": 45 + }, + { + "epoch": 0.5242165242165242, + "grad_norm": 0.5782240629196167, + "learning_rate": 7.738693467336684e-05, + "loss": 1.2517, + "step": 46 + }, + { + "epoch": 0.5356125356125356, + "grad_norm": 0.5985696315765381, + "learning_rate": 7.688442211055277e-05, + "loss": 1.3541, + "step": 47 + }, + { + "epoch": 0.5470085470085471, + "grad_norm": 0.5704004764556885, + "learning_rate": 7.638190954773869e-05, + "loss": 1.373, + "step": 48 + }, + { + "epoch": 0.5584045584045584, + "grad_norm": 0.5727671384811401, + "learning_rate": 7.587939698492463e-05, + "loss": 1.3171, + "step": 49 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 0.6639573574066162, + "learning_rate": 7.537688442211056e-05, + "loss": 1.2673, + "step": 50 + }, + { + "epoch": 0.5811965811965812, + "grad_norm": 0.5043905973434448, + "learning_rate": 7.487437185929649e-05, + "loss": 1.2857, + "step": 51 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.5526666641235352, + "learning_rate": 7.437185929648241e-05, + "loss": 1.2303, + "step": 52 + }, + { + "epoch": 0.603988603988604, + "grad_norm": 0.5974281430244446, + "learning_rate": 7.386934673366834e-05, + "loss": 1.2534, + "step": 53 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 0.5859419703483582, + "learning_rate": 7.336683417085427e-05, + "loss": 1.2561, + "step": 54 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 0.5765503644943237, + "learning_rate": 7.28643216080402e-05, + "loss": 1.3016, + "step": 55 + }, + { + "epoch": 0.6381766381766382, + "grad_norm": 0.5191543698310852, + "learning_rate": 7.236180904522614e-05, + "loss": 1.2568, + "step": 56 + }, + { + "epoch": 0.6495726495726496, + "grad_norm": 0.6059960126876831, + "learning_rate": 7.185929648241206e-05, + "loss": 1.3077, + "step": 57 + }, + { + "epoch": 0.6609686609686609, + "grad_norm": 0.5887176990509033, + "learning_rate": 7.135678391959799e-05, + "loss": 1.2578, + "step": 58 + }, + { + "epoch": 0.6723646723646723, + "grad_norm": 0.6424916982650757, + "learning_rate": 7.085427135678392e-05, + "loss": 1.2132, + "step": 59 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.6358301043510437, + "learning_rate": 7.035175879396985e-05, + "loss": 1.2403, + "step": 60 + }, + { + "epoch": 0.6951566951566952, + "grad_norm": 0.6269975900650024, + "learning_rate": 6.984924623115579e-05, + "loss": 1.2293, + "step": 61 + }, + { + "epoch": 0.7065527065527065, + "grad_norm": 0.5706725716590881, + "learning_rate": 6.93467336683417e-05, + "loss": 1.2993, + "step": 62 + }, + { + "epoch": 0.717948717948718, + "grad_norm": 0.5702869296073914, + "learning_rate": 6.884422110552764e-05, + "loss": 1.1705, + "step": 63 + }, + { + "epoch": 0.7293447293447294, + "grad_norm": 0.5276445150375366, + "learning_rate": 6.834170854271357e-05, + "loss": 1.1537, + "step": 64 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.5852922201156616, + "learning_rate": 6.78391959798995e-05, + "loss": 1.0854, + "step": 65 + }, + { + "epoch": 0.7521367521367521, + "grad_norm": 0.5870815515518188, + "learning_rate": 6.733668341708544e-05, + "loss": 1.2502, + "step": 66 + }, + { + "epoch": 0.7635327635327636, + "grad_norm": 0.5926993489265442, + "learning_rate": 6.683417085427135e-05, + "loss": 1.2716, + "step": 67 + }, + { + "epoch": 0.7749287749287749, + "grad_norm": 0.7422418594360352, + "learning_rate": 6.633165829145729e-05, + "loss": 1.2789, + "step": 68 + }, + { + "epoch": 0.7863247863247863, + "grad_norm": 0.5254485011100769, + "learning_rate": 6.582914572864322e-05, + "loss": 1.1482, + "step": 69 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.5987156629562378, + "learning_rate": 6.532663316582915e-05, + "loss": 1.2925, + "step": 70 + }, + { + "epoch": 0.8091168091168092, + "grad_norm": 0.5969635248184204, + "learning_rate": 6.482412060301508e-05, + "loss": 1.2682, + "step": 71 + }, + { + "epoch": 0.8205128205128205, + "grad_norm": 0.6535382866859436, + "learning_rate": 6.4321608040201e-05, + "loss": 1.2574, + "step": 72 + }, + { + "epoch": 0.8319088319088319, + "grad_norm": 0.6279965043067932, + "learning_rate": 6.381909547738694e-05, + "loss": 1.2619, + "step": 73 + }, + { + "epoch": 0.8433048433048433, + "grad_norm": 0.5674328207969666, + "learning_rate": 6.331658291457287e-05, + "loss": 1.1574, + "step": 74 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.6168116331100464, + "learning_rate": 6.28140703517588e-05, + "loss": 1.2001, + "step": 75 + }, + { + "epoch": 0.8660968660968661, + "grad_norm": 0.6072240471839905, + "learning_rate": 6.231155778894473e-05, + "loss": 1.2604, + "step": 76 + }, + { + "epoch": 0.8774928774928775, + "grad_norm": 0.5809881091117859, + "learning_rate": 6.180904522613065e-05, + "loss": 1.1362, + "step": 77 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.6495585441589355, + "learning_rate": 6.130653266331658e-05, + "loss": 1.202, + "step": 78 + }, + { + "epoch": 0.9002849002849003, + "grad_norm": 0.7464923858642578, + "learning_rate": 6.080402010050251e-05, + "loss": 1.2657, + "step": 79 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.6762282848358154, + "learning_rate": 6.030150753768844e-05, + "loss": 1.2432, + "step": 80 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 0.6145843267440796, + "learning_rate": 5.979899497487438e-05, + "loss": 1.2786, + "step": 81 + }, + { + "epoch": 0.9344729344729344, + "grad_norm": 0.6269271969795227, + "learning_rate": 5.929648241206031e-05, + "loss": 1.2957, + "step": 82 + }, + { + "epoch": 0.9458689458689459, + "grad_norm": 0.6096307039260864, + "learning_rate": 5.879396984924623e-05, + "loss": 1.3012, + "step": 83 + }, + { + "epoch": 0.9572649572649573, + "grad_norm": 0.6496424078941345, + "learning_rate": 5.829145728643216e-05, + "loss": 1.2216, + "step": 84 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.5756160616874695, + "learning_rate": 5.778894472361809e-05, + "loss": 1.1236, + "step": 85 + }, + { + "epoch": 0.98005698005698, + "grad_norm": 0.6061781644821167, + "learning_rate": 5.728643216080403e-05, + "loss": 1.2202, + "step": 86 + }, + { + "epoch": 0.9914529914529915, + "grad_norm": 0.6368456482887268, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.3051, + "step": 87 + }, + { + "epoch": 1.002849002849003, + "grad_norm": 0.8351167440414429, + "learning_rate": 5.628140703517588e-05, + "loss": 1.4625, + "step": 88 + }, + { + "epoch": 1.0142450142450143, + "grad_norm": 0.6175570487976074, + "learning_rate": 5.577889447236181e-05, + "loss": 1.2589, + "step": 89 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.549256443977356, + "learning_rate": 5.527638190954774e-05, + "loss": 1.1082, + "step": 90 + }, + { + "epoch": 1.037037037037037, + "grad_norm": 0.6634002923965454, + "learning_rate": 5.477386934673368e-05, + "loss": 1.1863, + "step": 91 + }, + { + "epoch": 1.0484330484330484, + "grad_norm": 0.6986566781997681, + "learning_rate": 5.4271356783919604e-05, + "loss": 1.1625, + "step": 92 + }, + { + "epoch": 1.0598290598290598, + "grad_norm": 0.6375746726989746, + "learning_rate": 5.376884422110553e-05, + "loss": 1.1476, + "step": 93 + }, + { + "epoch": 1.0712250712250713, + "grad_norm": 0.6185861229896545, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.0827, + "step": 94 + }, + { + "epoch": 1.0826210826210827, + "grad_norm": 0.5859505534172058, + "learning_rate": 5.276381909547739e-05, + "loss": 1.2181, + "step": 95 + }, + { + "epoch": 1.0940170940170941, + "grad_norm": 0.5940748453140259, + "learning_rate": 5.226130653266332e-05, + "loss": 1.1346, + "step": 96 + }, + { + "epoch": 1.1054131054131053, + "grad_norm": 0.6339956521987915, + "learning_rate": 5.175879396984925e-05, + "loss": 1.1623, + "step": 97 + }, + { + "epoch": 1.1168091168091168, + "grad_norm": 0.6005598902702332, + "learning_rate": 5.125628140703518e-05, + "loss": 1.2123, + "step": 98 + }, + { + "epoch": 1.1282051282051282, + "grad_norm": 0.574739396572113, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.1068, + "step": 99 + }, + { + "epoch": 1.1396011396011396, + "grad_norm": 0.6985849738121033, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.2292, + "step": 100 + }, + { + "epoch": 1.150997150997151, + "grad_norm": 0.6487123966217041, + "learning_rate": 4.974874371859297e-05, + "loss": 1.1255, + "step": 101 + }, + { + "epoch": 1.1623931623931625, + "grad_norm": 0.6572225093841553, + "learning_rate": 4.92462311557789e-05, + "loss": 1.2205, + "step": 102 + }, + { + "epoch": 1.173789173789174, + "grad_norm": 0.5901849269866943, + "learning_rate": 4.874371859296483e-05, + "loss": 1.0574, + "step": 103 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.6682485342025757, + "learning_rate": 4.824120603015075e-05, + "loss": 1.1849, + "step": 104 + }, + { + "epoch": 1.1965811965811965, + "grad_norm": 0.6518735885620117, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.1048, + "step": 105 + }, + { + "epoch": 1.207977207977208, + "grad_norm": 0.6796931028366089, + "learning_rate": 4.723618090452262e-05, + "loss": 1.1022, + "step": 106 + }, + { + "epoch": 1.2193732193732194, + "grad_norm": 0.8284789323806763, + "learning_rate": 4.673366834170855e-05, + "loss": 1.216, + "step": 107 + }, + { + "epoch": 1.2307692307692308, + "grad_norm": 0.6832686066627502, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.1083, + "step": 108 + }, + { + "epoch": 1.242165242165242, + "grad_norm": 0.7995088696479797, + "learning_rate": 4.57286432160804e-05, + "loss": 1.2254, + "step": 109 + }, + { + "epoch": 1.2535612535612537, + "grad_norm": 0.773669958114624, + "learning_rate": 4.522613065326633e-05, + "loss": 1.1642, + "step": 110 + }, + { + "epoch": 1.264957264957265, + "grad_norm": 0.6698294878005981, + "learning_rate": 4.4723618090452266e-05, + "loss": 0.9971, + "step": 111 + }, + { + "epoch": 1.2763532763532763, + "grad_norm": 0.7574476003646851, + "learning_rate": 4.42211055276382e-05, + "loss": 1.2919, + "step": 112 + }, + { + "epoch": 1.2877492877492878, + "grad_norm": 0.8053959012031555, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.1633, + "step": 113 + }, + { + "epoch": 1.2991452991452992, + "grad_norm": 0.6861171722412109, + "learning_rate": 4.321608040201005e-05, + "loss": 1.1545, + "step": 114 + }, + { + "epoch": 1.3105413105413106, + "grad_norm": 0.6895201802253723, + "learning_rate": 4.271356783919598e-05, + "loss": 1.1844, + "step": 115 + }, + { + "epoch": 1.3219373219373218, + "grad_norm": 0.6611140370368958, + "learning_rate": 4.2211055276381914e-05, + "loss": 1.1064, + "step": 116 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.7180519700050354, + "learning_rate": 4.170854271356784e-05, + "loss": 1.1405, + "step": 117 + }, + { + "epoch": 1.3447293447293447, + "grad_norm": 0.6216800808906555, + "learning_rate": 4.120603015075377e-05, + "loss": 1.0658, + "step": 118 + }, + { + "epoch": 1.3561253561253561, + "grad_norm": 0.7003360986709595, + "learning_rate": 4.07035175879397e-05, + "loss": 1.219, + "step": 119 + }, + { + "epoch": 1.3675213675213675, + "grad_norm": 0.9064440131187439, + "learning_rate": 4.020100502512563e-05, + "loss": 1.1546, + "step": 120 + }, + { + "epoch": 1.378917378917379, + "grad_norm": 0.7493278980255127, + "learning_rate": 3.969849246231156e-05, + "loss": 1.062, + "step": 121 + }, + { + "epoch": 1.3903133903133904, + "grad_norm": 0.6933432817459106, + "learning_rate": 3.919597989949749e-05, + "loss": 1.1915, + "step": 122 + }, + { + "epoch": 1.4017094017094016, + "grad_norm": 0.6816499829292297, + "learning_rate": 3.869346733668342e-05, + "loss": 1.0731, + "step": 123 + }, + { + "epoch": 1.413105413105413, + "grad_norm": 0.6943508982658386, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.1365, + "step": 124 + }, + { + "epoch": 1.4245014245014245, + "grad_norm": 0.6723116636276245, + "learning_rate": 3.768844221105528e-05, + "loss": 1.192, + "step": 125 + }, + { + "epoch": 1.435897435897436, + "grad_norm": 0.6686513423919678, + "learning_rate": 3.7185929648241204e-05, + "loss": 1.1027, + "step": 126 + }, + { + "epoch": 1.4472934472934473, + "grad_norm": 0.6846156716346741, + "learning_rate": 3.668341708542714e-05, + "loss": 1.1051, + "step": 127 + }, + { + "epoch": 1.4586894586894588, + "grad_norm": 0.8109652996063232, + "learning_rate": 3.618090452261307e-05, + "loss": 1.2156, + "step": 128 + }, + { + "epoch": 1.4700854700854702, + "grad_norm": 0.6826179027557373, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.0751, + "step": 129 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.7359035611152649, + "learning_rate": 3.517587939698493e-05, + "loss": 1.2413, + "step": 130 + }, + { + "epoch": 1.4928774928774928, + "grad_norm": 0.7042872905731201, + "learning_rate": 3.467336683417085e-05, + "loss": 1.1766, + "step": 131 + }, + { + "epoch": 1.5042735042735043, + "grad_norm": 0.7514035105705261, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.2111, + "step": 132 + }, + { + "epoch": 1.5156695156695157, + "grad_norm": 0.7298324108123779, + "learning_rate": 3.366834170854272e-05, + "loss": 1.1239, + "step": 133 + }, + { + "epoch": 1.5270655270655271, + "grad_norm": 0.7031937837600708, + "learning_rate": 3.3165829145728643e-05, + "loss": 1.0637, + "step": 134 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 0.6622602939605713, + "learning_rate": 3.2663316582914576e-05, + "loss": 1.2246, + "step": 135 + }, + { + "epoch": 1.54985754985755, + "grad_norm": 0.7596352100372314, + "learning_rate": 3.21608040201005e-05, + "loss": 1.0501, + "step": 136 + }, + { + "epoch": 1.5612535612535612, + "grad_norm": 0.7074221968650818, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.0558, + "step": 137 + }, + { + "epoch": 1.5726495726495726, + "grad_norm": 0.6557238101959229, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.0601, + "step": 138 + }, + { + "epoch": 1.584045584045584, + "grad_norm": 0.7118769288063049, + "learning_rate": 3.065326633165829e-05, + "loss": 1.147, + "step": 139 + }, + { + "epoch": 1.5954415954415955, + "grad_norm": 0.704740583896637, + "learning_rate": 3.015075376884422e-05, + "loss": 1.189, + "step": 140 + }, + { + "epoch": 1.606837606837607, + "grad_norm": 0.7223531007766724, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.0355, + "step": 141 + }, + { + "epoch": 1.618233618233618, + "grad_norm": 0.7895699739456177, + "learning_rate": 2.914572864321608e-05, + "loss": 1.2845, + "step": 142 + }, + { + "epoch": 1.6296296296296298, + "grad_norm": 0.7170248031616211, + "learning_rate": 2.8643216080402015e-05, + "loss": 0.9934, + "step": 143 + }, + { + "epoch": 1.641025641025641, + "grad_norm": 0.8193749189376831, + "learning_rate": 2.814070351758794e-05, + "loss": 1.2065, + "step": 144 + }, + { + "epoch": 1.6524216524216524, + "grad_norm": 0.7957361936569214, + "learning_rate": 2.763819095477387e-05, + "loss": 1.1068, + "step": 145 + }, + { + "epoch": 1.6638176638176638, + "grad_norm": 0.6627408266067505, + "learning_rate": 2.7135678391959802e-05, + "loss": 0.9091, + "step": 146 + }, + { + "epoch": 1.6752136752136753, + "grad_norm": 0.7310336828231812, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.1442, + "step": 147 + }, + { + "epoch": 1.6866096866096867, + "grad_norm": 0.7140065431594849, + "learning_rate": 2.613065326633166e-05, + "loss": 1.1883, + "step": 148 + }, + { + "epoch": 1.698005698005698, + "grad_norm": 0.7325850129127502, + "learning_rate": 2.562814070351759e-05, + "loss": 1.0358, + "step": 149 + }, + { + "epoch": 1.7094017094017095, + "grad_norm": 0.776992917060852, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.1951, + "step": 150 + }, + { + "epoch": 1.7207977207977208, + "grad_norm": 0.740685224533081, + "learning_rate": 2.462311557788945e-05, + "loss": 1.0279, + "step": 151 + }, + { + "epoch": 1.7321937321937322, + "grad_norm": 0.7767820954322815, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.2009, + "step": 152 + }, + { + "epoch": 1.7435897435897436, + "grad_norm": 0.7557656764984131, + "learning_rate": 2.361809045226131e-05, + "loss": 1.1054, + "step": 153 + }, + { + "epoch": 1.7549857549857548, + "grad_norm": 0.8359375, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.1682, + "step": 154 + }, + { + "epoch": 1.7663817663817665, + "grad_norm": 0.712339460849762, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.0838, + "step": 155 + }, + { + "epoch": 1.7777777777777777, + "grad_norm": 0.7319413423538208, + "learning_rate": 2.21105527638191e-05, + "loss": 1.1231, + "step": 156 + }, + { + "epoch": 1.7891737891737893, + "grad_norm": 0.8280380368232727, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.2348, + "step": 157 + }, + { + "epoch": 1.8005698005698005, + "grad_norm": 0.7554569244384766, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.102, + "step": 158 + }, + { + "epoch": 1.811965811965812, + "grad_norm": 0.8154715895652771, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.2029, + "step": 159 + }, + { + "epoch": 1.8233618233618234, + "grad_norm": 0.7579538226127625, + "learning_rate": 2.0100502512562815e-05, + "loss": 1.1129, + "step": 160 + }, + { + "epoch": 1.8347578347578346, + "grad_norm": 0.7366315126419067, + "learning_rate": 1.9597989949748744e-05, + "loss": 1.1557, + "step": 161 + }, + { + "epoch": 1.8461538461538463, + "grad_norm": 0.7522907257080078, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.0794, + "step": 162 + }, + { + "epoch": 1.8575498575498575, + "grad_norm": 0.7626732587814331, + "learning_rate": 1.8592964824120602e-05, + "loss": 1.0256, + "step": 163 + }, + { + "epoch": 1.868945868945869, + "grad_norm": 0.7189956307411194, + "learning_rate": 1.8090452261306535e-05, + "loss": 1.1057, + "step": 164 + }, + { + "epoch": 1.8803418803418803, + "grad_norm": 0.7951769828796387, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.107, + "step": 165 + }, + { + "epoch": 1.8917378917378918, + "grad_norm": 0.7904325723648071, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.1165, + "step": 166 + }, + { + "epoch": 1.9031339031339032, + "grad_norm": 0.7615857124328613, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.0682, + "step": 167 + }, + { + "epoch": 1.9145299145299144, + "grad_norm": 0.7807989716529846, + "learning_rate": 1.608040201005025e-05, + "loss": 1.1352, + "step": 168 + }, + { + "epoch": 1.925925925925926, + "grad_norm": 0.7385022640228271, + "learning_rate": 1.5577889447236183e-05, + "loss": 1.0313, + "step": 169 + }, + { + "epoch": 1.9373219373219372, + "grad_norm": 0.7195942401885986, + "learning_rate": 1.507537688442211e-05, + "loss": 1.092, + "step": 170 + }, + { + "epoch": 1.9487179487179487, + "grad_norm": 0.693107545375824, + "learning_rate": 1.457286432160804e-05, + "loss": 1.0655, + "step": 171 + }, + { + "epoch": 1.96011396011396, + "grad_norm": 0.7481961846351624, + "learning_rate": 1.407035175879397e-05, + "loss": 1.2206, + "step": 172 + }, + { + "epoch": 1.9715099715099715, + "grad_norm": 0.736751139163971, + "learning_rate": 1.3567839195979901e-05, + "loss": 1.0628, + "step": 173 + }, + { + "epoch": 1.982905982905983, + "grad_norm": 0.7184632420539856, + "learning_rate": 1.306532663316583e-05, + "loss": 1.0972, + "step": 174 + }, + { + "epoch": 1.9943019943019942, + "grad_norm": 0.9438643455505371, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.3963, + "step": 175 + }, + { + "epoch": 2.005698005698006, + "grad_norm": 0.9274888038635254, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.3134, + "step": 176 + }, + { + "epoch": 2.017094017094017, + "grad_norm": 0.7346077561378479, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.0771, + "step": 177 + }, + { + "epoch": 2.0284900284900287, + "grad_norm": 0.7025390267372131, + "learning_rate": 1.105527638190955e-05, + "loss": 1.088, + "step": 178 + }, + { + "epoch": 2.03988603988604, + "grad_norm": 0.6729663610458374, + "learning_rate": 1.0552763819095479e-05, + "loss": 0.9734, + "step": 179 + }, + { + "epoch": 2.051282051282051, + "grad_norm": 0.7307983040809631, + "learning_rate": 1.0050251256281408e-05, + "loss": 1.1273, + "step": 180 + }, + { + "epoch": 2.0626780626780628, + "grad_norm": 0.7329637408256531, + "learning_rate": 9.547738693467337e-06, + "loss": 1.0891, + "step": 181 + }, + { + "epoch": 2.074074074074074, + "grad_norm": 0.8447920680046082, + "learning_rate": 9.045226130653267e-06, + "loss": 1.0384, + "step": 182 + }, + { + "epoch": 2.0854700854700856, + "grad_norm": 0.6870452761650085, + "learning_rate": 8.542713567839196e-06, + "loss": 1.0756, + "step": 183 + }, + { + "epoch": 2.096866096866097, + "grad_norm": 0.7378408908843994, + "learning_rate": 8.040201005025125e-06, + "loss": 1.0938, + "step": 184 + }, + { + "epoch": 2.1082621082621085, + "grad_norm": 0.6987632513046265, + "learning_rate": 7.537688442211055e-06, + "loss": 1.0897, + "step": 185 + }, + { + "epoch": 2.1196581196581197, + "grad_norm": 0.754769504070282, + "learning_rate": 7.035175879396985e-06, + "loss": 1.068, + "step": 186 + }, + { + "epoch": 2.131054131054131, + "grad_norm": 0.7191391587257385, + "learning_rate": 6.532663316582915e-06, + "loss": 1.1171, + "step": 187 + }, + { + "epoch": 2.1424501424501425, + "grad_norm": 0.6744558811187744, + "learning_rate": 6.030150753768844e-06, + "loss": 1.0239, + "step": 188 + }, + { + "epoch": 2.1538461538461537, + "grad_norm": 0.7478334307670593, + "learning_rate": 5.527638190954775e-06, + "loss": 1.2004, + "step": 189 + }, + { + "epoch": 2.1652421652421654, + "grad_norm": 0.7183063626289368, + "learning_rate": 5.025125628140704e-06, + "loss": 0.9658, + "step": 190 + }, + { + "epoch": 2.1766381766381766, + "grad_norm": 0.7518768906593323, + "learning_rate": 4.522613065326634e-06, + "loss": 1.119, + "step": 191 + }, + { + "epoch": 2.1880341880341883, + "grad_norm": 0.6845582127571106, + "learning_rate": 4.020100502512563e-06, + "loss": 1.0178, + "step": 192 + }, + { + "epoch": 2.1994301994301995, + "grad_norm": 0.7310692071914673, + "learning_rate": 3.5175879396984926e-06, + "loss": 1.0958, + "step": 193 + }, + { + "epoch": 2.2108262108262107, + "grad_norm": 0.6994593739509583, + "learning_rate": 3.015075376884422e-06, + "loss": 1.0336, + "step": 194 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.6808854937553406, + "learning_rate": 2.512562814070352e-06, + "loss": 1.0674, + "step": 195 + }, + { + "epoch": 2.2336182336182335, + "grad_norm": 0.7088428139686584, + "learning_rate": 2.0100502512562813e-06, + "loss": 1.0415, + "step": 196 + }, + { + "epoch": 2.245014245014245, + "grad_norm": 0.7182581424713135, + "learning_rate": 1.507537688442211e-06, + "loss": 1.114, + "step": 197 + }, + { + "epoch": 2.2564102564102564, + "grad_norm": 0.7024622559547424, + "learning_rate": 1.0050251256281407e-06, + "loss": 1.0319, + "step": 198 + }, + { + "epoch": 2.267806267806268, + "grad_norm": 0.642276406288147, + "learning_rate": 5.025125628140703e-07, + "loss": 0.9277, + "step": 199 + }, + { + "epoch": 2.2792022792022792, + "grad_norm": 0.7150776982307434, + "learning_rate": 0.0, + "loss": 1.1776, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.8034138177470464e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_French_Chinese/checkpoint-200/training_args.bin b/llama_French_Chinese/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6f72a609d65a53d8f0b4def9badb67a4bae18b76 --- /dev/null +++ b/llama_French_Chinese/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a48e722fed4cdcf3ae5c85a8b6bf146040dd433d380782eb1f2f85c6f010075 +size 5624 diff --git a/llama_French_English/checkpoint-200/README.md b/llama_French_English/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_French_English/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_French_English/checkpoint-200/adapter_config.json b/llama_French_English/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..75596259f22ada5c0d2219659b1255235677bd64 --- /dev/null +++ b/llama_French_English/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "up_proj", + "o_proj", + "k_proj", + "gate_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_French_English/checkpoint-200/adapter_model.safetensors b/llama_French_English/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4db754eecc38b51a3fe423d328cc0d3c20aa3dac --- /dev/null +++ b/llama_French_English/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:950104d3419401716c02f83eeca9b7a8d5e0755529054e51788365d0a501a1f5 +size 167832240 diff --git a/llama_French_English/checkpoint-200/optimizer.pt b/llama_French_English/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f4d768d0d73a85abd505becaa8ff1fb4b7e1a40 --- /dev/null +++ b/llama_French_English/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42e3b9deb1538d1fe4e016c5c0801715f1d5fc842ee97492674b59be23727cb2 +size 85723284 diff --git a/llama_French_English/checkpoint-200/rng_state.pth b/llama_French_English/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21f2ff1c1a816463781d51760f8156e041f5979 --- /dev/null +++ b/llama_French_English/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4981123ff3cf7bd5b7f76839e90e4776f747ca4c38dcb41876fa010c0dea8b23 +size 14244 diff --git a/llama_French_English/checkpoint-200/scheduler.pt b/llama_French_English/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_French_English/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_French_English/checkpoint-200/special_tokens_map.json b/llama_French_English/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_French_English/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_French_English/checkpoint-200/tokenizer.json b/llama_French_English/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_French_English/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_French_English/checkpoint-200/tokenizer_config.json b/llama_French_English/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_French_English/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_French_English/checkpoint-200/trainer_state.json b/llama_French_English/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2cbd44d50aa6e634d96e3c771f987f3fbf50a908 --- /dev/null +++ b/llama_French_English/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2792022792022792, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.011396011396011397, + "grad_norm": 1.1320723295211792, + "learning_rate": 0.0001, + "loss": 2.6194, + "step": 1 + }, + { + "epoch": 0.022792022792022793, + "grad_norm": 1.074873685836792, + "learning_rate": 9.949748743718594e-05, + "loss": 2.4967, + "step": 2 + }, + { + "epoch": 0.03418803418803419, + "grad_norm": 0.9819158315658569, + "learning_rate": 9.899497487437186e-05, + "loss": 2.5706, + "step": 3 + }, + { + "epoch": 0.045584045584045586, + "grad_norm": 1.0893902778625488, + "learning_rate": 9.84924623115578e-05, + "loss": 2.3442, + "step": 4 + }, + { + "epoch": 0.05698005698005698, + "grad_norm": 1.0800572633743286, + "learning_rate": 9.798994974874372e-05, + "loss": 2.2453, + "step": 5 + }, + { + "epoch": 0.06837606837606838, + "grad_norm": 1.123698353767395, + "learning_rate": 9.748743718592965e-05, + "loss": 2.0481, + "step": 6 + }, + { + "epoch": 0.07977207977207977, + "grad_norm": 1.0124568939208984, + "learning_rate": 9.698492462311559e-05, + "loss": 1.8597, + "step": 7 + }, + { + "epoch": 0.09116809116809117, + "grad_norm": 1.0251628160476685, + "learning_rate": 9.64824120603015e-05, + "loss": 1.8416, + "step": 8 + }, + { + "epoch": 0.10256410256410256, + "grad_norm": 0.9519540667533875, + "learning_rate": 9.597989949748745e-05, + "loss": 1.6856, + "step": 9 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 0.8699062466621399, + "learning_rate": 9.547738693467337e-05, + "loss": 1.5728, + "step": 10 + }, + { + "epoch": 0.12535612535612536, + "grad_norm": 0.8299568295478821, + "learning_rate": 9.49748743718593e-05, + "loss": 1.4448, + "step": 11 + }, + { + "epoch": 0.13675213675213677, + "grad_norm": 0.8417969346046448, + "learning_rate": 9.447236180904523e-05, + "loss": 1.4682, + "step": 12 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 0.5581567883491516, + "learning_rate": 9.396984924623115e-05, + "loss": 1.4331, + "step": 13 + }, + { + "epoch": 0.15954415954415954, + "grad_norm": 0.587202787399292, + "learning_rate": 9.34673366834171e-05, + "loss": 1.4543, + "step": 14 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 0.48191845417022705, + "learning_rate": 9.296482412060302e-05, + "loss": 1.3521, + "step": 15 + }, + { + "epoch": 0.18233618233618235, + "grad_norm": 0.5977433323860168, + "learning_rate": 9.246231155778895e-05, + "loss": 1.4072, + "step": 16 + }, + { + "epoch": 0.19373219373219372, + "grad_norm": 0.5247688889503479, + "learning_rate": 9.195979899497488e-05, + "loss": 1.4521, + "step": 17 + }, + { + "epoch": 0.20512820512820512, + "grad_norm": 0.5417962670326233, + "learning_rate": 9.14572864321608e-05, + "loss": 1.3901, + "step": 18 + }, + { + "epoch": 0.21652421652421652, + "grad_norm": 0.5432206392288208, + "learning_rate": 9.095477386934675e-05, + "loss": 1.343, + "step": 19 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 0.5314046144485474, + "learning_rate": 9.045226130653267e-05, + "loss": 1.334, + "step": 20 + }, + { + "epoch": 0.23931623931623933, + "grad_norm": 0.5910053849220276, + "learning_rate": 8.99497487437186e-05, + "loss": 1.2363, + "step": 21 + }, + { + "epoch": 0.25071225071225073, + "grad_norm": 0.6016846895217896, + "learning_rate": 8.944723618090453e-05, + "loss": 1.3871, + "step": 22 + }, + { + "epoch": 0.2621082621082621, + "grad_norm": 0.6052981019020081, + "learning_rate": 8.894472361809045e-05, + "loss": 1.3115, + "step": 23 + }, + { + "epoch": 0.27350427350427353, + "grad_norm": 0.6964677572250366, + "learning_rate": 8.84422110552764e-05, + "loss": 1.2591, + "step": 24 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 0.7068831324577332, + "learning_rate": 8.793969849246232e-05, + "loss": 1.3526, + "step": 25 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.682155191898346, + "learning_rate": 8.743718592964825e-05, + "loss": 1.2832, + "step": 26 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 0.6879757642745972, + "learning_rate": 8.693467336683418e-05, + "loss": 1.2843, + "step": 27 + }, + { + "epoch": 0.3190883190883191, + "grad_norm": 0.7914606332778931, + "learning_rate": 8.64321608040201e-05, + "loss": 1.2874, + "step": 28 + }, + { + "epoch": 0.33048433048433046, + "grad_norm": 0.8195975422859192, + "learning_rate": 8.592964824120603e-05, + "loss": 1.356, + "step": 29 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.7226553559303284, + "learning_rate": 8.542713567839196e-05, + "loss": 1.3485, + "step": 30 + }, + { + "epoch": 0.35327635327635326, + "grad_norm": 0.8337236046791077, + "learning_rate": 8.49246231155779e-05, + "loss": 1.2785, + "step": 31 + }, + { + "epoch": 0.3646723646723647, + "grad_norm": 0.938544511795044, + "learning_rate": 8.442211055276383e-05, + "loss": 1.2715, + "step": 32 + }, + { + "epoch": 0.37606837606837606, + "grad_norm": 0.8477500677108765, + "learning_rate": 8.391959798994975e-05, + "loss": 1.3044, + "step": 33 + }, + { + "epoch": 0.38746438746438744, + "grad_norm": 0.9286187887191772, + "learning_rate": 8.341708542713568e-05, + "loss": 1.2345, + "step": 34 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 0.9435834884643555, + "learning_rate": 8.291457286432161e-05, + "loss": 1.2018, + "step": 35 + }, + { + "epoch": 0.41025641025641024, + "grad_norm": 0.8870838284492493, + "learning_rate": 8.241206030150754e-05, + "loss": 1.2831, + "step": 36 + }, + { + "epoch": 0.42165242165242167, + "grad_norm": 0.7902332544326782, + "learning_rate": 8.190954773869348e-05, + "loss": 1.2597, + "step": 37 + }, + { + "epoch": 0.43304843304843305, + "grad_norm": 0.6859201788902283, + "learning_rate": 8.14070351758794e-05, + "loss": 1.1896, + "step": 38 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.5700832605361938, + "learning_rate": 8.090452261306533e-05, + "loss": 1.2188, + "step": 39 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 0.4886611998081207, + "learning_rate": 8.040201005025126e-05, + "loss": 1.2997, + "step": 40 + }, + { + "epoch": 0.4672364672364672, + "grad_norm": 0.5042343735694885, + "learning_rate": 7.989949748743719e-05, + "loss": 1.2139, + "step": 41 + }, + { + "epoch": 0.47863247863247865, + "grad_norm": 0.5992743968963623, + "learning_rate": 7.939698492462313e-05, + "loss": 1.2207, + "step": 42 + }, + { + "epoch": 0.49002849002849, + "grad_norm": 0.4791593551635742, + "learning_rate": 7.889447236180904e-05, + "loss": 1.2305, + "step": 43 + }, + { + "epoch": 0.5014245014245015, + "grad_norm": 0.4417913258075714, + "learning_rate": 7.839195979899498e-05, + "loss": 1.1662, + "step": 44 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.566320538520813, + "learning_rate": 7.788944723618091e-05, + "loss": 1.24, + "step": 45 + }, + { + "epoch": 0.5242165242165242, + "grad_norm": 0.5278592705726624, + "learning_rate": 7.738693467336684e-05, + "loss": 1.259, + "step": 46 + }, + { + "epoch": 0.5356125356125356, + "grad_norm": 0.5079882740974426, + "learning_rate": 7.688442211055277e-05, + "loss": 1.3768, + "step": 47 + }, + { + "epoch": 0.5470085470085471, + "grad_norm": 0.4967348277568817, + "learning_rate": 7.638190954773869e-05, + "loss": 1.3438, + "step": 48 + }, + { + "epoch": 0.5584045584045584, + "grad_norm": 0.46559643745422363, + "learning_rate": 7.587939698492463e-05, + "loss": 1.2912, + "step": 49 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 0.5026686787605286, + "learning_rate": 7.537688442211056e-05, + "loss": 1.246, + "step": 50 + }, + { + "epoch": 0.5811965811965812, + "grad_norm": 0.40778547525405884, + "learning_rate": 7.487437185929649e-05, + "loss": 1.2496, + "step": 51 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.48586440086364746, + "learning_rate": 7.437185929648241e-05, + "loss": 1.209, + "step": 52 + }, + { + "epoch": 0.603988603988604, + "grad_norm": 0.49833163619041443, + "learning_rate": 7.386934673366834e-05, + "loss": 1.2395, + "step": 53 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 0.4486771523952484, + "learning_rate": 7.336683417085427e-05, + "loss": 1.2258, + "step": 54 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 0.5028964281082153, + "learning_rate": 7.28643216080402e-05, + "loss": 1.2461, + "step": 55 + }, + { + "epoch": 0.6381766381766382, + "grad_norm": 0.5064070820808411, + "learning_rate": 7.236180904522614e-05, + "loss": 1.2321, + "step": 56 + }, + { + "epoch": 0.6495726495726496, + "grad_norm": 0.46759483218193054, + "learning_rate": 7.185929648241206e-05, + "loss": 1.3154, + "step": 57 + }, + { + "epoch": 0.6609686609686609, + "grad_norm": 0.49418556690216064, + "learning_rate": 7.135678391959799e-05, + "loss": 1.2402, + "step": 58 + }, + { + "epoch": 0.6723646723646723, + "grad_norm": 0.502903938293457, + "learning_rate": 7.085427135678392e-05, + "loss": 1.1549, + "step": 59 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.5024760961532593, + "learning_rate": 7.035175879396985e-05, + "loss": 1.2205, + "step": 60 + }, + { + "epoch": 0.6951566951566952, + "grad_norm": 0.4872761368751526, + "learning_rate": 6.984924623115579e-05, + "loss": 1.2184, + "step": 61 + }, + { + "epoch": 0.7065527065527065, + "grad_norm": 0.45815521478652954, + "learning_rate": 6.93467336683417e-05, + "loss": 1.2792, + "step": 62 + }, + { + "epoch": 0.717948717948718, + "grad_norm": 0.4826825261116028, + "learning_rate": 6.884422110552764e-05, + "loss": 1.1719, + "step": 63 + }, + { + "epoch": 0.7293447293447294, + "grad_norm": 0.5008621215820312, + "learning_rate": 6.834170854271357e-05, + "loss": 1.1454, + "step": 64 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.48568105697631836, + "learning_rate": 6.78391959798995e-05, + "loss": 1.1041, + "step": 65 + }, + { + "epoch": 0.7521367521367521, + "grad_norm": 0.4749748408794403, + "learning_rate": 6.733668341708544e-05, + "loss": 1.1979, + "step": 66 + }, + { + "epoch": 0.7635327635327636, + "grad_norm": 0.47079846262931824, + "learning_rate": 6.683417085427135e-05, + "loss": 1.2365, + "step": 67 + }, + { + "epoch": 0.7749287749287749, + "grad_norm": 0.6191930174827576, + "learning_rate": 6.633165829145729e-05, + "loss": 1.2581, + "step": 68 + }, + { + "epoch": 0.7863247863247863, + "grad_norm": 0.44552376866340637, + "learning_rate": 6.582914572864322e-05, + "loss": 1.1365, + "step": 69 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.5019327998161316, + "learning_rate": 6.532663316582915e-05, + "loss": 1.3265, + "step": 70 + }, + { + "epoch": 0.8091168091168092, + "grad_norm": 0.5034716129302979, + "learning_rate": 6.482412060301508e-05, + "loss": 1.2444, + "step": 71 + }, + { + "epoch": 0.8205128205128205, + "grad_norm": 0.48688504099845886, + "learning_rate": 6.4321608040201e-05, + "loss": 1.1905, + "step": 72 + }, + { + "epoch": 0.8319088319088319, + "grad_norm": 0.5158437490463257, + "learning_rate": 6.381909547738694e-05, + "loss": 1.226, + "step": 73 + }, + { + "epoch": 0.8433048433048433, + "grad_norm": 0.5071325898170471, + "learning_rate": 6.331658291457287e-05, + "loss": 1.1593, + "step": 74 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.5373358130455017, + "learning_rate": 6.28140703517588e-05, + "loss": 1.2257, + "step": 75 + }, + { + "epoch": 0.8660968660968661, + "grad_norm": 0.5040206909179688, + "learning_rate": 6.231155778894473e-05, + "loss": 1.3011, + "step": 76 + }, + { + "epoch": 0.8774928774928775, + "grad_norm": 0.4803583025932312, + "learning_rate": 6.180904522613065e-05, + "loss": 1.1372, + "step": 77 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.5414230227470398, + "learning_rate": 6.130653266331658e-05, + "loss": 1.22, + "step": 78 + }, + { + "epoch": 0.9002849002849003, + "grad_norm": 0.6115792989730835, + "learning_rate": 6.080402010050251e-05, + "loss": 1.2321, + "step": 79 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.5015672445297241, + "learning_rate": 6.030150753768844e-05, + "loss": 1.2157, + "step": 80 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 0.5275970101356506, + "learning_rate": 5.979899497487438e-05, + "loss": 1.2875, + "step": 81 + }, + { + "epoch": 0.9344729344729344, + "grad_norm": 0.5303878784179688, + "learning_rate": 5.929648241206031e-05, + "loss": 1.2977, + "step": 82 + }, + { + "epoch": 0.9458689458689459, + "grad_norm": 0.4689614772796631, + "learning_rate": 5.879396984924623e-05, + "loss": 1.263, + "step": 83 + }, + { + "epoch": 0.9572649572649573, + "grad_norm": 0.5349608659744263, + "learning_rate": 5.829145728643216e-05, + "loss": 1.2119, + "step": 84 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.49011704325675964, + "learning_rate": 5.778894472361809e-05, + "loss": 1.1288, + "step": 85 + }, + { + "epoch": 0.98005698005698, + "grad_norm": 0.48137786984443665, + "learning_rate": 5.728643216080403e-05, + "loss": 1.1813, + "step": 86 + }, + { + "epoch": 0.9914529914529915, + "grad_norm": 0.5291913151741028, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.3011, + "step": 87 + }, + { + "epoch": 1.002849002849003, + "grad_norm": 0.7322906255722046, + "learning_rate": 5.628140703517588e-05, + "loss": 1.4381, + "step": 88 + }, + { + "epoch": 1.0142450142450143, + "grad_norm": 0.5482646822929382, + "learning_rate": 5.577889447236181e-05, + "loss": 1.2868, + "step": 89 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.4250563681125641, + "learning_rate": 5.527638190954774e-05, + "loss": 1.1044, + "step": 90 + }, + { + "epoch": 1.037037037037037, + "grad_norm": 0.5369375348091125, + "learning_rate": 5.477386934673368e-05, + "loss": 1.2195, + "step": 91 + }, + { + "epoch": 1.0484330484330484, + "grad_norm": 0.5130016207695007, + "learning_rate": 5.4271356783919604e-05, + "loss": 1.1992, + "step": 92 + }, + { + "epoch": 1.0598290598290598, + "grad_norm": 0.48509305715560913, + "learning_rate": 5.376884422110553e-05, + "loss": 1.1552, + "step": 93 + }, + { + "epoch": 1.0712250712250713, + "grad_norm": 0.45681053400039673, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.0611, + "step": 94 + }, + { + "epoch": 1.0826210826210827, + "grad_norm": 0.5242003202438354, + "learning_rate": 5.276381909547739e-05, + "loss": 1.2777, + "step": 95 + }, + { + "epoch": 1.0940170940170941, + "grad_norm": 0.519043505191803, + "learning_rate": 5.226130653266332e-05, + "loss": 1.1207, + "step": 96 + }, + { + "epoch": 1.1054131054131053, + "grad_norm": 0.5411263704299927, + "learning_rate": 5.175879396984925e-05, + "loss": 1.1754, + "step": 97 + }, + { + "epoch": 1.1168091168091168, + "grad_norm": 0.5008968710899353, + "learning_rate": 5.125628140703518e-05, + "loss": 1.2308, + "step": 98 + }, + { + "epoch": 1.1282051282051282, + "grad_norm": 0.5314839482307434, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.1087, + "step": 99 + }, + { + "epoch": 1.1396011396011396, + "grad_norm": 0.5465586185455322, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.2684, + "step": 100 + }, + { + "epoch": 1.150997150997151, + "grad_norm": 0.5156195163726807, + "learning_rate": 4.974874371859297e-05, + "loss": 1.1316, + "step": 101 + }, + { + "epoch": 1.1623931623931625, + "grad_norm": 0.5936316251754761, + "learning_rate": 4.92462311557789e-05, + "loss": 1.2812, + "step": 102 + }, + { + "epoch": 1.173789173789174, + "grad_norm": 0.5087826251983643, + "learning_rate": 4.874371859296483e-05, + "loss": 1.0706, + "step": 103 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.5224874019622803, + "learning_rate": 4.824120603015075e-05, + "loss": 1.1901, + "step": 104 + }, + { + "epoch": 1.1965811965811965, + "grad_norm": 0.5126091241836548, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.1199, + "step": 105 + }, + { + "epoch": 1.207977207977208, + "grad_norm": 0.5112568140029907, + "learning_rate": 4.723618090452262e-05, + "loss": 1.1079, + "step": 106 + }, + { + "epoch": 1.2193732193732194, + "grad_norm": 0.601634681224823, + "learning_rate": 4.673366834170855e-05, + "loss": 1.2429, + "step": 107 + }, + { + "epoch": 1.2307692307692308, + "grad_norm": 0.5239788293838501, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.124, + "step": 108 + }, + { + "epoch": 1.242165242165242, + "grad_norm": 0.607867419719696, + "learning_rate": 4.57286432160804e-05, + "loss": 1.215, + "step": 109 + }, + { + "epoch": 1.2535612535612537, + "grad_norm": 0.5706148147583008, + "learning_rate": 4.522613065326633e-05, + "loss": 1.1468, + "step": 110 + }, + { + "epoch": 1.264957264957265, + "grad_norm": 0.5177438855171204, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.0216, + "step": 111 + }, + { + "epoch": 1.2763532763532763, + "grad_norm": 0.630548357963562, + "learning_rate": 4.42211055276382e-05, + "loss": 1.2849, + "step": 112 + }, + { + "epoch": 1.2877492877492878, + "grad_norm": 0.5843987464904785, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.1668, + "step": 113 + }, + { + "epoch": 1.2991452991452992, + "grad_norm": 0.5504516959190369, + "learning_rate": 4.321608040201005e-05, + "loss": 1.1931, + "step": 114 + }, + { + "epoch": 1.3105413105413106, + "grad_norm": 0.5568366050720215, + "learning_rate": 4.271356783919598e-05, + "loss": 1.1895, + "step": 115 + }, + { + "epoch": 1.3219373219373218, + "grad_norm": 0.5665252804756165, + "learning_rate": 4.2211055276381914e-05, + "loss": 1.094, + "step": 116 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.5702048540115356, + "learning_rate": 4.170854271356784e-05, + "loss": 1.1691, + "step": 117 + }, + { + "epoch": 1.3447293447293447, + "grad_norm": 0.5556389093399048, + "learning_rate": 4.120603015075377e-05, + "loss": 1.0961, + "step": 118 + }, + { + "epoch": 1.3561253561253561, + "grad_norm": 0.5471904277801514, + "learning_rate": 4.07035175879397e-05, + "loss": 1.2256, + "step": 119 + }, + { + "epoch": 1.3675213675213675, + "grad_norm": 0.5537399053573608, + "learning_rate": 4.020100502512563e-05, + "loss": 1.146, + "step": 120 + }, + { + "epoch": 1.378917378917379, + "grad_norm": 0.5466405153274536, + "learning_rate": 3.969849246231156e-05, + "loss": 1.0433, + "step": 121 + }, + { + "epoch": 1.3903133903133904, + "grad_norm": 0.5884879231452942, + "learning_rate": 3.919597989949749e-05, + "loss": 1.2018, + "step": 122 + }, + { + "epoch": 1.4017094017094016, + "grad_norm": 0.5696362257003784, + "learning_rate": 3.869346733668342e-05, + "loss": 1.0881, + "step": 123 + }, + { + "epoch": 1.413105413105413, + "grad_norm": 0.5904156565666199, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.1611, + "step": 124 + }, + { + "epoch": 1.4245014245014245, + "grad_norm": 0.5661723613739014, + "learning_rate": 3.768844221105528e-05, + "loss": 1.2058, + "step": 125 + }, + { + "epoch": 1.435897435897436, + "grad_norm": 0.5651929974555969, + "learning_rate": 3.7185929648241204e-05, + "loss": 1.1127, + "step": 126 + }, + { + "epoch": 1.4472934472934473, + "grad_norm": 0.5619604587554932, + "learning_rate": 3.668341708542714e-05, + "loss": 1.0743, + "step": 127 + }, + { + "epoch": 1.4586894586894588, + "grad_norm": 0.6110875010490417, + "learning_rate": 3.618090452261307e-05, + "loss": 1.2174, + "step": 128 + }, + { + "epoch": 1.4700854700854702, + "grad_norm": 0.5553095936775208, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.056, + "step": 129 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.6339269280433655, + "learning_rate": 3.517587939698493e-05, + "loss": 1.2721, + "step": 130 + }, + { + "epoch": 1.4928774928774928, + "grad_norm": 0.6113449931144714, + "learning_rate": 3.467336683417085e-05, + "loss": 1.193, + "step": 131 + }, + { + "epoch": 1.5042735042735043, + "grad_norm": 0.650422990322113, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.2042, + "step": 132 + }, + { + "epoch": 1.5156695156695157, + "grad_norm": 0.5726408362388611, + "learning_rate": 3.366834170854272e-05, + "loss": 1.0943, + "step": 133 + }, + { + "epoch": 1.5270655270655271, + "grad_norm": 0.6288692355155945, + "learning_rate": 3.3165829145728643e-05, + "loss": 1.0757, + "step": 134 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 0.5904447436332703, + "learning_rate": 3.2663316582914576e-05, + "loss": 1.2262, + "step": 135 + }, + { + "epoch": 1.54985754985755, + "grad_norm": 0.6374230980873108, + "learning_rate": 3.21608040201005e-05, + "loss": 1.0702, + "step": 136 + }, + { + "epoch": 1.5612535612535612, + "grad_norm": 0.6134806275367737, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.087, + "step": 137 + }, + { + "epoch": 1.5726495726495726, + "grad_norm": 0.6067680716514587, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.0904, + "step": 138 + }, + { + "epoch": 1.584045584045584, + "grad_norm": 0.6703988313674927, + "learning_rate": 3.065326633165829e-05, + "loss": 1.1456, + "step": 139 + }, + { + "epoch": 1.5954415954415955, + "grad_norm": 0.6422675848007202, + "learning_rate": 3.015075376884422e-05, + "loss": 1.1982, + "step": 140 + }, + { + "epoch": 1.606837606837607, + "grad_norm": 0.6382200121879578, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.0302, + "step": 141 + }, + { + "epoch": 1.618233618233618, + "grad_norm": 0.6702827215194702, + "learning_rate": 2.914572864321608e-05, + "loss": 1.2956, + "step": 142 + }, + { + "epoch": 1.6296296296296298, + "grad_norm": 0.5787515640258789, + "learning_rate": 2.8643216080402015e-05, + "loss": 0.9991, + "step": 143 + }, + { + "epoch": 1.641025641025641, + "grad_norm": 0.6443578600883484, + "learning_rate": 2.814070351758794e-05, + "loss": 1.2185, + "step": 144 + }, + { + "epoch": 1.6524216524216524, + "grad_norm": 0.6572853326797485, + "learning_rate": 2.763819095477387e-05, + "loss": 1.0896, + "step": 145 + }, + { + "epoch": 1.6638176638176638, + "grad_norm": 0.5817992091178894, + "learning_rate": 2.7135678391959802e-05, + "loss": 0.9568, + "step": 146 + }, + { + "epoch": 1.6752136752136753, + "grad_norm": 0.611811101436615, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.1837, + "step": 147 + }, + { + "epoch": 1.6866096866096867, + "grad_norm": 0.6850467324256897, + "learning_rate": 2.613065326633166e-05, + "loss": 1.2428, + "step": 148 + }, + { + "epoch": 1.698005698005698, + "grad_norm": 0.636823832988739, + "learning_rate": 2.562814070351759e-05, + "loss": 1.014, + "step": 149 + }, + { + "epoch": 1.7094017094017095, + "grad_norm": 0.6554390788078308, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.2007, + "step": 150 + }, + { + "epoch": 1.7207977207977208, + "grad_norm": 0.6579884886741638, + "learning_rate": 2.462311557788945e-05, + "loss": 1.0495, + "step": 151 + }, + { + "epoch": 1.7321937321937322, + "grad_norm": 0.7605540752410889, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.2544, + "step": 152 + }, + { + "epoch": 1.7435897435897436, + "grad_norm": 0.6511518955230713, + "learning_rate": 2.361809045226131e-05, + "loss": 1.1639, + "step": 153 + }, + { + "epoch": 1.7549857549857548, + "grad_norm": 0.6654579639434814, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.1876, + "step": 154 + }, + { + "epoch": 1.7663817663817665, + "grad_norm": 0.6339576244354248, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.1089, + "step": 155 + }, + { + "epoch": 1.7777777777777777, + "grad_norm": 0.6115437746047974, + "learning_rate": 2.21105527638191e-05, + "loss": 1.1241, + "step": 156 + }, + { + "epoch": 1.7891737891737893, + "grad_norm": 0.6483911871910095, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.2675, + "step": 157 + }, + { + "epoch": 1.8005698005698005, + "grad_norm": 0.6294212937355042, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.1681, + "step": 158 + }, + { + "epoch": 1.811965811965812, + "grad_norm": 0.7219116687774658, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.2162, + "step": 159 + }, + { + "epoch": 1.8233618233618234, + "grad_norm": 0.6669836044311523, + "learning_rate": 2.0100502512562815e-05, + "loss": 1.1103, + "step": 160 + }, + { + "epoch": 1.8347578347578346, + "grad_norm": 0.6326732635498047, + "learning_rate": 1.9597989949748744e-05, + "loss": 1.1337, + "step": 161 + }, + { + "epoch": 1.8461538461538463, + "grad_norm": 0.6156632304191589, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.083, + "step": 162 + }, + { + "epoch": 1.8575498575498575, + "grad_norm": 0.6444670557975769, + "learning_rate": 1.8592964824120602e-05, + "loss": 1.022, + "step": 163 + }, + { + "epoch": 1.868945868945869, + "grad_norm": 0.6594868898391724, + "learning_rate": 1.8090452261306535e-05, + "loss": 1.0944, + "step": 164 + }, + { + "epoch": 1.8803418803418803, + "grad_norm": 0.6822905540466309, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.1148, + "step": 165 + }, + { + "epoch": 1.8917378917378918, + "grad_norm": 0.6073247194290161, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.1233, + "step": 166 + }, + { + "epoch": 1.9031339031339032, + "grad_norm": 0.6545522809028625, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.0492, + "step": 167 + }, + { + "epoch": 1.9145299145299144, + "grad_norm": 0.6646963357925415, + "learning_rate": 1.608040201005025e-05, + "loss": 1.1268, + "step": 168 + }, + { + "epoch": 1.925925925925926, + "grad_norm": 0.6583707332611084, + "learning_rate": 1.5577889447236183e-05, + "loss": 1.0664, + "step": 169 + }, + { + "epoch": 1.9373219373219372, + "grad_norm": 0.6870848536491394, + "learning_rate": 1.507537688442211e-05, + "loss": 1.1419, + "step": 170 + }, + { + "epoch": 1.9487179487179487, + "grad_norm": 0.6093568205833435, + "learning_rate": 1.457286432160804e-05, + "loss": 1.0886, + "step": 171 + }, + { + "epoch": 1.96011396011396, + "grad_norm": 0.6664398908615112, + "learning_rate": 1.407035175879397e-05, + "loss": 1.3011, + "step": 172 + }, + { + "epoch": 1.9715099715099715, + "grad_norm": 0.6134812831878662, + "learning_rate": 1.3567839195979901e-05, + "loss": 1.1229, + "step": 173 + }, + { + "epoch": 1.982905982905983, + "grad_norm": 0.6148936152458191, + "learning_rate": 1.306532663316583e-05, + "loss": 1.1105, + "step": 174 + }, + { + "epoch": 1.9943019943019942, + "grad_norm": 0.8514490723609924, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.4433, + "step": 175 + }, + { + "epoch": 2.005698005698006, + "grad_norm": 0.8297043442726135, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.3522, + "step": 176 + }, + { + "epoch": 2.017094017094017, + "grad_norm": 0.6481033563613892, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.0948, + "step": 177 + }, + { + "epoch": 2.0284900284900287, + "grad_norm": 0.6178433895111084, + "learning_rate": 1.105527638190955e-05, + "loss": 1.1191, + "step": 178 + }, + { + "epoch": 2.03988603988604, + "grad_norm": 0.5894613265991211, + "learning_rate": 1.0552763819095479e-05, + "loss": 0.961, + "step": 179 + }, + { + "epoch": 2.051282051282051, + "grad_norm": 0.614154040813446, + "learning_rate": 1.0050251256281408e-05, + "loss": 1.1262, + "step": 180 + }, + { + "epoch": 2.0626780626780628, + "grad_norm": 0.6188438534736633, + "learning_rate": 9.547738693467337e-06, + "loss": 1.0766, + "step": 181 + }, + { + "epoch": 2.074074074074074, + "grad_norm": 0.6117876768112183, + "learning_rate": 9.045226130653267e-06, + "loss": 1.0208, + "step": 182 + }, + { + "epoch": 2.0854700854700856, + "grad_norm": 0.6556265354156494, + "learning_rate": 8.542713567839196e-06, + "loss": 1.1446, + "step": 183 + }, + { + "epoch": 2.096866096866097, + "grad_norm": 0.6300605535507202, + "learning_rate": 8.040201005025125e-06, + "loss": 1.1062, + "step": 184 + }, + { + "epoch": 2.1082621082621085, + "grad_norm": 0.6257303953170776, + "learning_rate": 7.537688442211055e-06, + "loss": 1.1265, + "step": 185 + }, + { + "epoch": 2.1196581196581197, + "grad_norm": 0.655892550945282, + "learning_rate": 7.035175879396985e-06, + "loss": 1.1045, + "step": 186 + }, + { + "epoch": 2.131054131054131, + "grad_norm": 0.6385506987571716, + "learning_rate": 6.532663316582915e-06, + "loss": 1.1365, + "step": 187 + }, + { + "epoch": 2.1424501424501425, + "grad_norm": 0.6057552099227905, + "learning_rate": 6.030150753768844e-06, + "loss": 1.0207, + "step": 188 + }, + { + "epoch": 2.1538461538461537, + "grad_norm": 0.6790822148323059, + "learning_rate": 5.527638190954775e-06, + "loss": 1.2365, + "step": 189 + }, + { + "epoch": 2.1652421652421654, + "grad_norm": 0.6334704160690308, + "learning_rate": 5.025125628140704e-06, + "loss": 0.987, + "step": 190 + }, + { + "epoch": 2.1766381766381766, + "grad_norm": 0.6228213906288147, + "learning_rate": 4.522613065326634e-06, + "loss": 1.1322, + "step": 191 + }, + { + "epoch": 2.1880341880341883, + "grad_norm": 0.6533617377281189, + "learning_rate": 4.020100502512563e-06, + "loss": 1.0865, + "step": 192 + }, + { + "epoch": 2.1994301994301995, + "grad_norm": 0.6909888386726379, + "learning_rate": 3.5175879396984926e-06, + "loss": 1.1537, + "step": 193 + }, + { + "epoch": 2.2108262108262107, + "grad_norm": 0.6190997958183289, + "learning_rate": 3.015075376884422e-06, + "loss": 1.057, + "step": 194 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.6228768229484558, + "learning_rate": 2.512562814070352e-06, + "loss": 1.1244, + "step": 195 + }, + { + "epoch": 2.2336182336182335, + "grad_norm": 0.6112802624702454, + "learning_rate": 2.0100502512562813e-06, + "loss": 1.06, + "step": 196 + }, + { + "epoch": 2.245014245014245, + "grad_norm": 0.6091369390487671, + "learning_rate": 1.507537688442211e-06, + "loss": 1.1064, + "step": 197 + }, + { + "epoch": 2.2564102564102564, + "grad_norm": 0.604691743850708, + "learning_rate": 1.0050251256281407e-06, + "loss": 1.0923, + "step": 198 + }, + { + "epoch": 2.267806267806268, + "grad_norm": 0.585267186164856, + "learning_rate": 5.025125628140703e-07, + "loss": 0.9541, + "step": 199 + }, + { + "epoch": 2.2792022792022792, + "grad_norm": 0.6536011099815369, + "learning_rate": 0.0, + "loss": 1.2459, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.2304707147874304e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_French_English/checkpoint-200/training_args.bin b/llama_French_English/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..831ef46622ae86a9d4c8d2a31ea7d5617a8e4395 --- /dev/null +++ b/llama_French_English/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b179441b48e4af18bdef2a219cad4c0cc0af5f5e5433e7e94dd57f6ed1e5d167 +size 5624 diff --git a/llama_French_German/checkpoint-200/README.md b/llama_French_German/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_French_German/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_French_German/checkpoint-200/adapter_config.json b/llama_French_German/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..79146523083b4e95b3f8051443d12c1b60a1d477 --- /dev/null +++ b/llama_French_German/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "k_proj", + "o_proj", + "gate_proj", + "up_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_French_German/checkpoint-200/adapter_model.safetensors b/llama_French_German/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75226b606de19794b5e67915dc9508f997d746ca --- /dev/null +++ b/llama_French_German/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:081b10f63be4ad20c7c3681fb94aa07d6c1f957226b913fc12d66dd3b66a44c8 +size 167832240 diff --git a/llama_French_German/checkpoint-200/optimizer.pt b/llama_French_German/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..323f971f7e63397aa19669dfb63c2c90ba6725b4 --- /dev/null +++ b/llama_French_German/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:725479450cdd19d8fb6f2191a8c8a70b3bf015022825a21a18305986f827ad41 +size 85723284 diff --git a/llama_French_German/checkpoint-200/rng_state.pth b/llama_French_German/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21f2ff1c1a816463781d51760f8156e041f5979 --- /dev/null +++ b/llama_French_German/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4981123ff3cf7bd5b7f76839e90e4776f747ca4c38dcb41876fa010c0dea8b23 +size 14244 diff --git a/llama_French_German/checkpoint-200/scheduler.pt b/llama_French_German/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_French_German/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_French_German/checkpoint-200/special_tokens_map.json b/llama_French_German/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_French_German/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_French_German/checkpoint-200/tokenizer.json b/llama_French_German/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_French_German/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_French_German/checkpoint-200/tokenizer_config.json b/llama_French_German/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_French_German/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_French_German/checkpoint-200/trainer_state.json b/llama_French_German/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..362e0a60788767f2e6d33cf5cc9e963d58a1205b --- /dev/null +++ b/llama_French_German/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2792022792022792, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.011396011396011397, + "grad_norm": 1.0688412189483643, + "learning_rate": 0.0001, + "loss": 2.5442, + "step": 1 + }, + { + "epoch": 0.022792022792022793, + "grad_norm": 0.9676798582077026, + "learning_rate": 9.949748743718594e-05, + "loss": 2.4169, + "step": 2 + }, + { + "epoch": 0.03418803418803419, + "grad_norm": 0.9322265386581421, + "learning_rate": 9.899497487437186e-05, + "loss": 2.4585, + "step": 3 + }, + { + "epoch": 0.045584045584045586, + "grad_norm": 1.0203752517700195, + "learning_rate": 9.84924623115578e-05, + "loss": 2.2605, + "step": 4 + }, + { + "epoch": 0.05698005698005698, + "grad_norm": 1.0615314245224, + "learning_rate": 9.798994974874372e-05, + "loss": 2.2029, + "step": 5 + }, + { + "epoch": 0.06837606837606838, + "grad_norm": 1.1191452741622925, + "learning_rate": 9.748743718592965e-05, + "loss": 1.977, + "step": 6 + }, + { + "epoch": 0.07977207977207977, + "grad_norm": 1.0572948455810547, + "learning_rate": 9.698492462311559e-05, + "loss": 1.8041, + "step": 7 + }, + { + "epoch": 0.09116809116809117, + "grad_norm": 1.0000556707382202, + "learning_rate": 9.64824120603015e-05, + "loss": 1.8135, + "step": 8 + }, + { + "epoch": 0.10256410256410256, + "grad_norm": 0.815621554851532, + "learning_rate": 9.597989949748745e-05, + "loss": 1.6268, + "step": 9 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 0.8220420479774475, + "learning_rate": 9.547738693467337e-05, + "loss": 1.556, + "step": 10 + }, + { + "epoch": 0.12535612535612536, + "grad_norm": 0.8398631811141968, + "learning_rate": 9.49748743718593e-05, + "loss": 1.4275, + "step": 11 + }, + { + "epoch": 0.13675213675213677, + "grad_norm": 0.7354830503463745, + "learning_rate": 9.447236180904523e-05, + "loss": 1.4687, + "step": 12 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 0.4654861092567444, + "learning_rate": 9.396984924623115e-05, + "loss": 1.3937, + "step": 13 + }, + { + "epoch": 0.15954415954415954, + "grad_norm": 0.5842018127441406, + "learning_rate": 9.34673366834171e-05, + "loss": 1.4105, + "step": 14 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 0.6819984912872314, + "learning_rate": 9.296482412060302e-05, + "loss": 1.3469, + "step": 15 + }, + { + "epoch": 0.18233618233618235, + "grad_norm": 0.5046871900558472, + "learning_rate": 9.246231155778895e-05, + "loss": 1.3817, + "step": 16 + }, + { + "epoch": 0.19373219373219372, + "grad_norm": 0.4931313693523407, + "learning_rate": 9.195979899497488e-05, + "loss": 1.4015, + "step": 17 + }, + { + "epoch": 0.20512820512820512, + "grad_norm": 0.5237986445426941, + "learning_rate": 9.14572864321608e-05, + "loss": 1.3251, + "step": 18 + }, + { + "epoch": 0.21652421652421652, + "grad_norm": 0.48392462730407715, + "learning_rate": 9.095477386934675e-05, + "loss": 1.2821, + "step": 19 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 0.5208500623703003, + "learning_rate": 9.045226130653267e-05, + "loss": 1.3226, + "step": 20 + }, + { + "epoch": 0.23931623931623933, + "grad_norm": 0.5178021192550659, + "learning_rate": 8.99497487437186e-05, + "loss": 1.2268, + "step": 21 + }, + { + "epoch": 0.25071225071225073, + "grad_norm": 0.5599659085273743, + "learning_rate": 8.944723618090453e-05, + "loss": 1.3742, + "step": 22 + }, + { + "epoch": 0.2621082621082621, + "grad_norm": 0.5551819801330566, + "learning_rate": 8.894472361809045e-05, + "loss": 1.3156, + "step": 23 + }, + { + "epoch": 0.27350427350427353, + "grad_norm": 0.7024655342102051, + "learning_rate": 8.84422110552764e-05, + "loss": 1.2187, + "step": 24 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 0.6340409517288208, + "learning_rate": 8.793969849246232e-05, + "loss": 1.3423, + "step": 25 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.5692094564437866, + "learning_rate": 8.743718592964825e-05, + "loss": 1.2315, + "step": 26 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 0.5804877877235413, + "learning_rate": 8.693467336683418e-05, + "loss": 1.2316, + "step": 27 + }, + { + "epoch": 0.3190883190883191, + "grad_norm": 0.7234011292457581, + "learning_rate": 8.64321608040201e-05, + "loss": 1.2583, + "step": 28 + }, + { + "epoch": 0.33048433048433046, + "grad_norm": 0.8010082244873047, + "learning_rate": 8.592964824120603e-05, + "loss": 1.3704, + "step": 29 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.6479464173316956, + "learning_rate": 8.542713567839196e-05, + "loss": 1.3294, + "step": 30 + }, + { + "epoch": 0.35327635327635326, + "grad_norm": 0.778668224811554, + "learning_rate": 8.49246231155779e-05, + "loss": 1.2566, + "step": 31 + }, + { + "epoch": 0.3646723646723647, + "grad_norm": 0.8151825070381165, + "learning_rate": 8.442211055276383e-05, + "loss": 1.2372, + "step": 32 + }, + { + "epoch": 0.37606837606837606, + "grad_norm": 0.777619481086731, + "learning_rate": 8.391959798994975e-05, + "loss": 1.2749, + "step": 33 + }, + { + "epoch": 0.38746438746438744, + "grad_norm": 0.8822659850120544, + "learning_rate": 8.341708542713568e-05, + "loss": 1.2229, + "step": 34 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 0.870242178440094, + "learning_rate": 8.291457286432161e-05, + "loss": 1.1641, + "step": 35 + }, + { + "epoch": 0.41025641025641024, + "grad_norm": 0.8463490605354309, + "learning_rate": 8.241206030150754e-05, + "loss": 1.2443, + "step": 36 + }, + { + "epoch": 0.42165242165242167, + "grad_norm": 0.8682456612586975, + "learning_rate": 8.190954773869348e-05, + "loss": 1.2818, + "step": 37 + }, + { + "epoch": 0.43304843304843305, + "grad_norm": 0.6559503674507141, + "learning_rate": 8.14070351758794e-05, + "loss": 1.1601, + "step": 38 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.603629469871521, + "learning_rate": 8.090452261306533e-05, + "loss": 1.2256, + "step": 39 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 0.5692597031593323, + "learning_rate": 8.040201005025126e-05, + "loss": 1.2869, + "step": 40 + }, + { + "epoch": 0.4672364672364672, + "grad_norm": 0.5320606231689453, + "learning_rate": 7.989949748743719e-05, + "loss": 1.179, + "step": 41 + }, + { + "epoch": 0.47863247863247865, + "grad_norm": 0.5642545819282532, + "learning_rate": 7.939698492462313e-05, + "loss": 1.1968, + "step": 42 + }, + { + "epoch": 0.49002849002849, + "grad_norm": 0.4435971975326538, + "learning_rate": 7.889447236180904e-05, + "loss": 1.2094, + "step": 43 + }, + { + "epoch": 0.5014245014245015, + "grad_norm": 0.4562317430973053, + "learning_rate": 7.839195979899498e-05, + "loss": 1.1734, + "step": 44 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.5164006948471069, + "learning_rate": 7.788944723618091e-05, + "loss": 1.2177, + "step": 45 + }, + { + "epoch": 0.5242165242165242, + "grad_norm": 0.5158300399780273, + "learning_rate": 7.738693467336684e-05, + "loss": 1.2289, + "step": 46 + }, + { + "epoch": 0.5356125356125356, + "grad_norm": 0.49650484323501587, + "learning_rate": 7.688442211055277e-05, + "loss": 1.3316, + "step": 47 + }, + { + "epoch": 0.5470085470085471, + "grad_norm": 0.4756147563457489, + "learning_rate": 7.638190954773869e-05, + "loss": 1.333, + "step": 48 + }, + { + "epoch": 0.5584045584045584, + "grad_norm": 0.4514218866825104, + "learning_rate": 7.587939698492463e-05, + "loss": 1.2733, + "step": 49 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 0.445081889629364, + "learning_rate": 7.537688442211056e-05, + "loss": 1.2024, + "step": 50 + }, + { + "epoch": 0.5811965811965812, + "grad_norm": 0.45231911540031433, + "learning_rate": 7.487437185929649e-05, + "loss": 1.267, + "step": 51 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.5107349753379822, + "learning_rate": 7.437185929648241e-05, + "loss": 1.2068, + "step": 52 + }, + { + "epoch": 0.603988603988604, + "grad_norm": 0.4770635664463043, + "learning_rate": 7.386934673366834e-05, + "loss": 1.2372, + "step": 53 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 0.4619287848472595, + "learning_rate": 7.336683417085427e-05, + "loss": 1.2132, + "step": 54 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 0.5125857591629028, + "learning_rate": 7.28643216080402e-05, + "loss": 1.2367, + "step": 55 + }, + { + "epoch": 0.6381766381766382, + "grad_norm": 0.456436425447464, + "learning_rate": 7.236180904522614e-05, + "loss": 1.1989, + "step": 56 + }, + { + "epoch": 0.6495726495726496, + "grad_norm": 0.4466511011123657, + "learning_rate": 7.185929648241206e-05, + "loss": 1.2789, + "step": 57 + }, + { + "epoch": 0.6609686609686609, + "grad_norm": 0.45993903279304504, + "learning_rate": 7.135678391959799e-05, + "loss": 1.2228, + "step": 58 + }, + { + "epoch": 0.6723646723646723, + "grad_norm": 0.4762590825557709, + "learning_rate": 7.085427135678392e-05, + "loss": 1.1418, + "step": 59 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.4990002512931824, + "learning_rate": 7.035175879396985e-05, + "loss": 1.1788, + "step": 60 + }, + { + "epoch": 0.6951566951566952, + "grad_norm": 0.4810471534729004, + "learning_rate": 6.984924623115579e-05, + "loss": 1.1878, + "step": 61 + }, + { + "epoch": 0.7065527065527065, + "grad_norm": 0.4777512550354004, + "learning_rate": 6.93467336683417e-05, + "loss": 1.2621, + "step": 62 + }, + { + "epoch": 0.717948717948718, + "grad_norm": 0.5083452463150024, + "learning_rate": 6.884422110552764e-05, + "loss": 1.1642, + "step": 63 + }, + { + "epoch": 0.7293447293447294, + "grad_norm": 0.4965672194957733, + "learning_rate": 6.834170854271357e-05, + "loss": 1.1356, + "step": 64 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.48566991090774536, + "learning_rate": 6.78391959798995e-05, + "loss": 1.0954, + "step": 65 + }, + { + "epoch": 0.7521367521367521, + "grad_norm": 0.46452316641807556, + "learning_rate": 6.733668341708544e-05, + "loss": 1.1905, + "step": 66 + }, + { + "epoch": 0.7635327635327636, + "grad_norm": 0.51093989610672, + "learning_rate": 6.683417085427135e-05, + "loss": 1.2285, + "step": 67 + }, + { + "epoch": 0.7749287749287749, + "grad_norm": 0.5919416546821594, + "learning_rate": 6.633165829145729e-05, + "loss": 1.2368, + "step": 68 + }, + { + "epoch": 0.7863247863247863, + "grad_norm": 0.46627846360206604, + "learning_rate": 6.582914572864322e-05, + "loss": 1.1286, + "step": 69 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.4974450170993805, + "learning_rate": 6.532663316582915e-05, + "loss": 1.2874, + "step": 70 + }, + { + "epoch": 0.8091168091168092, + "grad_norm": 0.5373516082763672, + "learning_rate": 6.482412060301508e-05, + "loss": 1.2573, + "step": 71 + }, + { + "epoch": 0.8205128205128205, + "grad_norm": 0.4522000849246979, + "learning_rate": 6.4321608040201e-05, + "loss": 1.137, + "step": 72 + }, + { + "epoch": 0.8319088319088319, + "grad_norm": 0.4694693088531494, + "learning_rate": 6.381909547738694e-05, + "loss": 1.1971, + "step": 73 + }, + { + "epoch": 0.8433048433048433, + "grad_norm": 0.4889134466648102, + "learning_rate": 6.331658291457287e-05, + "loss": 1.1435, + "step": 74 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.512048602104187, + "learning_rate": 6.28140703517588e-05, + "loss": 1.2086, + "step": 75 + }, + { + "epoch": 0.8660968660968661, + "grad_norm": 0.49475014209747314, + "learning_rate": 6.231155778894473e-05, + "loss": 1.2496, + "step": 76 + }, + { + "epoch": 0.8774928774928775, + "grad_norm": 0.4658482074737549, + "learning_rate": 6.180904522613065e-05, + "loss": 1.0942, + "step": 77 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.5323877334594727, + "learning_rate": 6.130653266331658e-05, + "loss": 1.1853, + "step": 78 + }, + { + "epoch": 0.9002849002849003, + "grad_norm": 0.6251657605171204, + "learning_rate": 6.080402010050251e-05, + "loss": 1.2209, + "step": 79 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.4768678545951843, + "learning_rate": 6.030150753768844e-05, + "loss": 1.164, + "step": 80 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 0.5291458964347839, + "learning_rate": 5.979899497487438e-05, + "loss": 1.2754, + "step": 81 + }, + { + "epoch": 0.9344729344729344, + "grad_norm": 0.5092456340789795, + "learning_rate": 5.929648241206031e-05, + "loss": 1.248, + "step": 82 + }, + { + "epoch": 0.9458689458689459, + "grad_norm": 0.4834723174571991, + "learning_rate": 5.879396984924623e-05, + "loss": 1.2681, + "step": 83 + }, + { + "epoch": 0.9572649572649573, + "grad_norm": 0.5478146076202393, + "learning_rate": 5.829145728643216e-05, + "loss": 1.2045, + "step": 84 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.4918864071369171, + "learning_rate": 5.778894472361809e-05, + "loss": 1.1216, + "step": 85 + }, + { + "epoch": 0.98005698005698, + "grad_norm": 0.4812568128108978, + "learning_rate": 5.728643216080403e-05, + "loss": 1.1732, + "step": 86 + }, + { + "epoch": 0.9914529914529915, + "grad_norm": 0.5308701992034912, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.2643, + "step": 87 + }, + { + "epoch": 1.002849002849003, + "grad_norm": 0.7254398465156555, + "learning_rate": 5.628140703517588e-05, + "loss": 1.4278, + "step": 88 + }, + { + "epoch": 1.0142450142450143, + "grad_norm": 0.5298280715942383, + "learning_rate": 5.577889447236181e-05, + "loss": 1.2691, + "step": 89 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.4411105811595917, + "learning_rate": 5.527638190954774e-05, + "loss": 1.1033, + "step": 90 + }, + { + "epoch": 1.037037037037037, + "grad_norm": 0.5160195231437683, + "learning_rate": 5.477386934673368e-05, + "loss": 1.2041, + "step": 91 + }, + { + "epoch": 1.0484330484330484, + "grad_norm": 0.4904952943325043, + "learning_rate": 5.4271356783919604e-05, + "loss": 1.1634, + "step": 92 + }, + { + "epoch": 1.0598290598290598, + "grad_norm": 0.4598182439804077, + "learning_rate": 5.376884422110553e-05, + "loss": 1.1095, + "step": 93 + }, + { + "epoch": 1.0712250712250713, + "grad_norm": 0.4579429626464844, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.0614, + "step": 94 + }, + { + "epoch": 1.0826210826210827, + "grad_norm": 0.49531427025794983, + "learning_rate": 5.276381909547739e-05, + "loss": 1.2415, + "step": 95 + }, + { + "epoch": 1.0940170940170941, + "grad_norm": 0.4900212287902832, + "learning_rate": 5.226130653266332e-05, + "loss": 1.08, + "step": 96 + }, + { + "epoch": 1.1054131054131053, + "grad_norm": 0.5321851372718811, + "learning_rate": 5.175879396984925e-05, + "loss": 1.1464, + "step": 97 + }, + { + "epoch": 1.1168091168091168, + "grad_norm": 0.4976498782634735, + "learning_rate": 5.125628140703518e-05, + "loss": 1.1962, + "step": 98 + }, + { + "epoch": 1.1282051282051282, + "grad_norm": 0.48046809434890747, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.0534, + "step": 99 + }, + { + "epoch": 1.1396011396011396, + "grad_norm": 0.5641231536865234, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.2342, + "step": 100 + }, + { + "epoch": 1.150997150997151, + "grad_norm": 0.5218859314918518, + "learning_rate": 4.974874371859297e-05, + "loss": 1.1125, + "step": 101 + }, + { + "epoch": 1.1623931623931625, + "grad_norm": 0.672106146812439, + "learning_rate": 4.92462311557789e-05, + "loss": 1.2577, + "step": 102 + }, + { + "epoch": 1.173789173789174, + "grad_norm": 0.5047236680984497, + "learning_rate": 4.874371859296483e-05, + "loss": 1.0518, + "step": 103 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.5189758539199829, + "learning_rate": 4.824120603015075e-05, + "loss": 1.1672, + "step": 104 + }, + { + "epoch": 1.1965811965811965, + "grad_norm": 0.5212465524673462, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.0869, + "step": 105 + }, + { + "epoch": 1.207977207977208, + "grad_norm": 0.5158497095108032, + "learning_rate": 4.723618090452262e-05, + "loss": 1.0674, + "step": 106 + }, + { + "epoch": 1.2193732193732194, + "grad_norm": 0.6164978742599487, + "learning_rate": 4.673366834170855e-05, + "loss": 1.2204, + "step": 107 + }, + { + "epoch": 1.2307692307692308, + "grad_norm": 0.5276105403900146, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.0782, + "step": 108 + }, + { + "epoch": 1.242165242165242, + "grad_norm": 0.5990796685218811, + "learning_rate": 4.57286432160804e-05, + "loss": 1.1748, + "step": 109 + }, + { + "epoch": 1.2535612535612537, + "grad_norm": 0.5942894816398621, + "learning_rate": 4.522613065326633e-05, + "loss": 1.1417, + "step": 110 + }, + { + "epoch": 1.264957264957265, + "grad_norm": 0.5517327189445496, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.0214, + "step": 111 + }, + { + "epoch": 1.2763532763532763, + "grad_norm": 0.599429190158844, + "learning_rate": 4.42211055276382e-05, + "loss": 1.2503, + "step": 112 + }, + { + "epoch": 1.2877492877492878, + "grad_norm": 0.5922709107398987, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.1294, + "step": 113 + }, + { + "epoch": 1.2991452991452992, + "grad_norm": 0.569146990776062, + "learning_rate": 4.321608040201005e-05, + "loss": 1.1523, + "step": 114 + }, + { + "epoch": 1.3105413105413106, + "grad_norm": 0.5592817664146423, + "learning_rate": 4.271356783919598e-05, + "loss": 1.1484, + "step": 115 + }, + { + "epoch": 1.3219373219373218, + "grad_norm": 0.5917912125587463, + "learning_rate": 4.2211055276381914e-05, + "loss": 1.0667, + "step": 116 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.6044990420341492, + "learning_rate": 4.170854271356784e-05, + "loss": 1.1646, + "step": 117 + }, + { + "epoch": 1.3447293447293447, + "grad_norm": 0.5149083137512207, + "learning_rate": 4.120603015075377e-05, + "loss": 1.0567, + "step": 118 + }, + { + "epoch": 1.3561253561253561, + "grad_norm": 0.5668403506278992, + "learning_rate": 4.07035175879397e-05, + "loss": 1.2072, + "step": 119 + }, + { + "epoch": 1.3675213675213675, + "grad_norm": 0.5478379726409912, + "learning_rate": 4.020100502512563e-05, + "loss": 1.1353, + "step": 120 + }, + { + "epoch": 1.378917378917379, + "grad_norm": 0.5570518374443054, + "learning_rate": 3.969849246231156e-05, + "loss": 1.0336, + "step": 121 + }, + { + "epoch": 1.3903133903133904, + "grad_norm": 0.5794707536697388, + "learning_rate": 3.919597989949749e-05, + "loss": 1.1696, + "step": 122 + }, + { + "epoch": 1.4017094017094016, + "grad_norm": 0.567593514919281, + "learning_rate": 3.869346733668342e-05, + "loss": 1.0635, + "step": 123 + }, + { + "epoch": 1.413105413105413, + "grad_norm": 0.5602433085441589, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.1443, + "step": 124 + }, + { + "epoch": 1.4245014245014245, + "grad_norm": 0.578921377658844, + "learning_rate": 3.768844221105528e-05, + "loss": 1.1843, + "step": 125 + }, + { + "epoch": 1.435897435897436, + "grad_norm": 0.5648573040962219, + "learning_rate": 3.7185929648241204e-05, + "loss": 1.1076, + "step": 126 + }, + { + "epoch": 1.4472934472934473, + "grad_norm": 0.5635711550712585, + "learning_rate": 3.668341708542714e-05, + "loss": 1.0589, + "step": 127 + }, + { + "epoch": 1.4586894586894588, + "grad_norm": 0.6067689061164856, + "learning_rate": 3.618090452261307e-05, + "loss": 1.1941, + "step": 128 + }, + { + "epoch": 1.4700854700854702, + "grad_norm": 0.5677550435066223, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.0384, + "step": 129 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.599470853805542, + "learning_rate": 3.517587939698493e-05, + "loss": 1.2259, + "step": 130 + }, + { + "epoch": 1.4928774928774928, + "grad_norm": 0.6475313305854797, + "learning_rate": 3.467336683417085e-05, + "loss": 1.1618, + "step": 131 + }, + { + "epoch": 1.5042735042735043, + "grad_norm": 0.6463358402252197, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.1712, + "step": 132 + }, + { + "epoch": 1.5156695156695157, + "grad_norm": 0.5662721395492554, + "learning_rate": 3.366834170854272e-05, + "loss": 1.0731, + "step": 133 + }, + { + "epoch": 1.5270655270655271, + "grad_norm": 0.5981451869010925, + "learning_rate": 3.3165829145728643e-05, + "loss": 1.0298, + "step": 134 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 0.5991083979606628, + "learning_rate": 3.2663316582914576e-05, + "loss": 1.2169, + "step": 135 + }, + { + "epoch": 1.54985754985755, + "grad_norm": 0.6036386489868164, + "learning_rate": 3.21608040201005e-05, + "loss": 1.0498, + "step": 136 + }, + { + "epoch": 1.5612535612535612, + "grad_norm": 0.5927392840385437, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.0389, + "step": 137 + }, + { + "epoch": 1.5726495726495726, + "grad_norm": 0.5733420848846436, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.0608, + "step": 138 + }, + { + "epoch": 1.584045584045584, + "grad_norm": 0.6083365678787231, + "learning_rate": 3.065326633165829e-05, + "loss": 1.1203, + "step": 139 + }, + { + "epoch": 1.5954415954415955, + "grad_norm": 0.6153535842895508, + "learning_rate": 3.015075376884422e-05, + "loss": 1.1729, + "step": 140 + }, + { + "epoch": 1.606837606837607, + "grad_norm": 0.6425400376319885, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.0155, + "step": 141 + }, + { + "epoch": 1.618233618233618, + "grad_norm": 0.6353899240493774, + "learning_rate": 2.914572864321608e-05, + "loss": 1.245, + "step": 142 + }, + { + "epoch": 1.6296296296296298, + "grad_norm": 0.6624664664268494, + "learning_rate": 2.8643216080402015e-05, + "loss": 0.9795, + "step": 143 + }, + { + "epoch": 1.641025641025641, + "grad_norm": 0.6005571484565735, + "learning_rate": 2.814070351758794e-05, + "loss": 1.1581, + "step": 144 + }, + { + "epoch": 1.6524216524216524, + "grad_norm": 0.6583634614944458, + "learning_rate": 2.763819095477387e-05, + "loss": 1.0538, + "step": 145 + }, + { + "epoch": 1.6638176638176638, + "grad_norm": 0.5750309228897095, + "learning_rate": 2.7135678391959802e-05, + "loss": 0.9153, + "step": 146 + }, + { + "epoch": 1.6752136752136753, + "grad_norm": 0.6019430160522461, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.159, + "step": 147 + }, + { + "epoch": 1.6866096866096867, + "grad_norm": 0.6395899057388306, + "learning_rate": 2.613065326633166e-05, + "loss": 1.1699, + "step": 148 + }, + { + "epoch": 1.698005698005698, + "grad_norm": 0.635377824306488, + "learning_rate": 2.562814070351759e-05, + "loss": 1.0286, + "step": 149 + }, + { + "epoch": 1.7094017094017095, + "grad_norm": 0.6403070688247681, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.1748, + "step": 150 + }, + { + "epoch": 1.7207977207977208, + "grad_norm": 0.6614859104156494, + "learning_rate": 2.462311557788945e-05, + "loss": 1.0518, + "step": 151 + }, + { + "epoch": 1.7321937321937322, + "grad_norm": 0.7036588788032532, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.2191, + "step": 152 + }, + { + "epoch": 1.7435897435897436, + "grad_norm": 0.6717102527618408, + "learning_rate": 2.361809045226131e-05, + "loss": 1.1376, + "step": 153 + }, + { + "epoch": 1.7549857549857548, + "grad_norm": 0.6435654759407043, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.1601, + "step": 154 + }, + { + "epoch": 1.7663817663817665, + "grad_norm": 0.6151710748672485, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.0929, + "step": 155 + }, + { + "epoch": 1.7777777777777777, + "grad_norm": 0.6438819169998169, + "learning_rate": 2.21105527638191e-05, + "loss": 1.111, + "step": 156 + }, + { + "epoch": 1.7891737891737893, + "grad_norm": 0.6459853649139404, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.2303, + "step": 157 + }, + { + "epoch": 1.8005698005698005, + "grad_norm": 0.6052287220954895, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.1341, + "step": 158 + }, + { + "epoch": 1.811965811965812, + "grad_norm": 0.6797654032707214, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.1724, + "step": 159 + }, + { + "epoch": 1.8233618233618234, + "grad_norm": 0.6900933980941772, + "learning_rate": 2.0100502512562815e-05, + "loss": 1.0881, + "step": 160 + }, + { + "epoch": 1.8347578347578346, + "grad_norm": 0.6317200064659119, + "learning_rate": 1.9597989949748744e-05, + "loss": 1.1256, + "step": 161 + }, + { + "epoch": 1.8461538461538463, + "grad_norm": 0.6044368743896484, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.0595, + "step": 162 + }, + { + "epoch": 1.8575498575498575, + "grad_norm": 0.6719862818717957, + "learning_rate": 1.8592964824120602e-05, + "loss": 0.9983, + "step": 163 + }, + { + "epoch": 1.868945868945869, + "grad_norm": 0.6419474482536316, + "learning_rate": 1.8090452261306535e-05, + "loss": 1.0876, + "step": 164 + }, + { + "epoch": 1.8803418803418803, + "grad_norm": 0.6861122250556946, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.0904, + "step": 165 + }, + { + "epoch": 1.8917378917378918, + "grad_norm": 0.6277052760124207, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.1036, + "step": 166 + }, + { + "epoch": 1.9031339031339032, + "grad_norm": 0.7358347177505493, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.0499, + "step": 167 + }, + { + "epoch": 1.9145299145299144, + "grad_norm": 0.6961327195167542, + "learning_rate": 1.608040201005025e-05, + "loss": 1.097, + "step": 168 + }, + { + "epoch": 1.925925925925926, + "grad_norm": 0.6499162316322327, + "learning_rate": 1.5577889447236183e-05, + "loss": 1.0566, + "step": 169 + }, + { + "epoch": 1.9373219373219372, + "grad_norm": 0.6426655650138855, + "learning_rate": 1.507537688442211e-05, + "loss": 1.1127, + "step": 170 + }, + { + "epoch": 1.9487179487179487, + "grad_norm": 0.6038071513175964, + "learning_rate": 1.457286432160804e-05, + "loss": 1.0345, + "step": 171 + }, + { + "epoch": 1.96011396011396, + "grad_norm": 0.6887624263763428, + "learning_rate": 1.407035175879397e-05, + "loss": 1.2775, + "step": 172 + }, + { + "epoch": 1.9715099715099715, + "grad_norm": 0.6664908528327942, + "learning_rate": 1.3567839195979901e-05, + "loss": 1.099, + "step": 173 + }, + { + "epoch": 1.982905982905983, + "grad_norm": 0.6395720839500427, + "learning_rate": 1.306532663316583e-05, + "loss": 1.0795, + "step": 174 + }, + { + "epoch": 1.9943019943019942, + "grad_norm": 0.8258576989173889, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.4081, + "step": 175 + }, + { + "epoch": 2.005698005698006, + "grad_norm": 0.7664657831192017, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.2999, + "step": 176 + }, + { + "epoch": 2.017094017094017, + "grad_norm": 0.6419854164123535, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.0661, + "step": 177 + }, + { + "epoch": 2.0284900284900287, + "grad_norm": 0.6126803755760193, + "learning_rate": 1.105527638190955e-05, + "loss": 1.0989, + "step": 178 + }, + { + "epoch": 2.03988603988604, + "grad_norm": 0.5971490740776062, + "learning_rate": 1.0552763819095479e-05, + "loss": 0.9485, + "step": 179 + }, + { + "epoch": 2.051282051282051, + "grad_norm": 0.6343738436698914, + "learning_rate": 1.0050251256281408e-05, + "loss": 1.1226, + "step": 180 + }, + { + "epoch": 2.0626780626780628, + "grad_norm": 0.6217495203018188, + "learning_rate": 9.547738693467337e-06, + "loss": 1.0628, + "step": 181 + }, + { + "epoch": 2.074074074074074, + "grad_norm": 0.5966653823852539, + "learning_rate": 9.045226130653267e-06, + "loss": 1.0092, + "step": 182 + }, + { + "epoch": 2.0854700854700856, + "grad_norm": 0.6323566436767578, + "learning_rate": 8.542713567839196e-06, + "loss": 1.0992, + "step": 183 + }, + { + "epoch": 2.096866096866097, + "grad_norm": 0.6440880298614502, + "learning_rate": 8.040201005025125e-06, + "loss": 1.0804, + "step": 184 + }, + { + "epoch": 2.1082621082621085, + "grad_norm": 0.5995816588401794, + "learning_rate": 7.537688442211055e-06, + "loss": 1.1067, + "step": 185 + }, + { + "epoch": 2.1196581196581197, + "grad_norm": 0.6618144512176514, + "learning_rate": 7.035175879396985e-06, + "loss": 1.0689, + "step": 186 + }, + { + "epoch": 2.131054131054131, + "grad_norm": 0.6532097458839417, + "learning_rate": 6.532663316582915e-06, + "loss": 1.1028, + "step": 187 + }, + { + "epoch": 2.1424501424501425, + "grad_norm": 0.5830849409103394, + "learning_rate": 6.030150753768844e-06, + "loss": 0.9996, + "step": 188 + }, + { + "epoch": 2.1538461538461537, + "grad_norm": 0.6783652901649475, + "learning_rate": 5.527638190954775e-06, + "loss": 1.2311, + "step": 189 + }, + { + "epoch": 2.1652421652421654, + "grad_norm": 0.6712796688079834, + "learning_rate": 5.025125628140704e-06, + "loss": 0.9609, + "step": 190 + }, + { + "epoch": 2.1766381766381766, + "grad_norm": 0.6146546006202698, + "learning_rate": 4.522613065326634e-06, + "loss": 1.1135, + "step": 191 + }, + { + "epoch": 2.1880341880341883, + "grad_norm": 0.6589621901512146, + "learning_rate": 4.020100502512563e-06, + "loss": 1.0524, + "step": 192 + }, + { + "epoch": 2.1994301994301995, + "grad_norm": 0.648345947265625, + "learning_rate": 3.5175879396984926e-06, + "loss": 1.1136, + "step": 193 + }, + { + "epoch": 2.2108262108262107, + "grad_norm": 0.6554787158966064, + "learning_rate": 3.015075376884422e-06, + "loss": 1.0393, + "step": 194 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.60575270652771, + "learning_rate": 2.512562814070352e-06, + "loss": 1.0688, + "step": 195 + }, + { + "epoch": 2.2336182336182335, + "grad_norm": 0.609583854675293, + "learning_rate": 2.0100502512562813e-06, + "loss": 1.047, + "step": 196 + }, + { + "epoch": 2.245014245014245, + "grad_norm": 0.6172504425048828, + "learning_rate": 1.507537688442211e-06, + "loss": 1.0835, + "step": 197 + }, + { + "epoch": 2.2564102564102564, + "grad_norm": 0.6250450015068054, + "learning_rate": 1.0050251256281407e-06, + "loss": 1.0684, + "step": 198 + }, + { + "epoch": 2.267806267806268, + "grad_norm": 0.5692541599273682, + "learning_rate": 5.025125628140703e-07, + "loss": 0.9156, + "step": 199 + }, + { + "epoch": 2.2792022792022792, + "grad_norm": 0.6259585022926331, + "learning_rate": 0.0, + "loss": 1.2106, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.469981127242547e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_French_German/checkpoint-200/training_args.bin b/llama_French_German/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..83f6b86f8635485ab6467ef15624a156ea342338 --- /dev/null +++ b/llama_French_German/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deee2a337faaf60bd350045a0f826a1fa542c1eeb0741f78c25a1609f126f242 +size 5624 diff --git a/llama_French_Vietnamese/checkpoint-200/README.md b/llama_French_Vietnamese/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_French_Vietnamese/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_French_Vietnamese/checkpoint-200/adapter_config.json b/llama_French_Vietnamese/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..da338a3db899b4c5301f89419503da4210df9b3f --- /dev/null +++ b/llama_French_Vietnamese/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "o_proj", + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_French_Vietnamese/checkpoint-200/adapter_model.safetensors b/llama_French_Vietnamese/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2b379559a19acad33f7adc345714fddabc0fe8c --- /dev/null +++ b/llama_French_Vietnamese/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23cebe820c638f46aaef65150c2f49d4b9613c89c4cb995c045d29dbd0cf2608 +size 167832240 diff --git a/llama_French_Vietnamese/checkpoint-200/optimizer.pt b/llama_French_Vietnamese/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4ab5badfd74061e849fb3802f6900388df5bb37 --- /dev/null +++ b/llama_French_Vietnamese/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec646223a1173255d043417cefcddffee976cf700e5de8f3631a542c2ef55f38 +size 85723284 diff --git a/llama_French_Vietnamese/checkpoint-200/rng_state.pth b/llama_French_Vietnamese/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21f2ff1c1a816463781d51760f8156e041f5979 --- /dev/null +++ b/llama_French_Vietnamese/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4981123ff3cf7bd5b7f76839e90e4776f747ca4c38dcb41876fa010c0dea8b23 +size 14244 diff --git a/llama_French_Vietnamese/checkpoint-200/scheduler.pt b/llama_French_Vietnamese/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_French_Vietnamese/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_French_Vietnamese/checkpoint-200/special_tokens_map.json b/llama_French_Vietnamese/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_French_Vietnamese/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_French_Vietnamese/checkpoint-200/tokenizer.json b/llama_French_Vietnamese/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_French_Vietnamese/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_French_Vietnamese/checkpoint-200/tokenizer_config.json b/llama_French_Vietnamese/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_French_Vietnamese/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_French_Vietnamese/checkpoint-200/trainer_state.json b/llama_French_Vietnamese/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..27af25ea20549cded58010bac673187208a9acc8 --- /dev/null +++ b/llama_French_Vietnamese/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2792022792022792, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.011396011396011397, + "grad_norm": 1.0784692764282227, + "learning_rate": 0.0001, + "loss": 2.6501, + "step": 1 + }, + { + "epoch": 0.022792022792022793, + "grad_norm": 0.9594693779945374, + "learning_rate": 9.949748743718594e-05, + "loss": 2.5373, + "step": 2 + }, + { + "epoch": 0.03418803418803419, + "grad_norm": 0.934341549873352, + "learning_rate": 9.899497487437186e-05, + "loss": 2.6002, + "step": 3 + }, + { + "epoch": 0.045584045584045586, + "grad_norm": 1.0499581098556519, + "learning_rate": 9.84924623115578e-05, + "loss": 2.4077, + "step": 4 + }, + { + "epoch": 0.05698005698005698, + "grad_norm": 1.0792641639709473, + "learning_rate": 9.798994974874372e-05, + "loss": 2.3371, + "step": 5 + }, + { + "epoch": 0.06837606837606838, + "grad_norm": 1.1536877155303955, + "learning_rate": 9.748743718592965e-05, + "loss": 2.1002, + "step": 6 + }, + { + "epoch": 0.07977207977207977, + "grad_norm": 1.0796245336532593, + "learning_rate": 9.698492462311559e-05, + "loss": 1.9199, + "step": 7 + }, + { + "epoch": 0.09116809116809117, + "grad_norm": 0.9470813870429993, + "learning_rate": 9.64824120603015e-05, + "loss": 1.897, + "step": 8 + }, + { + "epoch": 0.10256410256410256, + "grad_norm": 0.8194458484649658, + "learning_rate": 9.597989949748745e-05, + "loss": 1.7407, + "step": 9 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 0.9465616345405579, + "learning_rate": 9.547738693467337e-05, + "loss": 1.6384, + "step": 10 + }, + { + "epoch": 0.12535612535612536, + "grad_norm": 0.8541427254676819, + "learning_rate": 9.49748743718593e-05, + "loss": 1.5281, + "step": 11 + }, + { + "epoch": 0.13675213675213677, + "grad_norm": 0.8208656311035156, + "learning_rate": 9.447236180904523e-05, + "loss": 1.5299, + "step": 12 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 0.48005664348602295, + "learning_rate": 9.396984924623115e-05, + "loss": 1.4816, + "step": 13 + }, + { + "epoch": 0.15954415954415954, + "grad_norm": 0.6090331077575684, + "learning_rate": 9.34673366834171e-05, + "loss": 1.4914, + "step": 14 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 1.0723884105682373, + "learning_rate": 9.296482412060302e-05, + "loss": 1.4245, + "step": 15 + }, + { + "epoch": 0.18233618233618235, + "grad_norm": 0.5261980891227722, + "learning_rate": 9.246231155778895e-05, + "loss": 1.4722, + "step": 16 + }, + { + "epoch": 0.19373219373219372, + "grad_norm": 0.5036055445671082, + "learning_rate": 9.195979899497488e-05, + "loss": 1.5034, + "step": 17 + }, + { + "epoch": 0.20512820512820512, + "grad_norm": 0.5439502596855164, + "learning_rate": 9.14572864321608e-05, + "loss": 1.4562, + "step": 18 + }, + { + "epoch": 0.21652421652421652, + "grad_norm": 0.5104373693466187, + "learning_rate": 9.095477386934675e-05, + "loss": 1.3939, + "step": 19 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 0.5232931971549988, + "learning_rate": 9.045226130653267e-05, + "loss": 1.4273, + "step": 20 + }, + { + "epoch": 0.23931623931623933, + "grad_norm": 0.57173752784729, + "learning_rate": 8.99497487437186e-05, + "loss": 1.3465, + "step": 21 + }, + { + "epoch": 0.25071225071225073, + "grad_norm": 0.5474035739898682, + "learning_rate": 8.944723618090453e-05, + "loss": 1.4501, + "step": 22 + }, + { + "epoch": 0.2621082621082621, + "grad_norm": 0.5647464990615845, + "learning_rate": 8.894472361809045e-05, + "loss": 1.3941, + "step": 23 + }, + { + "epoch": 0.27350427350427353, + "grad_norm": 0.6339774131774902, + "learning_rate": 8.84422110552764e-05, + "loss": 1.2837, + "step": 24 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 0.6236124634742737, + "learning_rate": 8.793969849246232e-05, + "loss": 1.4142, + "step": 25 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.5817052721977234, + "learning_rate": 8.743718592964825e-05, + "loss": 1.3434, + "step": 26 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 0.6462389230728149, + "learning_rate": 8.693467336683418e-05, + "loss": 1.3225, + "step": 27 + }, + { + "epoch": 0.3190883190883191, + "grad_norm": 0.7501896619796753, + "learning_rate": 8.64321608040201e-05, + "loss": 1.3523, + "step": 28 + }, + { + "epoch": 0.33048433048433046, + "grad_norm": 0.7449207901954651, + "learning_rate": 8.592964824120603e-05, + "loss": 1.4182, + "step": 29 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.6687287092208862, + "learning_rate": 8.542713567839196e-05, + "loss": 1.4138, + "step": 30 + }, + { + "epoch": 0.35327635327635326, + "grad_norm": 0.7908384203910828, + "learning_rate": 8.49246231155779e-05, + "loss": 1.3443, + "step": 31 + }, + { + "epoch": 0.3646723646723647, + "grad_norm": 0.835872232913971, + "learning_rate": 8.442211055276383e-05, + "loss": 1.3324, + "step": 32 + }, + { + "epoch": 0.37606837606837606, + "grad_norm": 0.8062137961387634, + "learning_rate": 8.391959798994975e-05, + "loss": 1.3436, + "step": 33 + }, + { + "epoch": 0.38746438746438744, + "grad_norm": 0.8746720552444458, + "learning_rate": 8.341708542713568e-05, + "loss": 1.3199, + "step": 34 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 0.8971230983734131, + "learning_rate": 8.291457286432161e-05, + "loss": 1.2359, + "step": 35 + }, + { + "epoch": 0.41025641025641024, + "grad_norm": 0.8474389910697937, + "learning_rate": 8.241206030150754e-05, + "loss": 1.3445, + "step": 36 + }, + { + "epoch": 0.42165242165242167, + "grad_norm": 0.9369707703590393, + "learning_rate": 8.190954773869348e-05, + "loss": 1.3685, + "step": 37 + }, + { + "epoch": 0.43304843304843305, + "grad_norm": 0.8477537631988525, + "learning_rate": 8.14070351758794e-05, + "loss": 1.244, + "step": 38 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.7631419897079468, + "learning_rate": 8.090452261306533e-05, + "loss": 1.3115, + "step": 39 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 0.6620834469795227, + "learning_rate": 8.040201005025126e-05, + "loss": 1.3725, + "step": 40 + }, + { + "epoch": 0.4672364672364672, + "grad_norm": 0.6581817865371704, + "learning_rate": 7.989949748743719e-05, + "loss": 1.2879, + "step": 41 + }, + { + "epoch": 0.47863247863247865, + "grad_norm": 0.6299817562103271, + "learning_rate": 7.939698492462313e-05, + "loss": 1.261, + "step": 42 + }, + { + "epoch": 0.49002849002849, + "grad_norm": 0.4784335494041443, + "learning_rate": 7.889447236180904e-05, + "loss": 1.273, + "step": 43 + }, + { + "epoch": 0.5014245014245015, + "grad_norm": 0.4865088164806366, + "learning_rate": 7.839195979899498e-05, + "loss": 1.2418, + "step": 44 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.5278174877166748, + "learning_rate": 7.788944723618091e-05, + "loss": 1.2665, + "step": 45 + }, + { + "epoch": 0.5242165242165242, + "grad_norm": 0.525149405002594, + "learning_rate": 7.738693467336684e-05, + "loss": 1.2711, + "step": 46 + }, + { + "epoch": 0.5356125356125356, + "grad_norm": 0.5783369541168213, + "learning_rate": 7.688442211055277e-05, + "loss": 1.4261, + "step": 47 + }, + { + "epoch": 0.5470085470085471, + "grad_norm": 0.5363317728042603, + "learning_rate": 7.638190954773869e-05, + "loss": 1.3644, + "step": 48 + }, + { + "epoch": 0.5584045584045584, + "grad_norm": 0.5305460095405579, + "learning_rate": 7.587939698492463e-05, + "loss": 1.3315, + "step": 49 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 0.5221759080886841, + "learning_rate": 7.537688442211056e-05, + "loss": 1.2889, + "step": 50 + }, + { + "epoch": 0.5811965811965812, + "grad_norm": 0.45660942792892456, + "learning_rate": 7.487437185929649e-05, + "loss": 1.303, + "step": 51 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.6248636841773987, + "learning_rate": 7.437185929648241e-05, + "loss": 1.2554, + "step": 52 + }, + { + "epoch": 0.603988603988604, + "grad_norm": 0.5292128920555115, + "learning_rate": 7.386934673366834e-05, + "loss": 1.2843, + "step": 53 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 0.49508729577064514, + "learning_rate": 7.336683417085427e-05, + "loss": 1.2373, + "step": 54 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 0.5450984239578247, + "learning_rate": 7.28643216080402e-05, + "loss": 1.2929, + "step": 55 + }, + { + "epoch": 0.6381766381766382, + "grad_norm": 0.5375221967697144, + "learning_rate": 7.236180904522614e-05, + "loss": 1.2837, + "step": 56 + }, + { + "epoch": 0.6495726495726496, + "grad_norm": 0.5044800043106079, + "learning_rate": 7.185929648241206e-05, + "loss": 1.3569, + "step": 57 + }, + { + "epoch": 0.6609686609686609, + "grad_norm": 0.5803720951080322, + "learning_rate": 7.135678391959799e-05, + "loss": 1.31, + "step": 58 + }, + { + "epoch": 0.6723646723646723, + "grad_norm": 0.5438725352287292, + "learning_rate": 7.085427135678392e-05, + "loss": 1.2009, + "step": 59 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.5455295443534851, + "learning_rate": 7.035175879396985e-05, + "loss": 1.2751, + "step": 60 + }, + { + "epoch": 0.6951566951566952, + "grad_norm": 0.5530592799186707, + "learning_rate": 6.984924623115579e-05, + "loss": 1.2654, + "step": 61 + }, + { + "epoch": 0.7065527065527065, + "grad_norm": 0.5058171153068542, + "learning_rate": 6.93467336683417e-05, + "loss": 1.3489, + "step": 62 + }, + { + "epoch": 0.717948717948718, + "grad_norm": 0.5556838512420654, + "learning_rate": 6.884422110552764e-05, + "loss": 1.2397, + "step": 63 + }, + { + "epoch": 0.7293447293447294, + "grad_norm": 0.5675415396690369, + "learning_rate": 6.834170854271357e-05, + "loss": 1.1964, + "step": 64 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.5471984148025513, + "learning_rate": 6.78391959798995e-05, + "loss": 1.1733, + "step": 65 + }, + { + "epoch": 0.7521367521367521, + "grad_norm": 0.4921834170818329, + "learning_rate": 6.733668341708544e-05, + "loss": 1.2328, + "step": 66 + }, + { + "epoch": 0.7635327635327636, + "grad_norm": 0.5362589955329895, + "learning_rate": 6.683417085427135e-05, + "loss": 1.3159, + "step": 67 + }, + { + "epoch": 0.7749287749287749, + "grad_norm": 0.6789804697036743, + "learning_rate": 6.633165829145729e-05, + "loss": 1.3032, + "step": 68 + }, + { + "epoch": 0.7863247863247863, + "grad_norm": 0.519393265247345, + "learning_rate": 6.582914572864322e-05, + "loss": 1.2142, + "step": 69 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.5909695625305176, + "learning_rate": 6.532663316582915e-05, + "loss": 1.3604, + "step": 70 + }, + { + "epoch": 0.8091168091168092, + "grad_norm": 0.6058588624000549, + "learning_rate": 6.482412060301508e-05, + "loss": 1.3357, + "step": 71 + }, + { + "epoch": 0.8205128205128205, + "grad_norm": 0.5424550175666809, + "learning_rate": 6.4321608040201e-05, + "loss": 1.2659, + "step": 72 + }, + { + "epoch": 0.8319088319088319, + "grad_norm": 0.5178602337837219, + "learning_rate": 6.381909547738694e-05, + "loss": 1.281, + "step": 73 + }, + { + "epoch": 0.8433048433048433, + "grad_norm": 0.5486872792243958, + "learning_rate": 6.331658291457287e-05, + "loss": 1.2068, + "step": 74 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.6421880722045898, + "learning_rate": 6.28140703517588e-05, + "loss": 1.277, + "step": 75 + }, + { + "epoch": 0.8660968660968661, + "grad_norm": 0.548304557800293, + "learning_rate": 6.231155778894473e-05, + "loss": 1.3307, + "step": 76 + }, + { + "epoch": 0.8774928774928775, + "grad_norm": 0.5255091786384583, + "learning_rate": 6.180904522613065e-05, + "loss": 1.1562, + "step": 77 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.5991901755332947, + "learning_rate": 6.130653266331658e-05, + "loss": 1.2373, + "step": 78 + }, + { + "epoch": 0.9002849002849003, + "grad_norm": 0.6089536547660828, + "learning_rate": 6.080402010050251e-05, + "loss": 1.2849, + "step": 79 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.5381121039390564, + "learning_rate": 6.030150753768844e-05, + "loss": 1.2289, + "step": 80 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 0.573054313659668, + "learning_rate": 5.979899497487438e-05, + "loss": 1.3407, + "step": 81 + }, + { + "epoch": 0.9344729344729344, + "grad_norm": 0.625826358795166, + "learning_rate": 5.929648241206031e-05, + "loss": 1.3459, + "step": 82 + }, + { + "epoch": 0.9458689458689459, + "grad_norm": 0.5570762157440186, + "learning_rate": 5.879396984924623e-05, + "loss": 1.3511, + "step": 83 + }, + { + "epoch": 0.9572649572649573, + "grad_norm": 0.553999662399292, + "learning_rate": 5.829145728643216e-05, + "loss": 1.2588, + "step": 84 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.5458688139915466, + "learning_rate": 5.778894472361809e-05, + "loss": 1.1637, + "step": 85 + }, + { + "epoch": 0.98005698005698, + "grad_norm": 0.5578490495681763, + "learning_rate": 5.728643216080403e-05, + "loss": 1.2362, + "step": 86 + }, + { + "epoch": 0.9914529914529915, + "grad_norm": 0.560295820236206, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.3437, + "step": 87 + }, + { + "epoch": 1.002849002849003, + "grad_norm": 0.7770240902900696, + "learning_rate": 5.628140703517588e-05, + "loss": 1.4883, + "step": 88 + }, + { + "epoch": 1.0142450142450143, + "grad_norm": 0.5888678431510925, + "learning_rate": 5.577889447236181e-05, + "loss": 1.3217, + "step": 89 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.490393728017807, + "learning_rate": 5.527638190954774e-05, + "loss": 1.1422, + "step": 90 + }, + { + "epoch": 1.037037037037037, + "grad_norm": 0.5216094255447388, + "learning_rate": 5.477386934673368e-05, + "loss": 1.2432, + "step": 91 + }, + { + "epoch": 1.0484330484330484, + "grad_norm": 0.5268694758415222, + "learning_rate": 5.4271356783919604e-05, + "loss": 1.219, + "step": 92 + }, + { + "epoch": 1.0598290598290598, + "grad_norm": 0.5001344084739685, + "learning_rate": 5.376884422110553e-05, + "loss": 1.1595, + "step": 93 + }, + { + "epoch": 1.0712250712250713, + "grad_norm": 0.5058109760284424, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.113, + "step": 94 + }, + { + "epoch": 1.0826210826210827, + "grad_norm": 0.5767057538032532, + "learning_rate": 5.276381909547739e-05, + "loss": 1.2735, + "step": 95 + }, + { + "epoch": 1.0940170940170941, + "grad_norm": 0.5390145182609558, + "learning_rate": 5.226130653266332e-05, + "loss": 1.1482, + "step": 96 + }, + { + "epoch": 1.1054131054131053, + "grad_norm": 0.5533584356307983, + "learning_rate": 5.175879396984925e-05, + "loss": 1.2073, + "step": 97 + }, + { + "epoch": 1.1168091168091168, + "grad_norm": 0.5460371971130371, + "learning_rate": 5.125628140703518e-05, + "loss": 1.2277, + "step": 98 + }, + { + "epoch": 1.1282051282051282, + "grad_norm": 0.5108863711357117, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.0811, + "step": 99 + }, + { + "epoch": 1.1396011396011396, + "grad_norm": 0.5864809155464172, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.2959, + "step": 100 + }, + { + "epoch": 1.150997150997151, + "grad_norm": 0.6016486287117004, + "learning_rate": 4.974874371859297e-05, + "loss": 1.2025, + "step": 101 + }, + { + "epoch": 1.1623931623931625, + "grad_norm": 0.6888948678970337, + "learning_rate": 4.92462311557789e-05, + "loss": 1.3175, + "step": 102 + }, + { + "epoch": 1.173789173789174, + "grad_norm": 0.5565383434295654, + "learning_rate": 4.874371859296483e-05, + "loss": 1.0789, + "step": 103 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.5908952355384827, + "learning_rate": 4.824120603015075e-05, + "loss": 1.2476, + "step": 104 + }, + { + "epoch": 1.1965811965811965, + "grad_norm": 0.5548040866851807, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.1433, + "step": 105 + }, + { + "epoch": 1.207977207977208, + "grad_norm": 0.5334557890892029, + "learning_rate": 4.723618090452262e-05, + "loss": 1.0989, + "step": 106 + }, + { + "epoch": 1.2193732193732194, + "grad_norm": 0.6825379133224487, + "learning_rate": 4.673366834170855e-05, + "loss": 1.2794, + "step": 107 + }, + { + "epoch": 1.2307692307692308, + "grad_norm": 0.605975329875946, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.144, + "step": 108 + }, + { + "epoch": 1.242165242165242, + "grad_norm": 0.6651902198791504, + "learning_rate": 4.57286432160804e-05, + "loss": 1.2584, + "step": 109 + }, + { + "epoch": 1.2535612535612537, + "grad_norm": 0.6563193798065186, + "learning_rate": 4.522613065326633e-05, + "loss": 1.1967, + "step": 110 + }, + { + "epoch": 1.264957264957265, + "grad_norm": 0.5959343910217285, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.0391, + "step": 111 + }, + { + "epoch": 1.2763532763532763, + "grad_norm": 0.6498646140098572, + "learning_rate": 4.42211055276382e-05, + "loss": 1.3178, + "step": 112 + }, + { + "epoch": 1.2877492877492878, + "grad_norm": 0.6595070958137512, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.2349, + "step": 113 + }, + { + "epoch": 1.2991452991452992, + "grad_norm": 0.6051594018936157, + "learning_rate": 4.321608040201005e-05, + "loss": 1.2297, + "step": 114 + }, + { + "epoch": 1.3105413105413106, + "grad_norm": 0.6078906059265137, + "learning_rate": 4.271356783919598e-05, + "loss": 1.2055, + "step": 115 + }, + { + "epoch": 1.3219373219373218, + "grad_norm": 0.6110698580741882, + "learning_rate": 4.2211055276381914e-05, + "loss": 1.117, + "step": 116 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.6161591410636902, + "learning_rate": 4.170854271356784e-05, + "loss": 1.2147, + "step": 117 + }, + { + "epoch": 1.3447293447293447, + "grad_norm": 0.582253098487854, + "learning_rate": 4.120603015075377e-05, + "loss": 1.1182, + "step": 118 + }, + { + "epoch": 1.3561253561253561, + "grad_norm": 0.6166397929191589, + "learning_rate": 4.07035175879397e-05, + "loss": 1.2569, + "step": 119 + }, + { + "epoch": 1.3675213675213675, + "grad_norm": 0.6021968722343445, + "learning_rate": 4.020100502512563e-05, + "loss": 1.171, + "step": 120 + }, + { + "epoch": 1.378917378917379, + "grad_norm": 0.6168532967567444, + "learning_rate": 3.969849246231156e-05, + "loss": 1.07, + "step": 121 + }, + { + "epoch": 1.3903133903133904, + "grad_norm": 0.6636329293251038, + "learning_rate": 3.919597989949749e-05, + "loss": 1.2488, + "step": 122 + }, + { + "epoch": 1.4017094017094016, + "grad_norm": 0.6237582564353943, + "learning_rate": 3.869346733668342e-05, + "loss": 1.1282, + "step": 123 + }, + { + "epoch": 1.413105413105413, + "grad_norm": 0.6047698855400085, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.1873, + "step": 124 + }, + { + "epoch": 1.4245014245014245, + "grad_norm": 0.6448729038238525, + "learning_rate": 3.768844221105528e-05, + "loss": 1.2344, + "step": 125 + }, + { + "epoch": 1.435897435897436, + "grad_norm": 0.6173490881919861, + "learning_rate": 3.7185929648241204e-05, + "loss": 1.1845, + "step": 126 + }, + { + "epoch": 1.4472934472934473, + "grad_norm": 0.6320385932922363, + "learning_rate": 3.668341708542714e-05, + "loss": 1.0968, + "step": 127 + }, + { + "epoch": 1.4586894586894588, + "grad_norm": 0.6567147970199585, + "learning_rate": 3.618090452261307e-05, + "loss": 1.2861, + "step": 128 + }, + { + "epoch": 1.4700854700854702, + "grad_norm": 0.5990245342254639, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.1027, + "step": 129 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.652154803276062, + "learning_rate": 3.517587939698493e-05, + "loss": 1.3075, + "step": 130 + }, + { + "epoch": 1.4928774928774928, + "grad_norm": 0.6792729496955872, + "learning_rate": 3.467336683417085e-05, + "loss": 1.2526, + "step": 131 + }, + { + "epoch": 1.5042735042735043, + "grad_norm": 0.7002686262130737, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.2418, + "step": 132 + }, + { + "epoch": 1.5156695156695157, + "grad_norm": 0.6259899139404297, + "learning_rate": 3.366834170854272e-05, + "loss": 1.134, + "step": 133 + }, + { + "epoch": 1.5270655270655271, + "grad_norm": 0.6328178644180298, + "learning_rate": 3.3165829145728643e-05, + "loss": 1.1128, + "step": 134 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 0.6246987581253052, + "learning_rate": 3.2663316582914576e-05, + "loss": 1.2537, + "step": 135 + }, + { + "epoch": 1.54985754985755, + "grad_norm": 0.6604442000389099, + "learning_rate": 3.21608040201005e-05, + "loss": 1.1178, + "step": 136 + }, + { + "epoch": 1.5612535612535612, + "grad_norm": 0.6827804446220398, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.118, + "step": 137 + }, + { + "epoch": 1.5726495726495726, + "grad_norm": 0.6873475909233093, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.1063, + "step": 138 + }, + { + "epoch": 1.584045584045584, + "grad_norm": 0.6253861784934998, + "learning_rate": 3.065326633165829e-05, + "loss": 1.1616, + "step": 139 + }, + { + "epoch": 1.5954415954415955, + "grad_norm": 0.6834537982940674, + "learning_rate": 3.015075376884422e-05, + "loss": 1.2297, + "step": 140 + }, + { + "epoch": 1.606837606837607, + "grad_norm": 0.679348349571228, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.0814, + "step": 141 + }, + { + "epoch": 1.618233618233618, + "grad_norm": 0.7506070137023926, + "learning_rate": 2.914572864321608e-05, + "loss": 1.3602, + "step": 142 + }, + { + "epoch": 1.6296296296296298, + "grad_norm": 0.6164716482162476, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.0395, + "step": 143 + }, + { + "epoch": 1.641025641025641, + "grad_norm": 0.7544202208518982, + "learning_rate": 2.814070351758794e-05, + "loss": 1.2558, + "step": 144 + }, + { + "epoch": 1.6524216524216524, + "grad_norm": 0.73212069272995, + "learning_rate": 2.763819095477387e-05, + "loss": 1.1011, + "step": 145 + }, + { + "epoch": 1.6638176638176638, + "grad_norm": 0.6177868247032166, + "learning_rate": 2.7135678391959802e-05, + "loss": 0.9692, + "step": 146 + }, + { + "epoch": 1.6752136752136753, + "grad_norm": 0.6596304774284363, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.2087, + "step": 147 + }, + { + "epoch": 1.6866096866096867, + "grad_norm": 0.718070924282074, + "learning_rate": 2.613065326633166e-05, + "loss": 1.2413, + "step": 148 + }, + { + "epoch": 1.698005698005698, + "grad_norm": 0.6983141899108887, + "learning_rate": 2.562814070351759e-05, + "loss": 1.0729, + "step": 149 + }, + { + "epoch": 1.7094017094017095, + "grad_norm": 0.7079763412475586, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.2304, + "step": 150 + }, + { + "epoch": 1.7207977207977208, + "grad_norm": 0.7287095785140991, + "learning_rate": 2.462311557788945e-05, + "loss": 1.1008, + "step": 151 + }, + { + "epoch": 1.7321937321937322, + "grad_norm": 0.7736594676971436, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.2792, + "step": 152 + }, + { + "epoch": 1.7435897435897436, + "grad_norm": 0.7171739339828491, + "learning_rate": 2.361809045226131e-05, + "loss": 1.1599, + "step": 153 + }, + { + "epoch": 1.7549857549857548, + "grad_norm": 0.6759651899337769, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.1898, + "step": 154 + }, + { + "epoch": 1.7663817663817665, + "grad_norm": 0.724470853805542, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.1428, + "step": 155 + }, + { + "epoch": 1.7777777777777777, + "grad_norm": 0.6671333312988281, + "learning_rate": 2.21105527638191e-05, + "loss": 1.1782, + "step": 156 + }, + { + "epoch": 1.7891737891737893, + "grad_norm": 0.6964455246925354, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.306, + "step": 157 + }, + { + "epoch": 1.8005698005698005, + "grad_norm": 0.6942877173423767, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.2104, + "step": 158 + }, + { + "epoch": 1.811965811965812, + "grad_norm": 0.7392019033432007, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.2356, + "step": 159 + }, + { + "epoch": 1.8233618233618234, + "grad_norm": 0.8423513174057007, + "learning_rate": 2.0100502512562815e-05, + "loss": 1.1423, + "step": 160 + }, + { + "epoch": 1.8347578347578346, + "grad_norm": 0.6901450157165527, + "learning_rate": 1.9597989949748744e-05, + "loss": 1.1661, + "step": 161 + }, + { + "epoch": 1.8461538461538463, + "grad_norm": 0.7124201655387878, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.1464, + "step": 162 + }, + { + "epoch": 1.8575498575498575, + "grad_norm": 0.7080592513084412, + "learning_rate": 1.8592964824120602e-05, + "loss": 1.0529, + "step": 163 + }, + { + "epoch": 1.868945868945869, + "grad_norm": 0.6967963576316833, + "learning_rate": 1.8090452261306535e-05, + "loss": 1.1515, + "step": 164 + }, + { + "epoch": 1.8803418803418803, + "grad_norm": 0.7778387069702148, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.1614, + "step": 165 + }, + { + "epoch": 1.8917378917378918, + "grad_norm": 0.7115724682807922, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.1637, + "step": 166 + }, + { + "epoch": 1.9031339031339032, + "grad_norm": 0.7498459815979004, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.1176, + "step": 167 + }, + { + "epoch": 1.9145299145299144, + "grad_norm": 0.7076297998428345, + "learning_rate": 1.608040201005025e-05, + "loss": 1.1491, + "step": 168 + }, + { + "epoch": 1.925925925925926, + "grad_norm": 0.7415072321891785, + "learning_rate": 1.5577889447236183e-05, + "loss": 1.1124, + "step": 169 + }, + { + "epoch": 1.9373219373219372, + "grad_norm": 0.7522334456443787, + "learning_rate": 1.507537688442211e-05, + "loss": 1.1856, + "step": 170 + }, + { + "epoch": 1.9487179487179487, + "grad_norm": 0.6429208517074585, + "learning_rate": 1.457286432160804e-05, + "loss": 1.1099, + "step": 171 + }, + { + "epoch": 1.96011396011396, + "grad_norm": 0.764468789100647, + "learning_rate": 1.407035175879397e-05, + "loss": 1.3465, + "step": 172 + }, + { + "epoch": 1.9715099715099715, + "grad_norm": 0.6907607316970825, + "learning_rate": 1.3567839195979901e-05, + "loss": 1.1452, + "step": 173 + }, + { + "epoch": 1.982905982905983, + "grad_norm": 0.6734034419059753, + "learning_rate": 1.306532663316583e-05, + "loss": 1.166, + "step": 174 + }, + { + "epoch": 1.9943019943019942, + "grad_norm": 0.9236817955970764, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.4582, + "step": 175 + }, + { + "epoch": 2.005698005698006, + "grad_norm": 0.88917076587677, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.3924, + "step": 176 + }, + { + "epoch": 2.017094017094017, + "grad_norm": 0.7218538522720337, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.1354, + "step": 177 + }, + { + "epoch": 2.0284900284900287, + "grad_norm": 0.6501768231391907, + "learning_rate": 1.105527638190955e-05, + "loss": 1.1298, + "step": 178 + }, + { + "epoch": 2.03988603988604, + "grad_norm": 0.6275390982627869, + "learning_rate": 1.0552763819095479e-05, + "loss": 1.0144, + "step": 179 + }, + { + "epoch": 2.051282051282051, + "grad_norm": 0.6888313889503479, + "learning_rate": 1.0050251256281408e-05, + "loss": 1.1614, + "step": 180 + }, + { + "epoch": 2.0626780626780628, + "grad_norm": 0.6875863075256348, + "learning_rate": 9.547738693467337e-06, + "loss": 1.1225, + "step": 181 + }, + { + "epoch": 2.074074074074074, + "grad_norm": 0.6341241002082825, + "learning_rate": 9.045226130653267e-06, + "loss": 1.0308, + "step": 182 + }, + { + "epoch": 2.0854700854700856, + "grad_norm": 0.6856946349143982, + "learning_rate": 8.542713567839196e-06, + "loss": 1.1745, + "step": 183 + }, + { + "epoch": 2.096866096866097, + "grad_norm": 0.694002628326416, + "learning_rate": 8.040201005025125e-06, + "loss": 1.1566, + "step": 184 + }, + { + "epoch": 2.1082621082621085, + "grad_norm": 0.6416667699813843, + "learning_rate": 7.537688442211055e-06, + "loss": 1.1359, + "step": 185 + }, + { + "epoch": 2.1196581196581197, + "grad_norm": 0.7167916297912598, + "learning_rate": 7.035175879396985e-06, + "loss": 1.1244, + "step": 186 + }, + { + "epoch": 2.131054131054131, + "grad_norm": 0.695003092288971, + "learning_rate": 6.532663316582915e-06, + "loss": 1.1535, + "step": 187 + }, + { + "epoch": 2.1424501424501425, + "grad_norm": 0.6406252980232239, + "learning_rate": 6.030150753768844e-06, + "loss": 1.0678, + "step": 188 + }, + { + "epoch": 2.1538461538461537, + "grad_norm": 0.7266349196434021, + "learning_rate": 5.527638190954775e-06, + "loss": 1.2819, + "step": 189 + }, + { + "epoch": 2.1652421652421654, + "grad_norm": 0.6204450130462646, + "learning_rate": 5.025125628140704e-06, + "loss": 0.9848, + "step": 190 + }, + { + "epoch": 2.1766381766381766, + "grad_norm": 0.667356014251709, + "learning_rate": 4.522613065326634e-06, + "loss": 1.1543, + "step": 191 + }, + { + "epoch": 2.1880341880341883, + "grad_norm": 0.6994811296463013, + "learning_rate": 4.020100502512563e-06, + "loss": 1.0918, + "step": 192 + }, + { + "epoch": 2.1994301994301995, + "grad_norm": 0.7194527387619019, + "learning_rate": 3.5175879396984926e-06, + "loss": 1.1506, + "step": 193 + }, + { + "epoch": 2.2108262108262107, + "grad_norm": 0.6610317826271057, + "learning_rate": 3.015075376884422e-06, + "loss": 1.0556, + "step": 194 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.6722702383995056, + "learning_rate": 2.512562814070352e-06, + "loss": 1.1425, + "step": 195 + }, + { + "epoch": 2.2336182336182335, + "grad_norm": 0.64280766248703, + "learning_rate": 2.0100502512562813e-06, + "loss": 1.0667, + "step": 196 + }, + { + "epoch": 2.245014245014245, + "grad_norm": 0.6782143115997314, + "learning_rate": 1.507537688442211e-06, + "loss": 1.1588, + "step": 197 + }, + { + "epoch": 2.2564102564102564, + "grad_norm": 0.6610252261161804, + "learning_rate": 1.0050251256281407e-06, + "loss": 1.1253, + "step": 198 + }, + { + "epoch": 2.267806267806268, + "grad_norm": 0.6104291677474976, + "learning_rate": 5.025125628140703e-07, + "loss": 0.9622, + "step": 199 + }, + { + "epoch": 2.2792022792022792, + "grad_norm": 0.7020414471626282, + "learning_rate": 0.0, + "loss": 1.2647, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.4266062597545984e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_French_Vietnamese/checkpoint-200/training_args.bin b/llama_French_Vietnamese/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a8ddaafbd7172b73b9f580481ece2fddabcbfa1 --- /dev/null +++ b/llama_French_Vietnamese/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:446a17287b7b22beffdd7ea7b0823ff924185c4cebb659f26e1e1627ab5a936a +size 5624 diff --git a/llama_German_Chinese/checkpoint-200/README.md b/llama_German_Chinese/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_German_Chinese/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_German_Chinese/checkpoint-200/adapter_config.json b/llama_German_Chinese/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e7c4f63e3545bda108cb5abf304fc6a389346520 --- /dev/null +++ b/llama_German_Chinese/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "v_proj", + "up_proj", + "down_proj", + "q_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_German_Chinese/checkpoint-200/adapter_model.safetensors b/llama_German_Chinese/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e56a8a99409ed1e0e0594b9fc75299f2b3961b46 --- /dev/null +++ b/llama_German_Chinese/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5c0f9fa61cd3f599d9a09f33b00e8371f26db2d5e5275b4d84980ca5cc1d2d +size 167832240 diff --git a/llama_German_Chinese/checkpoint-200/optimizer.pt b/llama_German_Chinese/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..93589ea02dd8f3200fb5bd07cfbf9dddd51945e3 --- /dev/null +++ b/llama_German_Chinese/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d6a249ecd93e3aee01ef51af4ad07420f91e0b192cfeb3432367970b7707400 +size 85723284 diff --git a/llama_German_Chinese/checkpoint-200/rng_state.pth b/llama_German_Chinese/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21f2ff1c1a816463781d51760f8156e041f5979 --- /dev/null +++ b/llama_German_Chinese/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4981123ff3cf7bd5b7f76839e90e4776f747ca4c38dcb41876fa010c0dea8b23 +size 14244 diff --git a/llama_German_Chinese/checkpoint-200/scheduler.pt b/llama_German_Chinese/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_German_Chinese/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_German_Chinese/checkpoint-200/special_tokens_map.json b/llama_German_Chinese/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_German_Chinese/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_German_Chinese/checkpoint-200/tokenizer.json b/llama_German_Chinese/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_German_Chinese/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_German_Chinese/checkpoint-200/tokenizer_config.json b/llama_German_Chinese/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_German_Chinese/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_German_Chinese/checkpoint-200/trainer_state.json b/llama_German_Chinese/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e547544265ef1b373fdb98b918a8f1b7a7c884e0 --- /dev/null +++ b/llama_German_Chinese/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.21606648199446, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0110803324099723, + "grad_norm": 0.9041995406150818, + "learning_rate": 0.0001, + "loss": 2.5533, + "step": 1 + }, + { + "epoch": 0.0221606648199446, + "grad_norm": 0.9356410503387451, + "learning_rate": 9.949748743718594e-05, + "loss": 2.5542, + "step": 2 + }, + { + "epoch": 0.0332409972299169, + "grad_norm": 0.9131423830986023, + "learning_rate": 9.899497487437186e-05, + "loss": 2.5028, + "step": 3 + }, + { + "epoch": 0.0443213296398892, + "grad_norm": 0.9732369780540466, + "learning_rate": 9.84924623115578e-05, + "loss": 2.393, + "step": 4 + }, + { + "epoch": 0.055401662049861494, + "grad_norm": 0.9332369565963745, + "learning_rate": 9.798994974874372e-05, + "loss": 2.2448, + "step": 5 + }, + { + "epoch": 0.0664819944598338, + "grad_norm": 1.0083566904067993, + "learning_rate": 9.748743718592965e-05, + "loss": 2.1345, + "step": 6 + }, + { + "epoch": 0.07756232686980609, + "grad_norm": 0.8932923078536987, + "learning_rate": 9.698492462311559e-05, + "loss": 1.9912, + "step": 7 + }, + { + "epoch": 0.0886426592797784, + "grad_norm": 1.8232415914535522, + "learning_rate": 9.64824120603015e-05, + "loss": 1.844, + "step": 8 + }, + { + "epoch": 0.0997229916897507, + "grad_norm": 0.8558672070503235, + "learning_rate": 9.597989949748745e-05, + "loss": 1.7406, + "step": 9 + }, + { + "epoch": 0.11080332409972299, + "grad_norm": 0.7986319661140442, + "learning_rate": 9.547738693467337e-05, + "loss": 1.636, + "step": 10 + }, + { + "epoch": 0.12188365650969529, + "grad_norm": 0.8156765699386597, + "learning_rate": 9.49748743718593e-05, + "loss": 1.6183, + "step": 11 + }, + { + "epoch": 0.1329639889196676, + "grad_norm": 0.7248062491416931, + "learning_rate": 9.447236180904523e-05, + "loss": 1.57, + "step": 12 + }, + { + "epoch": 0.1440443213296399, + "grad_norm": 0.6793098449707031, + "learning_rate": 9.396984924623115e-05, + "loss": 1.4801, + "step": 13 + }, + { + "epoch": 0.15512465373961218, + "grad_norm": 0.566728949546814, + "learning_rate": 9.34673366834171e-05, + "loss": 1.4867, + "step": 14 + }, + { + "epoch": 0.16620498614958448, + "grad_norm": 0.5523749589920044, + "learning_rate": 9.296482412060302e-05, + "loss": 1.36, + "step": 15 + }, + { + "epoch": 0.1772853185595568, + "grad_norm": 0.5163611173629761, + "learning_rate": 9.246231155778895e-05, + "loss": 1.4472, + "step": 16 + }, + { + "epoch": 0.1883656509695291, + "grad_norm": 0.5090933442115784, + "learning_rate": 9.195979899497488e-05, + "loss": 1.4181, + "step": 17 + }, + { + "epoch": 0.1994459833795014, + "grad_norm": 0.5989904999732971, + "learning_rate": 9.14572864321608e-05, + "loss": 1.4414, + "step": 18 + }, + { + "epoch": 0.21052631578947367, + "grad_norm": 0.5392615795135498, + "learning_rate": 9.095477386934675e-05, + "loss": 1.3723, + "step": 19 + }, + { + "epoch": 0.22160664819944598, + "grad_norm": 0.6069510579109192, + "learning_rate": 9.045226130653267e-05, + "loss": 1.5178, + "step": 20 + }, + { + "epoch": 0.23268698060941828, + "grad_norm": 0.5653948187828064, + "learning_rate": 8.99497487437186e-05, + "loss": 1.2593, + "step": 21 + }, + { + "epoch": 0.24376731301939059, + "grad_norm": 0.5368112325668335, + "learning_rate": 8.944723618090453e-05, + "loss": 1.3471, + "step": 22 + }, + { + "epoch": 0.2548476454293629, + "grad_norm": 0.5639390349388123, + "learning_rate": 8.894472361809045e-05, + "loss": 1.4432, + "step": 23 + }, + { + "epoch": 0.2659279778393352, + "grad_norm": 0.5520769953727722, + "learning_rate": 8.84422110552764e-05, + "loss": 1.3426, + "step": 24 + }, + { + "epoch": 0.2770083102493075, + "grad_norm": 0.6374968886375427, + "learning_rate": 8.793969849246232e-05, + "loss": 1.5108, + "step": 25 + }, + { + "epoch": 0.2880886426592798, + "grad_norm": 0.6635875701904297, + "learning_rate": 8.743718592964825e-05, + "loss": 1.4302, + "step": 26 + }, + { + "epoch": 0.29916897506925205, + "grad_norm": 0.6544961929321289, + "learning_rate": 8.693467336683418e-05, + "loss": 1.4542, + "step": 27 + }, + { + "epoch": 0.31024930747922436, + "grad_norm": 0.5790326595306396, + "learning_rate": 8.64321608040201e-05, + "loss": 1.3176, + "step": 28 + }, + { + "epoch": 0.32132963988919666, + "grad_norm": 0.6066296696662903, + "learning_rate": 8.592964824120603e-05, + "loss": 1.355, + "step": 29 + }, + { + "epoch": 0.33240997229916897, + "grad_norm": 0.6547830700874329, + "learning_rate": 8.542713567839196e-05, + "loss": 1.3518, + "step": 30 + }, + { + "epoch": 0.34349030470914127, + "grad_norm": 0.6181479096412659, + "learning_rate": 8.49246231155779e-05, + "loss": 1.2498, + "step": 31 + }, + { + "epoch": 0.3545706371191136, + "grad_norm": 0.7298603057861328, + "learning_rate": 8.442211055276383e-05, + "loss": 1.2897, + "step": 32 + }, + { + "epoch": 0.3656509695290859, + "grad_norm": 0.7295474410057068, + "learning_rate": 8.391959798994975e-05, + "loss": 1.3034, + "step": 33 + }, + { + "epoch": 0.3767313019390582, + "grad_norm": 0.7686471343040466, + "learning_rate": 8.341708542713568e-05, + "loss": 1.3327, + "step": 34 + }, + { + "epoch": 0.3878116343490305, + "grad_norm": 0.7613719701766968, + "learning_rate": 8.291457286432161e-05, + "loss": 1.2834, + "step": 35 + }, + { + "epoch": 0.3988919667590028, + "grad_norm": 0.8543422222137451, + "learning_rate": 8.241206030150754e-05, + "loss": 1.3813, + "step": 36 + }, + { + "epoch": 0.4099722991689751, + "grad_norm": 0.9008685946464539, + "learning_rate": 8.190954773869348e-05, + "loss": 1.3528, + "step": 37 + }, + { + "epoch": 0.42105263157894735, + "grad_norm": 0.8236178159713745, + "learning_rate": 8.14070351758794e-05, + "loss": 1.3074, + "step": 38 + }, + { + "epoch": 0.43213296398891965, + "grad_norm": 0.8271133899688721, + "learning_rate": 8.090452261306533e-05, + "loss": 1.264, + "step": 39 + }, + { + "epoch": 0.44321329639889195, + "grad_norm": 0.8218770623207092, + "learning_rate": 8.040201005025126e-05, + "loss": 1.2871, + "step": 40 + }, + { + "epoch": 0.45429362880886426, + "grad_norm": 0.7466350197792053, + "learning_rate": 7.989949748743719e-05, + "loss": 1.2691, + "step": 41 + }, + { + "epoch": 0.46537396121883656, + "grad_norm": 0.7745970487594604, + "learning_rate": 7.939698492462313e-05, + "loss": 1.2766, + "step": 42 + }, + { + "epoch": 0.47645429362880887, + "grad_norm": 0.7701446413993835, + "learning_rate": 7.889447236180904e-05, + "loss": 1.294, + "step": 43 + }, + { + "epoch": 0.48753462603878117, + "grad_norm": 0.6183106899261475, + "learning_rate": 7.839195979899498e-05, + "loss": 1.3002, + "step": 44 + }, + { + "epoch": 0.4986149584487535, + "grad_norm": 0.5864247679710388, + "learning_rate": 7.788944723618091e-05, + "loss": 1.2178, + "step": 45 + }, + { + "epoch": 0.5096952908587258, + "grad_norm": 0.5792540907859802, + "learning_rate": 7.738693467336684e-05, + "loss": 1.2126, + "step": 46 + }, + { + "epoch": 0.5207756232686981, + "grad_norm": 0.6436092853546143, + "learning_rate": 7.688442211055277e-05, + "loss": 1.317, + "step": 47 + }, + { + "epoch": 0.5318559556786704, + "grad_norm": 0.5778934359550476, + "learning_rate": 7.638190954773869e-05, + "loss": 1.2303, + "step": 48 + }, + { + "epoch": 0.5429362880886427, + "grad_norm": 0.5667629837989807, + "learning_rate": 7.587939698492463e-05, + "loss": 1.239, + "step": 49 + }, + { + "epoch": 0.554016620498615, + "grad_norm": 0.579045832157135, + "learning_rate": 7.537688442211056e-05, + "loss": 1.1976, + "step": 50 + }, + { + "epoch": 0.5650969529085873, + "grad_norm": 0.5645351409912109, + "learning_rate": 7.487437185929649e-05, + "loss": 1.2011, + "step": 51 + }, + { + "epoch": 0.5761772853185596, + "grad_norm": 0.6186327934265137, + "learning_rate": 7.437185929648241e-05, + "loss": 1.26, + "step": 52 + }, + { + "epoch": 0.5872576177285319, + "grad_norm": 0.6174798011779785, + "learning_rate": 7.386934673366834e-05, + "loss": 1.2812, + "step": 53 + }, + { + "epoch": 0.5983379501385041, + "grad_norm": 0.6200773119926453, + "learning_rate": 7.336683417085427e-05, + "loss": 1.2493, + "step": 54 + }, + { + "epoch": 0.6094182825484764, + "grad_norm": 0.5842644572257996, + "learning_rate": 7.28643216080402e-05, + "loss": 1.2587, + "step": 55 + }, + { + "epoch": 0.6204986149584487, + "grad_norm": 0.5945526957511902, + "learning_rate": 7.236180904522614e-05, + "loss": 1.2105, + "step": 56 + }, + { + "epoch": 0.631578947368421, + "grad_norm": 0.6002059578895569, + "learning_rate": 7.185929648241206e-05, + "loss": 1.2404, + "step": 57 + }, + { + "epoch": 0.6426592797783933, + "grad_norm": 0.5362327098846436, + "learning_rate": 7.135678391959799e-05, + "loss": 1.3024, + "step": 58 + }, + { + "epoch": 0.6537396121883656, + "grad_norm": 0.5753970146179199, + "learning_rate": 7.085427135678392e-05, + "loss": 1.2076, + "step": 59 + }, + { + "epoch": 0.6648199445983379, + "grad_norm": 0.6161749958992004, + "learning_rate": 7.035175879396985e-05, + "loss": 1.1563, + "step": 60 + }, + { + "epoch": 0.6759002770083102, + "grad_norm": 0.578284502029419, + "learning_rate": 6.984924623115579e-05, + "loss": 1.2165, + "step": 61 + }, + { + "epoch": 0.6869806094182825, + "grad_norm": 0.6425468325614929, + "learning_rate": 6.93467336683417e-05, + "loss": 1.2972, + "step": 62 + }, + { + "epoch": 0.6980609418282548, + "grad_norm": 0.5888572335243225, + "learning_rate": 6.884422110552764e-05, + "loss": 1.1187, + "step": 63 + }, + { + "epoch": 0.7091412742382271, + "grad_norm": 0.6597657203674316, + "learning_rate": 6.834170854271357e-05, + "loss": 1.2613, + "step": 64 + }, + { + "epoch": 0.7202216066481995, + "grad_norm": 0.6418899893760681, + "learning_rate": 6.78391959798995e-05, + "loss": 1.1614, + "step": 65 + }, + { + "epoch": 0.7313019390581718, + "grad_norm": 0.5714394450187683, + "learning_rate": 6.733668341708544e-05, + "loss": 1.1293, + "step": 66 + }, + { + "epoch": 0.7423822714681441, + "grad_norm": 0.6186115145683289, + "learning_rate": 6.683417085427135e-05, + "loss": 1.1836, + "step": 67 + }, + { + "epoch": 0.7534626038781164, + "grad_norm": 0.5801815390586853, + "learning_rate": 6.633165829145729e-05, + "loss": 1.2035, + "step": 68 + }, + { + "epoch": 0.7645429362880887, + "grad_norm": 0.6500537991523743, + "learning_rate": 6.582914572864322e-05, + "loss": 1.2234, + "step": 69 + }, + { + "epoch": 0.775623268698061, + "grad_norm": 0.5818614363670349, + "learning_rate": 6.532663316582915e-05, + "loss": 1.2251, + "step": 70 + }, + { + "epoch": 0.7867036011080333, + "grad_norm": 0.6213693022727966, + "learning_rate": 6.482412060301508e-05, + "loss": 1.2281, + "step": 71 + }, + { + "epoch": 0.7977839335180056, + "grad_norm": 0.5809823870658875, + "learning_rate": 6.4321608040201e-05, + "loss": 1.18, + "step": 72 + }, + { + "epoch": 0.8088642659279779, + "grad_norm": 0.5340937376022339, + "learning_rate": 6.381909547738694e-05, + "loss": 1.1571, + "step": 73 + }, + { + "epoch": 0.8199445983379502, + "grad_norm": 0.5844171047210693, + "learning_rate": 6.331658291457287e-05, + "loss": 1.2212, + "step": 74 + }, + { + "epoch": 0.8310249307479224, + "grad_norm": 0.5754027366638184, + "learning_rate": 6.28140703517588e-05, + "loss": 1.2023, + "step": 75 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.6343287229537964, + "learning_rate": 6.231155778894473e-05, + "loss": 1.2817, + "step": 76 + }, + { + "epoch": 0.853185595567867, + "grad_norm": 0.6712394952774048, + "learning_rate": 6.180904522613065e-05, + "loss": 1.2516, + "step": 77 + }, + { + "epoch": 0.8642659279778393, + "grad_norm": 0.5741068720817566, + "learning_rate": 6.130653266331658e-05, + "loss": 1.2114, + "step": 78 + }, + { + "epoch": 0.8753462603878116, + "grad_norm": 0.6043746471405029, + "learning_rate": 6.080402010050251e-05, + "loss": 1.1762, + "step": 79 + }, + { + "epoch": 0.8864265927977839, + "grad_norm": 0.6717391610145569, + "learning_rate": 6.030150753768844e-05, + "loss": 1.228, + "step": 80 + }, + { + "epoch": 0.8975069252077562, + "grad_norm": 0.6319631338119507, + "learning_rate": 5.979899497487438e-05, + "loss": 1.2198, + "step": 81 + }, + { + "epoch": 0.9085872576177285, + "grad_norm": 0.6088309288024902, + "learning_rate": 5.929648241206031e-05, + "loss": 1.211, + "step": 82 + }, + { + "epoch": 0.9196675900277008, + "grad_norm": 0.6102275252342224, + "learning_rate": 5.879396984924623e-05, + "loss": 1.187, + "step": 83 + }, + { + "epoch": 0.9307479224376731, + "grad_norm": 0.5819908976554871, + "learning_rate": 5.829145728643216e-05, + "loss": 1.2206, + "step": 84 + }, + { + "epoch": 0.9418282548476454, + "grad_norm": 0.601245641708374, + "learning_rate": 5.778894472361809e-05, + "loss": 1.2269, + "step": 85 + }, + { + "epoch": 0.9529085872576177, + "grad_norm": 0.6378527283668518, + "learning_rate": 5.728643216080403e-05, + "loss": 1.2919, + "step": 86 + }, + { + "epoch": 0.96398891966759, + "grad_norm": 0.6004720330238342, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.204, + "step": 87 + }, + { + "epoch": 0.9750692520775623, + "grad_norm": 0.6289650797843933, + "learning_rate": 5.628140703517588e-05, + "loss": 1.1608, + "step": 88 + }, + { + "epoch": 0.9861495844875346, + "grad_norm": 0.6542637944221497, + "learning_rate": 5.577889447236181e-05, + "loss": 1.1592, + "step": 89 + }, + { + "epoch": 0.997229916897507, + "grad_norm": 0.6260697245597839, + "learning_rate": 5.527638190954774e-05, + "loss": 1.2151, + "step": 90 + }, + { + "epoch": 1.0083102493074791, + "grad_norm": 1.5121455192565918, + "learning_rate": 5.477386934673368e-05, + "loss": 2.0839, + "step": 91 + }, + { + "epoch": 1.0193905817174516, + "grad_norm": 0.574207067489624, + "learning_rate": 5.4271356783919604e-05, + "loss": 1.1613, + "step": 92 + }, + { + "epoch": 1.0304709141274238, + "grad_norm": 0.6616407036781311, + "learning_rate": 5.376884422110553e-05, + "loss": 1.3486, + "step": 93 + }, + { + "epoch": 1.0415512465373962, + "grad_norm": 0.5284487009048462, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.041, + "step": 94 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.5977286696434021, + "learning_rate": 5.276381909547739e-05, + "loss": 1.1503, + "step": 95 + }, + { + "epoch": 1.0637119113573408, + "grad_norm": 0.5718993544578552, + "learning_rate": 5.226130653266332e-05, + "loss": 1.1083, + "step": 96 + }, + { + "epoch": 1.074792243767313, + "grad_norm": 0.6474930644035339, + "learning_rate": 5.175879396984925e-05, + "loss": 1.2006, + "step": 97 + }, + { + "epoch": 1.0858725761772854, + "grad_norm": 0.5949592590332031, + "learning_rate": 5.125628140703518e-05, + "loss": 1.1012, + "step": 98 + }, + { + "epoch": 1.0969529085872576, + "grad_norm": 0.5743479132652283, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.094, + "step": 99 + }, + { + "epoch": 1.10803324099723, + "grad_norm": 0.7052620053291321, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.2209, + "step": 100 + }, + { + "epoch": 1.1191135734072022, + "grad_norm": 0.6266711354255676, + "learning_rate": 4.974874371859297e-05, + "loss": 1.0795, + "step": 101 + }, + { + "epoch": 1.1301939058171746, + "grad_norm": 0.6345641016960144, + "learning_rate": 4.92462311557789e-05, + "loss": 1.1237, + "step": 102 + }, + { + "epoch": 1.1412742382271468, + "grad_norm": 0.5873332023620605, + "learning_rate": 4.874371859296483e-05, + "loss": 1.062, + "step": 103 + }, + { + "epoch": 1.1523545706371192, + "grad_norm": 0.6368873119354248, + "learning_rate": 4.824120603015075e-05, + "loss": 0.9626, + "step": 104 + }, + { + "epoch": 1.1634349030470914, + "grad_norm": 0.6816115975379944, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.0869, + "step": 105 + }, + { + "epoch": 1.1745152354570636, + "grad_norm": 0.7037101984024048, + "learning_rate": 4.723618090452262e-05, + "loss": 1.2178, + "step": 106 + }, + { + "epoch": 1.185595567867036, + "grad_norm": 0.6814127564430237, + "learning_rate": 4.673366834170855e-05, + "loss": 1.1456, + "step": 107 + }, + { + "epoch": 1.1966759002770084, + "grad_norm": 0.6351640224456787, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.0543, + "step": 108 + }, + { + "epoch": 1.2077562326869806, + "grad_norm": 0.729073703289032, + "learning_rate": 4.57286432160804e-05, + "loss": 1.1579, + "step": 109 + }, + { + "epoch": 1.2188365650969528, + "grad_norm": 0.7375718355178833, + "learning_rate": 4.522613065326633e-05, + "loss": 1.178, + "step": 110 + }, + { + "epoch": 1.2299168975069252, + "grad_norm": 0.686299204826355, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.1791, + "step": 111 + }, + { + "epoch": 1.2409972299168974, + "grad_norm": 0.6791194081306458, + "learning_rate": 4.42211055276382e-05, + "loss": 1.1277, + "step": 112 + }, + { + "epoch": 1.2520775623268698, + "grad_norm": 0.7525886297225952, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.1455, + "step": 113 + }, + { + "epoch": 1.263157894736842, + "grad_norm": 0.7229343056678772, + "learning_rate": 4.321608040201005e-05, + "loss": 1.2113, + "step": 114 + }, + { + "epoch": 1.2742382271468145, + "grad_norm": 0.7222431302070618, + "learning_rate": 4.271356783919598e-05, + "loss": 1.1342, + "step": 115 + }, + { + "epoch": 1.2853185595567866, + "grad_norm": 0.677331805229187, + "learning_rate": 4.2211055276381914e-05, + "loss": 1.1411, + "step": 116 + }, + { + "epoch": 1.296398891966759, + "grad_norm": 0.6559180617332458, + "learning_rate": 4.170854271356784e-05, + "loss": 1.1108, + "step": 117 + }, + { + "epoch": 1.3074792243767313, + "grad_norm": 0.6477547287940979, + "learning_rate": 4.120603015075377e-05, + "loss": 1.079, + "step": 118 + }, + { + "epoch": 1.3185595567867037, + "grad_norm": 0.710292398929596, + "learning_rate": 4.07035175879397e-05, + "loss": 1.1465, + "step": 119 + }, + { + "epoch": 1.3296398891966759, + "grad_norm": 0.7174103260040283, + "learning_rate": 4.020100502512563e-05, + "loss": 1.2304, + "step": 120 + }, + { + "epoch": 1.3407202216066483, + "grad_norm": 0.6680272221565247, + "learning_rate": 3.969849246231156e-05, + "loss": 1.0935, + "step": 121 + }, + { + "epoch": 1.3518005540166205, + "grad_norm": 0.6414808630943298, + "learning_rate": 3.919597989949749e-05, + "loss": 1.0343, + "step": 122 + }, + { + "epoch": 1.3628808864265927, + "grad_norm": 0.7363560795783997, + "learning_rate": 3.869346733668342e-05, + "loss": 1.248, + "step": 123 + }, + { + "epoch": 1.373961218836565, + "grad_norm": 0.6751046776771545, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.109, + "step": 124 + }, + { + "epoch": 1.3850415512465375, + "grad_norm": 0.6871734261512756, + "learning_rate": 3.768844221105528e-05, + "loss": 1.1761, + "step": 125 + }, + { + "epoch": 1.3961218836565097, + "grad_norm": 0.7280701398849487, + "learning_rate": 3.7185929648241204e-05, + "loss": 1.1457, + "step": 126 + }, + { + "epoch": 1.4072022160664819, + "grad_norm": 0.6830523014068604, + "learning_rate": 3.668341708542714e-05, + "loss": 0.985, + "step": 127 + }, + { + "epoch": 1.4182825484764543, + "grad_norm": 0.732204794883728, + "learning_rate": 3.618090452261307e-05, + "loss": 1.1521, + "step": 128 + }, + { + "epoch": 1.4293628808864267, + "grad_norm": 0.7047545909881592, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.0978, + "step": 129 + }, + { + "epoch": 1.440443213296399, + "grad_norm": 0.7437470555305481, + "learning_rate": 3.517587939698493e-05, + "loss": 1.1808, + "step": 130 + }, + { + "epoch": 1.451523545706371, + "grad_norm": 0.6570298671722412, + "learning_rate": 3.467336683417085e-05, + "loss": 0.9725, + "step": 131 + }, + { + "epoch": 1.4626038781163435, + "grad_norm": 0.681265115737915, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.1196, + "step": 132 + }, + { + "epoch": 1.4736842105263157, + "grad_norm": 0.7734697461128235, + "learning_rate": 3.366834170854272e-05, + "loss": 1.0839, + "step": 133 + }, + { + "epoch": 1.4847645429362881, + "grad_norm": 0.6945009231567383, + "learning_rate": 3.3165829145728643e-05, + "loss": 1.0336, + "step": 134 + }, + { + "epoch": 1.4958448753462603, + "grad_norm": 0.7448641657829285, + "learning_rate": 3.2663316582914576e-05, + "loss": 1.0924, + "step": 135 + }, + { + "epoch": 1.5069252077562327, + "grad_norm": 0.7725421190261841, + "learning_rate": 3.21608040201005e-05, + "loss": 1.1409, + "step": 136 + }, + { + "epoch": 1.5180055401662051, + "grad_norm": 0.8401060700416565, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.1962, + "step": 137 + }, + { + "epoch": 1.5290858725761773, + "grad_norm": 0.708717405796051, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.1007, + "step": 138 + }, + { + "epoch": 1.5401662049861495, + "grad_norm": 0.7954943776130676, + "learning_rate": 3.065326633165829e-05, + "loss": 1.1094, + "step": 139 + }, + { + "epoch": 1.5512465373961217, + "grad_norm": 0.7701108455657959, + "learning_rate": 3.015075376884422e-05, + "loss": 1.152, + "step": 140 + }, + { + "epoch": 1.5623268698060941, + "grad_norm": 0.7365975379943848, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.0735, + "step": 141 + }, + { + "epoch": 1.5734072022160666, + "grad_norm": 0.6943490505218506, + "learning_rate": 2.914572864321608e-05, + "loss": 1.0759, + "step": 142 + }, + { + "epoch": 1.5844875346260388, + "grad_norm": 0.7694918513298035, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.1438, + "step": 143 + }, + { + "epoch": 1.595567867036011, + "grad_norm": 0.6781268119812012, + "learning_rate": 2.814070351758794e-05, + "loss": 0.9776, + "step": 144 + }, + { + "epoch": 1.6066481994459834, + "grad_norm": 0.6973868012428284, + "learning_rate": 2.763819095477387e-05, + "loss": 1.0492, + "step": 145 + }, + { + "epoch": 1.6177285318559558, + "grad_norm": 0.770706295967102, + "learning_rate": 2.7135678391959802e-05, + "loss": 1.0905, + "step": 146 + }, + { + "epoch": 1.628808864265928, + "grad_norm": 0.7229887247085571, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.1664, + "step": 147 + }, + { + "epoch": 1.6398891966759002, + "grad_norm": 0.762946367263794, + "learning_rate": 2.613065326633166e-05, + "loss": 1.1373, + "step": 148 + }, + { + "epoch": 1.6509695290858726, + "grad_norm": 0.7556053996086121, + "learning_rate": 2.562814070351759e-05, + "loss": 1.0613, + "step": 149 + }, + { + "epoch": 1.662049861495845, + "grad_norm": 0.7181993722915649, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.0686, + "step": 150 + }, + { + "epoch": 1.6731301939058172, + "grad_norm": 0.7401473522186279, + "learning_rate": 2.462311557788945e-05, + "loss": 1.0952, + "step": 151 + }, + { + "epoch": 1.6842105263157894, + "grad_norm": 0.7067743539810181, + "learning_rate": 2.4120603015075376e-05, + "loss": 0.9935, + "step": 152 + }, + { + "epoch": 1.6952908587257618, + "grad_norm": 0.7622341513633728, + "learning_rate": 2.361809045226131e-05, + "loss": 1.1452, + "step": 153 + }, + { + "epoch": 1.7063711911357342, + "grad_norm": 0.7768684029579163, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.1314, + "step": 154 + }, + { + "epoch": 1.7174515235457064, + "grad_norm": 0.7184272408485413, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.049, + "step": 155 + }, + { + "epoch": 1.7285318559556786, + "grad_norm": 0.7589651942253113, + "learning_rate": 2.21105527638191e-05, + "loss": 1.0791, + "step": 156 + }, + { + "epoch": 1.739612188365651, + "grad_norm": 0.7551384568214417, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.1264, + "step": 157 + }, + { + "epoch": 1.7506925207756234, + "grad_norm": 0.7738797664642334, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.0658, + "step": 158 + }, + { + "epoch": 1.7617728531855956, + "grad_norm": 0.8027400374412537, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.0526, + "step": 159 + }, + { + "epoch": 1.7728531855955678, + "grad_norm": 0.693533718585968, + "learning_rate": 2.0100502512562815e-05, + "loss": 1.0348, + "step": 160 + }, + { + "epoch": 1.78393351800554, + "grad_norm": 0.7291271686553955, + "learning_rate": 1.9597989949748744e-05, + "loss": 1.0686, + "step": 161 + }, + { + "epoch": 1.7950138504155124, + "grad_norm": 0.7514436841011047, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.1501, + "step": 162 + }, + { + "epoch": 1.8060941828254848, + "grad_norm": 0.7121478915214539, + "learning_rate": 1.8592964824120602e-05, + "loss": 1.0153, + "step": 163 + }, + { + "epoch": 1.817174515235457, + "grad_norm": 0.7232415676116943, + "learning_rate": 1.8090452261306535e-05, + "loss": 1.034, + "step": 164 + }, + { + "epoch": 1.8282548476454292, + "grad_norm": 0.8162721991539001, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.1254, + "step": 165 + }, + { + "epoch": 1.8393351800554016, + "grad_norm": 0.7215307354927063, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.0638, + "step": 166 + }, + { + "epoch": 1.850415512465374, + "grad_norm": 0.7444539666175842, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.0895, + "step": 167 + }, + { + "epoch": 1.8614958448753463, + "grad_norm": 0.787027895450592, + "learning_rate": 1.608040201005025e-05, + "loss": 0.9595, + "step": 168 + }, + { + "epoch": 1.8725761772853184, + "grad_norm": 0.7815292477607727, + "learning_rate": 1.5577889447236183e-05, + "loss": 1.1505, + "step": 169 + }, + { + "epoch": 1.8836565096952909, + "grad_norm": 0.7654871344566345, + "learning_rate": 1.507537688442211e-05, + "loss": 1.1074, + "step": 170 + }, + { + "epoch": 1.8947368421052633, + "grad_norm": 0.7809323072433472, + "learning_rate": 1.457286432160804e-05, + "loss": 1.0832, + "step": 171 + }, + { + "epoch": 1.9058171745152355, + "grad_norm": 0.8374095559120178, + "learning_rate": 1.407035175879397e-05, + "loss": 1.1028, + "step": 172 + }, + { + "epoch": 1.9168975069252077, + "grad_norm": 0.7976056933403015, + "learning_rate": 1.3567839195979901e-05, + "loss": 1.0124, + "step": 173 + }, + { + "epoch": 1.92797783933518, + "grad_norm": 0.8494656682014465, + "learning_rate": 1.306532663316583e-05, + "loss": 1.1487, + "step": 174 + }, + { + "epoch": 1.9390581717451525, + "grad_norm": 0.7171152234077454, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.0215, + "step": 175 + }, + { + "epoch": 1.9501385041551247, + "grad_norm": 0.7844187617301941, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.1526, + "step": 176 + }, + { + "epoch": 1.9612188365650969, + "grad_norm": 0.7948229908943176, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.1165, + "step": 177 + }, + { + "epoch": 1.9722991689750693, + "grad_norm": 0.6978301405906677, + "learning_rate": 1.105527638190955e-05, + "loss": 0.9733, + "step": 178 + }, + { + "epoch": 1.9833795013850417, + "grad_norm": 0.8851218819618225, + "learning_rate": 1.0552763819095479e-05, + "loss": 1.167, + "step": 179 + }, + { + "epoch": 1.994459833795014, + "grad_norm": 0.7181246876716614, + "learning_rate": 1.0050251256281408e-05, + "loss": 1.0187, + "step": 180 + }, + { + "epoch": 2.005540166204986, + "grad_norm": 1.8050236701965332, + "learning_rate": 9.547738693467337e-06, + "loss": 1.97, + "step": 181 + }, + { + "epoch": 2.0166204986149583, + "grad_norm": 0.7432425618171692, + "learning_rate": 9.045226130653267e-06, + "loss": 1.0561, + "step": 182 + }, + { + "epoch": 2.027700831024931, + "grad_norm": 0.7358068227767944, + "learning_rate": 8.542713567839196e-06, + "loss": 0.9627, + "step": 183 + }, + { + "epoch": 2.038781163434903, + "grad_norm": 0.7742936015129089, + "learning_rate": 8.040201005025125e-06, + "loss": 1.143, + "step": 184 + }, + { + "epoch": 2.0498614958448753, + "grad_norm": 0.7715827226638794, + "learning_rate": 7.537688442211055e-06, + "loss": 1.0638, + "step": 185 + }, + { + "epoch": 2.0609418282548475, + "grad_norm": 0.7287455797195435, + "learning_rate": 7.035175879396985e-06, + "loss": 1.0142, + "step": 186 + }, + { + "epoch": 2.07202216066482, + "grad_norm": 0.7636083960533142, + "learning_rate": 6.532663316582915e-06, + "loss": 1.0196, + "step": 187 + }, + { + "epoch": 2.0831024930747923, + "grad_norm": 0.7313657999038696, + "learning_rate": 6.030150753768844e-06, + "loss": 1.0466, + "step": 188 + }, + { + "epoch": 2.0941828254847645, + "grad_norm": 0.7313005924224854, + "learning_rate": 5.527638190954775e-06, + "loss": 1.0009, + "step": 189 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.729709267616272, + "learning_rate": 5.025125628140704e-06, + "loss": 1.0601, + "step": 190 + }, + { + "epoch": 2.1163434903047094, + "grad_norm": 0.7446528077125549, + "learning_rate": 4.522613065326634e-06, + "loss": 0.9965, + "step": 191 + }, + { + "epoch": 2.1274238227146816, + "grad_norm": 0.7408772110939026, + "learning_rate": 4.020100502512563e-06, + "loss": 0.9824, + "step": 192 + }, + { + "epoch": 2.1385041551246537, + "grad_norm": 0.7374000549316406, + "learning_rate": 3.5175879396984926e-06, + "loss": 1.13, + "step": 193 + }, + { + "epoch": 2.149584487534626, + "grad_norm": 0.7011594772338867, + "learning_rate": 3.015075376884422e-06, + "loss": 0.9952, + "step": 194 + }, + { + "epoch": 2.160664819944598, + "grad_norm": 0.6886879801750183, + "learning_rate": 2.512562814070352e-06, + "loss": 0.9617, + "step": 195 + }, + { + "epoch": 2.1717451523545708, + "grad_norm": 0.8077765703201294, + "learning_rate": 2.0100502512562813e-06, + "loss": 1.0507, + "step": 196 + }, + { + "epoch": 2.182825484764543, + "grad_norm": 0.7857282757759094, + "learning_rate": 1.507537688442211e-06, + "loss": 1.1114, + "step": 197 + }, + { + "epoch": 2.193905817174515, + "grad_norm": 0.7135636806488037, + "learning_rate": 1.0050251256281407e-06, + "loss": 1.0691, + "step": 198 + }, + { + "epoch": 2.2049861495844874, + "grad_norm": 0.720593273639679, + "learning_rate": 5.025125628140703e-07, + "loss": 0.9977, + "step": 199 + }, + { + "epoch": 2.21606648199446, + "grad_norm": 0.7399870157241821, + "learning_rate": 0.0, + "loss": 1.0131, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.7777393737908224e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_German_Chinese/checkpoint-200/training_args.bin b/llama_German_Chinese/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..64664f661174fd09d232b79ec559a5390ed550cc --- /dev/null +++ b/llama_German_Chinese/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba388dc814d9d11b4bb2a1e218e30c9fb6fce4002fee427a7da221fe8c11fc66 +size 5624 diff --git a/llama_German_English/checkpoint-200/README.md b/llama_German_English/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_German_English/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_German_English/checkpoint-200/adapter_config.json b/llama_German_English/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..80182dbd6ed6572fc49f558d3663c1f36aa2e81f --- /dev/null +++ b/llama_German_English/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "v_proj", + "o_proj", + "k_proj", + "up_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_German_English/checkpoint-200/adapter_model.safetensors b/llama_German_English/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..698f50a6b5e8081f556079a1c11cb4f4334d83b3 --- /dev/null +++ b/llama_German_English/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90aba05326ff535cff19bc8a6859e7f23b6e1334e25204fcd622ebaa7867edcb +size 167832240 diff --git a/llama_German_English/checkpoint-200/optimizer.pt b/llama_German_English/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4056f81d93341e8cc36b6111583aa60ba7bf396 --- /dev/null +++ b/llama_German_English/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:393105277c93e001b06cd33c44cf0e6a4efa8f62c449e22aac131517e75cbf9f +size 85723284 diff --git a/llama_German_English/checkpoint-200/rng_state.pth b/llama_German_English/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21f2ff1c1a816463781d51760f8156e041f5979 --- /dev/null +++ b/llama_German_English/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4981123ff3cf7bd5b7f76839e90e4776f747ca4c38dcb41876fa010c0dea8b23 +size 14244 diff --git a/llama_German_English/checkpoint-200/scheduler.pt b/llama_German_English/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_German_English/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_German_English/checkpoint-200/special_tokens_map.json b/llama_German_English/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_German_English/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_German_English/checkpoint-200/tokenizer.json b/llama_German_English/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_German_English/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_German_English/checkpoint-200/tokenizer_config.json b/llama_German_English/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_German_English/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_German_English/checkpoint-200/trainer_state.json b/llama_German_English/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..60faaf2dc1f14aaa3529090102ab5e06051eff38 --- /dev/null +++ b/llama_German_English/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.21606648199446, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0110803324099723, + "grad_norm": 1.0439653396606445, + "learning_rate": 0.0001, + "loss": 2.5223, + "step": 1 + }, + { + "epoch": 0.0221606648199446, + "grad_norm": 1.072291612625122, + "learning_rate": 9.949748743718594e-05, + "loss": 2.465, + "step": 2 + }, + { + "epoch": 0.0332409972299169, + "grad_norm": 0.9897931218147278, + "learning_rate": 9.899497487437186e-05, + "loss": 2.4409, + "step": 3 + }, + { + "epoch": 0.0443213296398892, + "grad_norm": 1.0319087505340576, + "learning_rate": 9.84924623115578e-05, + "loss": 2.3619, + "step": 4 + }, + { + "epoch": 0.055401662049861494, + "grad_norm": 1.0002905130386353, + "learning_rate": 9.798994974874372e-05, + "loss": 2.1627, + "step": 5 + }, + { + "epoch": 0.0664819944598338, + "grad_norm": 1.0927557945251465, + "learning_rate": 9.748743718592965e-05, + "loss": 2.0505, + "step": 6 + }, + { + "epoch": 0.07756232686980609, + "grad_norm": 0.9562727808952332, + "learning_rate": 9.698492462311559e-05, + "loss": 1.9197, + "step": 7 + }, + { + "epoch": 0.0886426592797784, + "grad_norm": 1.1700633764266968, + "learning_rate": 9.64824120603015e-05, + "loss": 1.6867, + "step": 8 + }, + { + "epoch": 0.0997229916897507, + "grad_norm": 0.8975285291671753, + "learning_rate": 9.597989949748745e-05, + "loss": 1.6386, + "step": 9 + }, + { + "epoch": 0.11080332409972299, + "grad_norm": 0.8447593450546265, + "learning_rate": 9.547738693467337e-05, + "loss": 1.522, + "step": 10 + }, + { + "epoch": 0.12188365650969529, + "grad_norm": 0.8749620318412781, + "learning_rate": 9.49748743718593e-05, + "loss": 1.4721, + "step": 11 + }, + { + "epoch": 0.1329639889196676, + "grad_norm": 0.6403835415840149, + "learning_rate": 9.447236180904523e-05, + "loss": 1.4496, + "step": 12 + }, + { + "epoch": 0.1440443213296399, + "grad_norm": 0.4993043541908264, + "learning_rate": 9.396984924623115e-05, + "loss": 1.4022, + "step": 13 + }, + { + "epoch": 0.15512465373961218, + "grad_norm": 0.4994354844093323, + "learning_rate": 9.34673366834171e-05, + "loss": 1.4015, + "step": 14 + }, + { + "epoch": 0.16620498614958448, + "grad_norm": 0.5358327627182007, + "learning_rate": 9.296482412060302e-05, + "loss": 1.2465, + "step": 15 + }, + { + "epoch": 0.1772853185595568, + "grad_norm": 0.5239339470863342, + "learning_rate": 9.246231155778895e-05, + "loss": 1.3862, + "step": 16 + }, + { + "epoch": 0.1883656509695291, + "grad_norm": 0.5605911612510681, + "learning_rate": 9.195979899497488e-05, + "loss": 1.3572, + "step": 17 + }, + { + "epoch": 0.1994459833795014, + "grad_norm": 0.5271956324577332, + "learning_rate": 9.14572864321608e-05, + "loss": 1.363, + "step": 18 + }, + { + "epoch": 0.21052631578947367, + "grad_norm": 0.5072754621505737, + "learning_rate": 9.095477386934675e-05, + "loss": 1.3393, + "step": 19 + }, + { + "epoch": 0.22160664819944598, + "grad_norm": 0.5984258055686951, + "learning_rate": 9.045226130653267e-05, + "loss": 1.4338, + "step": 20 + }, + { + "epoch": 0.23268698060941828, + "grad_norm": 0.6067867279052734, + "learning_rate": 8.99497487437186e-05, + "loss": 1.1891, + "step": 21 + }, + { + "epoch": 0.24376731301939059, + "grad_norm": 0.5885209441184998, + "learning_rate": 8.944723618090453e-05, + "loss": 1.3141, + "step": 22 + }, + { + "epoch": 0.2548476454293629, + "grad_norm": 0.5784013271331787, + "learning_rate": 8.894472361809045e-05, + "loss": 1.4002, + "step": 23 + }, + { + "epoch": 0.2659279778393352, + "grad_norm": 0.6238617897033691, + "learning_rate": 8.84422110552764e-05, + "loss": 1.3739, + "step": 24 + }, + { + "epoch": 0.2770083102493075, + "grad_norm": 0.6549237370491028, + "learning_rate": 8.793969849246232e-05, + "loss": 1.4349, + "step": 25 + }, + { + "epoch": 0.2880886426592798, + "grad_norm": 0.6756062507629395, + "learning_rate": 8.743718592964825e-05, + "loss": 1.3734, + "step": 26 + }, + { + "epoch": 0.29916897506925205, + "grad_norm": 0.7228646278381348, + "learning_rate": 8.693467336683418e-05, + "loss": 1.3946, + "step": 27 + }, + { + "epoch": 0.31024930747922436, + "grad_norm": 0.6804022192955017, + "learning_rate": 8.64321608040201e-05, + "loss": 1.3068, + "step": 28 + }, + { + "epoch": 0.32132963988919666, + "grad_norm": 0.7463417053222656, + "learning_rate": 8.592964824120603e-05, + "loss": 1.3368, + "step": 29 + }, + { + "epoch": 0.33240997229916897, + "grad_norm": 0.7411599159240723, + "learning_rate": 8.542713567839196e-05, + "loss": 1.3373, + "step": 30 + }, + { + "epoch": 0.34349030470914127, + "grad_norm": 0.7613984942436218, + "learning_rate": 8.49246231155779e-05, + "loss": 1.2158, + "step": 31 + }, + { + "epoch": 0.3545706371191136, + "grad_norm": 0.8660432696342468, + "learning_rate": 8.442211055276383e-05, + "loss": 1.2104, + "step": 32 + }, + { + "epoch": 0.3656509695290859, + "grad_norm": 0.8703951835632324, + "learning_rate": 8.391959798994975e-05, + "loss": 1.2992, + "step": 33 + }, + { + "epoch": 0.3767313019390582, + "grad_norm": 0.8799692988395691, + "learning_rate": 8.341708542713568e-05, + "loss": 1.2525, + "step": 34 + }, + { + "epoch": 0.3878116343490305, + "grad_norm": 0.8259297609329224, + "learning_rate": 8.291457286432161e-05, + "loss": 1.2281, + "step": 35 + }, + { + "epoch": 0.3988919667590028, + "grad_norm": 0.9279691576957703, + "learning_rate": 8.241206030150754e-05, + "loss": 1.3589, + "step": 36 + }, + { + "epoch": 0.4099722991689751, + "grad_norm": 0.8066436648368835, + "learning_rate": 8.190954773869348e-05, + "loss": 1.2785, + "step": 37 + }, + { + "epoch": 0.42105263157894735, + "grad_norm": 0.7177894115447998, + "learning_rate": 8.14070351758794e-05, + "loss": 1.278, + "step": 38 + }, + { + "epoch": 0.43213296398891965, + "grad_norm": 0.6018456220626831, + "learning_rate": 8.090452261306533e-05, + "loss": 1.2151, + "step": 39 + }, + { + "epoch": 0.44321329639889195, + "grad_norm": 0.5777440667152405, + "learning_rate": 8.040201005025126e-05, + "loss": 1.235, + "step": 40 + }, + { + "epoch": 0.45429362880886426, + "grad_norm": 0.4722194969654083, + "learning_rate": 7.989949748743719e-05, + "loss": 1.256, + "step": 41 + }, + { + "epoch": 0.46537396121883656, + "grad_norm": 0.4909801185131073, + "learning_rate": 7.939698492462313e-05, + "loss": 1.2826, + "step": 42 + }, + { + "epoch": 0.47645429362880887, + "grad_norm": 0.5409879684448242, + "learning_rate": 7.889447236180904e-05, + "loss": 1.2748, + "step": 43 + }, + { + "epoch": 0.48753462603878117, + "grad_norm": 0.5462285876274109, + "learning_rate": 7.839195979899498e-05, + "loss": 1.2736, + "step": 44 + }, + { + "epoch": 0.4986149584487535, + "grad_norm": 0.5091783404350281, + "learning_rate": 7.788944723618091e-05, + "loss": 1.1858, + "step": 45 + }, + { + "epoch": 0.5096952908587258, + "grad_norm": 0.4923122227191925, + "learning_rate": 7.738693467336684e-05, + "loss": 1.2042, + "step": 46 + }, + { + "epoch": 0.5207756232686981, + "grad_norm": 0.5117873549461365, + "learning_rate": 7.688442211055277e-05, + "loss": 1.2803, + "step": 47 + }, + { + "epoch": 0.5318559556786704, + "grad_norm": 0.47855451703071594, + "learning_rate": 7.638190954773869e-05, + "loss": 1.2136, + "step": 48 + }, + { + "epoch": 0.5429362880886427, + "grad_norm": 0.4981115460395813, + "learning_rate": 7.587939698492463e-05, + "loss": 1.2662, + "step": 49 + }, + { + "epoch": 0.554016620498615, + "grad_norm": 0.4743058681488037, + "learning_rate": 7.537688442211056e-05, + "loss": 1.1551, + "step": 50 + }, + { + "epoch": 0.5650969529085873, + "grad_norm": 0.4888276159763336, + "learning_rate": 7.487437185929649e-05, + "loss": 1.1727, + "step": 51 + }, + { + "epoch": 0.5761772853185596, + "grad_norm": 0.5036386251449585, + "learning_rate": 7.437185929648241e-05, + "loss": 1.2205, + "step": 52 + }, + { + "epoch": 0.5872576177285319, + "grad_norm": 0.5121099352836609, + "learning_rate": 7.386934673366834e-05, + "loss": 1.2157, + "step": 53 + }, + { + "epoch": 0.5983379501385041, + "grad_norm": 0.5122838616371155, + "learning_rate": 7.336683417085427e-05, + "loss": 1.2199, + "step": 54 + }, + { + "epoch": 0.6094182825484764, + "grad_norm": 0.5164381265640259, + "learning_rate": 7.28643216080402e-05, + "loss": 1.2962, + "step": 55 + }, + { + "epoch": 0.6204986149584487, + "grad_norm": 0.5261529684066772, + "learning_rate": 7.236180904522614e-05, + "loss": 1.2283, + "step": 56 + }, + { + "epoch": 0.631578947368421, + "grad_norm": 0.5181484818458557, + "learning_rate": 7.185929648241206e-05, + "loss": 1.2724, + "step": 57 + }, + { + "epoch": 0.6426592797783933, + "grad_norm": 0.48683488368988037, + "learning_rate": 7.135678391959799e-05, + "loss": 1.3003, + "step": 58 + }, + { + "epoch": 0.6537396121883656, + "grad_norm": 0.4646837115287781, + "learning_rate": 7.085427135678392e-05, + "loss": 1.2103, + "step": 59 + }, + { + "epoch": 0.6648199445983379, + "grad_norm": 0.513752818107605, + "learning_rate": 7.035175879396985e-05, + "loss": 1.1334, + "step": 60 + }, + { + "epoch": 0.6759002770083102, + "grad_norm": 0.47100791335105896, + "learning_rate": 6.984924623115579e-05, + "loss": 1.2148, + "step": 61 + }, + { + "epoch": 0.6869806094182825, + "grad_norm": 0.5160151124000549, + "learning_rate": 6.93467336683417e-05, + "loss": 1.2718, + "step": 62 + }, + { + "epoch": 0.6980609418282548, + "grad_norm": 0.42398396134376526, + "learning_rate": 6.884422110552764e-05, + "loss": 1.119, + "step": 63 + }, + { + "epoch": 0.7091412742382271, + "grad_norm": 0.5424822568893433, + "learning_rate": 6.834170854271357e-05, + "loss": 1.2911, + "step": 64 + }, + { + "epoch": 0.7202216066481995, + "grad_norm": 0.557855486869812, + "learning_rate": 6.78391959798995e-05, + "loss": 1.1695, + "step": 65 + }, + { + "epoch": 0.7313019390581718, + "grad_norm": 0.4864124059677124, + "learning_rate": 6.733668341708544e-05, + "loss": 1.1762, + "step": 66 + }, + { + "epoch": 0.7423822714681441, + "grad_norm": 0.5386707186698914, + "learning_rate": 6.683417085427135e-05, + "loss": 1.1766, + "step": 67 + }, + { + "epoch": 0.7534626038781164, + "grad_norm": 0.5114085674285889, + "learning_rate": 6.633165829145729e-05, + "loss": 1.2149, + "step": 68 + }, + { + "epoch": 0.7645429362880887, + "grad_norm": 0.5140852928161621, + "learning_rate": 6.582914572864322e-05, + "loss": 1.2486, + "step": 69 + }, + { + "epoch": 0.775623268698061, + "grad_norm": 0.4860430359840393, + "learning_rate": 6.532663316582915e-05, + "loss": 1.2724, + "step": 70 + }, + { + "epoch": 0.7867036011080333, + "grad_norm": 0.5633051991462708, + "learning_rate": 6.482412060301508e-05, + "loss": 1.2467, + "step": 71 + }, + { + "epoch": 0.7977839335180056, + "grad_norm": 0.48254477977752686, + "learning_rate": 6.4321608040201e-05, + "loss": 1.1976, + "step": 72 + }, + { + "epoch": 0.8088642659279779, + "grad_norm": 0.48218846321105957, + "learning_rate": 6.381909547738694e-05, + "loss": 1.1644, + "step": 73 + }, + { + "epoch": 0.8199445983379502, + "grad_norm": 0.4937390387058258, + "learning_rate": 6.331658291457287e-05, + "loss": 1.2049, + "step": 74 + }, + { + "epoch": 0.8310249307479224, + "grad_norm": 0.48136794567108154, + "learning_rate": 6.28140703517588e-05, + "loss": 1.2073, + "step": 75 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.5367251038551331, + "learning_rate": 6.231155778894473e-05, + "loss": 1.3024, + "step": 76 + }, + { + "epoch": 0.853185595567867, + "grad_norm": 0.5513749718666077, + "learning_rate": 6.180904522613065e-05, + "loss": 1.2312, + "step": 77 + }, + { + "epoch": 0.8642659279778393, + "grad_norm": 0.5120927095413208, + "learning_rate": 6.130653266331658e-05, + "loss": 1.1814, + "step": 78 + }, + { + "epoch": 0.8753462603878116, + "grad_norm": 0.47654178738594055, + "learning_rate": 6.080402010050251e-05, + "loss": 1.1599, + "step": 79 + }, + { + "epoch": 0.8864265927977839, + "grad_norm": 0.5559302568435669, + "learning_rate": 6.030150753768844e-05, + "loss": 1.2203, + "step": 80 + }, + { + "epoch": 0.8975069252077562, + "grad_norm": 0.5184886455535889, + "learning_rate": 5.979899497487438e-05, + "loss": 1.2238, + "step": 81 + }, + { + "epoch": 0.9085872576177285, + "grad_norm": 0.5314403176307678, + "learning_rate": 5.929648241206031e-05, + "loss": 1.2255, + "step": 82 + }, + { + "epoch": 0.9196675900277008, + "grad_norm": 0.4995604455471039, + "learning_rate": 5.879396984924623e-05, + "loss": 1.1613, + "step": 83 + }, + { + "epoch": 0.9307479224376731, + "grad_norm": 0.483528733253479, + "learning_rate": 5.829145728643216e-05, + "loss": 1.2412, + "step": 84 + }, + { + "epoch": 0.9418282548476454, + "grad_norm": 0.5035815238952637, + "learning_rate": 5.778894472361809e-05, + "loss": 1.2189, + "step": 85 + }, + { + "epoch": 0.9529085872576177, + "grad_norm": 0.5537089705467224, + "learning_rate": 5.728643216080403e-05, + "loss": 1.3265, + "step": 86 + }, + { + "epoch": 0.96398891966759, + "grad_norm": 0.5601852536201477, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.2444, + "step": 87 + }, + { + "epoch": 0.9750692520775623, + "grad_norm": 0.5105345249176025, + "learning_rate": 5.628140703517588e-05, + "loss": 1.169, + "step": 88 + }, + { + "epoch": 0.9861495844875346, + "grad_norm": 0.533295750617981, + "learning_rate": 5.577889447236181e-05, + "loss": 1.1606, + "step": 89 + }, + { + "epoch": 0.997229916897507, + "grad_norm": 0.5628755688667297, + "learning_rate": 5.527638190954774e-05, + "loss": 1.2022, + "step": 90 + }, + { + "epoch": 1.0083102493074791, + "grad_norm": 1.1733123064041138, + "learning_rate": 5.477386934673368e-05, + "loss": 2.0244, + "step": 91 + }, + { + "epoch": 1.0193905817174516, + "grad_norm": 0.48274314403533936, + "learning_rate": 5.4271356783919604e-05, + "loss": 1.1683, + "step": 92 + }, + { + "epoch": 1.0304709141274238, + "grad_norm": 0.5582519173622131, + "learning_rate": 5.376884422110553e-05, + "loss": 1.3466, + "step": 93 + }, + { + "epoch": 1.0415512465373962, + "grad_norm": 0.4637548327445984, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.0639, + "step": 94 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.5016134977340698, + "learning_rate": 5.276381909547739e-05, + "loss": 1.2031, + "step": 95 + }, + { + "epoch": 1.0637119113573408, + "grad_norm": 0.5333296060562134, + "learning_rate": 5.226130653266332e-05, + "loss": 1.145, + "step": 96 + }, + { + "epoch": 1.074792243767313, + "grad_norm": 0.5505311489105225, + "learning_rate": 5.175879396984925e-05, + "loss": 1.2017, + "step": 97 + }, + { + "epoch": 1.0858725761772854, + "grad_norm": 0.49051010608673096, + "learning_rate": 5.125628140703518e-05, + "loss": 1.1612, + "step": 98 + }, + { + "epoch": 1.0969529085872576, + "grad_norm": 0.526991605758667, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.1927, + "step": 99 + }, + { + "epoch": 1.10803324099723, + "grad_norm": 0.5483986139297485, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.2416, + "step": 100 + }, + { + "epoch": 1.1191135734072022, + "grad_norm": 0.5175971984863281, + "learning_rate": 4.974874371859297e-05, + "loss": 1.097, + "step": 101 + }, + { + "epoch": 1.1301939058171746, + "grad_norm": 0.5794366002082825, + "learning_rate": 4.92462311557789e-05, + "loss": 1.1469, + "step": 102 + }, + { + "epoch": 1.1412742382271468, + "grad_norm": 0.5408708453178406, + "learning_rate": 4.874371859296483e-05, + "loss": 1.1193, + "step": 103 + }, + { + "epoch": 1.1523545706371192, + "grad_norm": 0.504085123538971, + "learning_rate": 4.824120603015075e-05, + "loss": 0.9672, + "step": 104 + }, + { + "epoch": 1.1634349030470914, + "grad_norm": 0.6266749501228333, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.1375, + "step": 105 + }, + { + "epoch": 1.1745152354570636, + "grad_norm": 0.5513699054718018, + "learning_rate": 4.723618090452262e-05, + "loss": 1.2081, + "step": 106 + }, + { + "epoch": 1.185595567867036, + "grad_norm": 0.5879850387573242, + "learning_rate": 4.673366834170855e-05, + "loss": 1.1576, + "step": 107 + }, + { + "epoch": 1.1966759002770084, + "grad_norm": 0.5555039048194885, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.1212, + "step": 108 + }, + { + "epoch": 1.2077562326869806, + "grad_norm": 0.5815752744674683, + "learning_rate": 4.57286432160804e-05, + "loss": 1.1984, + "step": 109 + }, + { + "epoch": 1.2188365650969528, + "grad_norm": 0.6069645881652832, + "learning_rate": 4.522613065326633e-05, + "loss": 1.1879, + "step": 110 + }, + { + "epoch": 1.2299168975069252, + "grad_norm": 0.6030775308609009, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.1959, + "step": 111 + }, + { + "epoch": 1.2409972299168974, + "grad_norm": 0.5615729093551636, + "learning_rate": 4.42211055276382e-05, + "loss": 1.1266, + "step": 112 + }, + { + "epoch": 1.2520775623268698, + "grad_norm": 0.5482991337776184, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.158, + "step": 113 + }, + { + "epoch": 1.263157894736842, + "grad_norm": 0.6517236232757568, + "learning_rate": 4.321608040201005e-05, + "loss": 1.2302, + "step": 114 + }, + { + "epoch": 1.2742382271468145, + "grad_norm": 0.5991782546043396, + "learning_rate": 4.271356783919598e-05, + "loss": 1.1322, + "step": 115 + }, + { + "epoch": 1.2853185595567866, + "grad_norm": 0.6047670841217041, + "learning_rate": 4.2211055276381914e-05, + "loss": 1.1633, + "step": 116 + }, + { + "epoch": 1.296398891966759, + "grad_norm": 0.6029966473579407, + "learning_rate": 4.170854271356784e-05, + "loss": 1.1269, + "step": 117 + }, + { + "epoch": 1.3074792243767313, + "grad_norm": 0.5472580790519714, + "learning_rate": 4.120603015075377e-05, + "loss": 1.0763, + "step": 118 + }, + { + "epoch": 1.3185595567867037, + "grad_norm": 0.5828231573104858, + "learning_rate": 4.07035175879397e-05, + "loss": 1.1432, + "step": 119 + }, + { + "epoch": 1.3296398891966759, + "grad_norm": 0.6450657844543457, + "learning_rate": 4.020100502512563e-05, + "loss": 1.2425, + "step": 120 + }, + { + "epoch": 1.3407202216066483, + "grad_norm": 0.6098791360855103, + "learning_rate": 3.969849246231156e-05, + "loss": 1.0802, + "step": 121 + }, + { + "epoch": 1.3518005540166205, + "grad_norm": 0.5955787897109985, + "learning_rate": 3.919597989949749e-05, + "loss": 1.0539, + "step": 122 + }, + { + "epoch": 1.3628808864265927, + "grad_norm": 0.651118814945221, + "learning_rate": 3.869346733668342e-05, + "loss": 1.2524, + "step": 123 + }, + { + "epoch": 1.373961218836565, + "grad_norm": 0.6121578216552734, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.1569, + "step": 124 + }, + { + "epoch": 1.3850415512465375, + "grad_norm": 0.6717909574508667, + "learning_rate": 3.768844221105528e-05, + "loss": 1.2191, + "step": 125 + }, + { + "epoch": 1.3961218836565097, + "grad_norm": 0.6332257390022278, + "learning_rate": 3.7185929648241204e-05, + "loss": 1.1298, + "step": 126 + }, + { + "epoch": 1.4072022160664819, + "grad_norm": 0.6334303617477417, + "learning_rate": 3.668341708542714e-05, + "loss": 1.0351, + "step": 127 + }, + { + "epoch": 1.4182825484764543, + "grad_norm": 0.655293345451355, + "learning_rate": 3.618090452261307e-05, + "loss": 1.177, + "step": 128 + }, + { + "epoch": 1.4293628808864267, + "grad_norm": 0.6217477321624756, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.1263, + "step": 129 + }, + { + "epoch": 1.440443213296399, + "grad_norm": 0.6945567727088928, + "learning_rate": 3.517587939698493e-05, + "loss": 1.2184, + "step": 130 + }, + { + "epoch": 1.451523545706371, + "grad_norm": 0.6126496195793152, + "learning_rate": 3.467336683417085e-05, + "loss": 1.0179, + "step": 131 + }, + { + "epoch": 1.4626038781163435, + "grad_norm": 0.6645523309707642, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.1921, + "step": 132 + }, + { + "epoch": 1.4736842105263157, + "grad_norm": 0.65655916929245, + "learning_rate": 3.366834170854272e-05, + "loss": 1.1358, + "step": 133 + }, + { + "epoch": 1.4847645429362881, + "grad_norm": 0.6353817582130432, + "learning_rate": 3.3165829145728643e-05, + "loss": 1.0577, + "step": 134 + }, + { + "epoch": 1.4958448753462603, + "grad_norm": 0.6068665385246277, + "learning_rate": 3.2663316582914576e-05, + "loss": 1.1409, + "step": 135 + }, + { + "epoch": 1.5069252077562327, + "grad_norm": 0.6636937856674194, + "learning_rate": 3.21608040201005e-05, + "loss": 1.1746, + "step": 136 + }, + { + "epoch": 1.5180055401662051, + "grad_norm": 0.764776885509491, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.2315, + "step": 137 + }, + { + "epoch": 1.5290858725761773, + "grad_norm": 0.6621037125587463, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.122, + "step": 138 + }, + { + "epoch": 1.5401662049861495, + "grad_norm": 0.6436090469360352, + "learning_rate": 3.065326633165829e-05, + "loss": 1.0881, + "step": 139 + }, + { + "epoch": 1.5512465373961217, + "grad_norm": 0.6181167364120483, + "learning_rate": 3.015075376884422e-05, + "loss": 1.1523, + "step": 140 + }, + { + "epoch": 1.5623268698060941, + "grad_norm": 0.6687259674072266, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.0796, + "step": 141 + }, + { + "epoch": 1.5734072022160666, + "grad_norm": 0.6394988298416138, + "learning_rate": 2.914572864321608e-05, + "loss": 1.1142, + "step": 142 + }, + { + "epoch": 1.5844875346260388, + "grad_norm": 0.6827041506767273, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.1727, + "step": 143 + }, + { + "epoch": 1.595567867036011, + "grad_norm": 0.6124918460845947, + "learning_rate": 2.814070351758794e-05, + "loss": 1.037, + "step": 144 + }, + { + "epoch": 1.6066481994459834, + "grad_norm": 0.6157639622688293, + "learning_rate": 2.763819095477387e-05, + "loss": 1.0598, + "step": 145 + }, + { + "epoch": 1.6177285318559558, + "grad_norm": 0.667032778263092, + "learning_rate": 2.7135678391959802e-05, + "loss": 1.1222, + "step": 146 + }, + { + "epoch": 1.628808864265928, + "grad_norm": 0.7053226232528687, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.2135, + "step": 147 + }, + { + "epoch": 1.6398891966759002, + "grad_norm": 0.7048087120056152, + "learning_rate": 2.613065326633166e-05, + "loss": 1.1588, + "step": 148 + }, + { + "epoch": 1.6509695290858726, + "grad_norm": 0.6474671959877014, + "learning_rate": 2.562814070351759e-05, + "loss": 1.0781, + "step": 149 + }, + { + "epoch": 1.662049861495845, + "grad_norm": 0.65389084815979, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.0982, + "step": 150 + }, + { + "epoch": 1.6731301939058172, + "grad_norm": 0.6817282438278198, + "learning_rate": 2.462311557788945e-05, + "loss": 1.0967, + "step": 151 + }, + { + "epoch": 1.6842105263157894, + "grad_norm": 0.6355194449424744, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.0279, + "step": 152 + }, + { + "epoch": 1.6952908587257618, + "grad_norm": 0.683628261089325, + "learning_rate": 2.361809045226131e-05, + "loss": 1.1533, + "step": 153 + }, + { + "epoch": 1.7063711911357342, + "grad_norm": 0.710445761680603, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.182, + "step": 154 + }, + { + "epoch": 1.7174515235457064, + "grad_norm": 0.6544257998466492, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.0936, + "step": 155 + }, + { + "epoch": 1.7285318559556786, + "grad_norm": 0.6538469791412354, + "learning_rate": 2.21105527638191e-05, + "loss": 1.0657, + "step": 156 + }, + { + "epoch": 1.739612188365651, + "grad_norm": 0.6852319836616516, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.162, + "step": 157 + }, + { + "epoch": 1.7506925207756234, + "grad_norm": 0.6878906488418579, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.0954, + "step": 158 + }, + { + "epoch": 1.7617728531855956, + "grad_norm": 0.7377908825874329, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.0891, + "step": 159 + }, + { + "epoch": 1.7728531855955678, + "grad_norm": 0.6463127732276917, + "learning_rate": 2.0100502512562815e-05, + "loss": 1.0751, + "step": 160 + }, + { + "epoch": 1.78393351800554, + "grad_norm": 0.6408430337905884, + "learning_rate": 1.9597989949748744e-05, + "loss": 1.0861, + "step": 161 + }, + { + "epoch": 1.7950138504155124, + "grad_norm": 0.7149915099143982, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.1801, + "step": 162 + }, + { + "epoch": 1.8060941828254848, + "grad_norm": 0.6683186888694763, + "learning_rate": 1.8592964824120602e-05, + "loss": 1.0473, + "step": 163 + }, + { + "epoch": 1.817174515235457, + "grad_norm": 0.6818321347236633, + "learning_rate": 1.8090452261306535e-05, + "loss": 1.0699, + "step": 164 + }, + { + "epoch": 1.8282548476454292, + "grad_norm": 0.7177061438560486, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.12, + "step": 165 + }, + { + "epoch": 1.8393351800554016, + "grad_norm": 0.6971613168716431, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.0919, + "step": 166 + }, + { + "epoch": 1.850415512465374, + "grad_norm": 0.6916444301605225, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.0878, + "step": 167 + }, + { + "epoch": 1.8614958448753463, + "grad_norm": 0.6586862206459045, + "learning_rate": 1.608040201005025e-05, + "loss": 0.9554, + "step": 168 + }, + { + "epoch": 1.8725761772853184, + "grad_norm": 0.7246650457382202, + "learning_rate": 1.5577889447236183e-05, + "loss": 1.1607, + "step": 169 + }, + { + "epoch": 1.8836565096952909, + "grad_norm": 0.761667013168335, + "learning_rate": 1.507537688442211e-05, + "loss": 1.1838, + "step": 170 + }, + { + "epoch": 1.8947368421052633, + "grad_norm": 0.6918529868125916, + "learning_rate": 1.457286432160804e-05, + "loss": 1.1148, + "step": 171 + }, + { + "epoch": 1.9058171745152355, + "grad_norm": 0.7272382378578186, + "learning_rate": 1.407035175879397e-05, + "loss": 1.1342, + "step": 172 + }, + { + "epoch": 1.9168975069252077, + "grad_norm": 0.6923725008964539, + "learning_rate": 1.3567839195979901e-05, + "loss": 1.0149, + "step": 173 + }, + { + "epoch": 1.92797783933518, + "grad_norm": 0.7408672571182251, + "learning_rate": 1.306532663316583e-05, + "loss": 1.148, + "step": 174 + }, + { + "epoch": 1.9390581717451525, + "grad_norm": 0.6574199795722961, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.0415, + "step": 175 + }, + { + "epoch": 1.9501385041551247, + "grad_norm": 0.721842885017395, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.2188, + "step": 176 + }, + { + "epoch": 1.9612188365650969, + "grad_norm": 0.7084245681762695, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.1464, + "step": 177 + }, + { + "epoch": 1.9722991689750693, + "grad_norm": 0.6412287354469299, + "learning_rate": 1.105527638190955e-05, + "loss": 0.9909, + "step": 178 + }, + { + "epoch": 1.9833795013850417, + "grad_norm": 0.7143056988716125, + "learning_rate": 1.0552763819095479e-05, + "loss": 1.1627, + "step": 179 + }, + { + "epoch": 1.994459833795014, + "grad_norm": 0.6697918772697449, + "learning_rate": 1.0050251256281408e-05, + "loss": 1.0892, + "step": 180 + }, + { + "epoch": 2.005540166204986, + "grad_norm": 1.8020601272583008, + "learning_rate": 9.547738693467337e-06, + "loss": 1.984, + "step": 181 + }, + { + "epoch": 2.0166204986149583, + "grad_norm": 0.7190349102020264, + "learning_rate": 9.045226130653267e-06, + "loss": 1.0837, + "step": 182 + }, + { + "epoch": 2.027700831024931, + "grad_norm": 0.6655665040016174, + "learning_rate": 8.542713567839196e-06, + "loss": 0.9812, + "step": 183 + }, + { + "epoch": 2.038781163434903, + "grad_norm": 0.7195249795913696, + "learning_rate": 8.040201005025125e-06, + "loss": 1.1748, + "step": 184 + }, + { + "epoch": 2.0498614958448753, + "grad_norm": 0.6796366572380066, + "learning_rate": 7.537688442211055e-06, + "loss": 1.07, + "step": 185 + }, + { + "epoch": 2.0609418282548475, + "grad_norm": 0.679729700088501, + "learning_rate": 7.035175879396985e-06, + "loss": 0.996, + "step": 186 + }, + { + "epoch": 2.07202216066482, + "grad_norm": 0.6805736422538757, + "learning_rate": 6.532663316582915e-06, + "loss": 1.0531, + "step": 187 + }, + { + "epoch": 2.0831024930747923, + "grad_norm": 0.698531448841095, + "learning_rate": 6.030150753768844e-06, + "loss": 1.1163, + "step": 188 + }, + { + "epoch": 2.0941828254847645, + "grad_norm": 0.6734586358070374, + "learning_rate": 5.527638190954775e-06, + "loss": 1.0584, + "step": 189 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.6843752264976501, + "learning_rate": 5.025125628140704e-06, + "loss": 1.0854, + "step": 190 + }, + { + "epoch": 2.1163434903047094, + "grad_norm": 0.6570263504981995, + "learning_rate": 4.522613065326634e-06, + "loss": 1.016, + "step": 191 + }, + { + "epoch": 2.1274238227146816, + "grad_norm": 0.707304060459137, + "learning_rate": 4.020100502512563e-06, + "loss": 1.0311, + "step": 192 + }, + { + "epoch": 2.1385041551246537, + "grad_norm": 0.7010348439216614, + "learning_rate": 3.5175879396984926e-06, + "loss": 1.1732, + "step": 193 + }, + { + "epoch": 2.149584487534626, + "grad_norm": 0.6604486703872681, + "learning_rate": 3.015075376884422e-06, + "loss": 1.012, + "step": 194 + }, + { + "epoch": 2.160664819944598, + "grad_norm": 0.6502628922462463, + "learning_rate": 2.512562814070352e-06, + "loss": 0.9839, + "step": 195 + }, + { + "epoch": 2.1717451523545708, + "grad_norm": 0.6600444912910461, + "learning_rate": 2.0100502512562813e-06, + "loss": 1.0509, + "step": 196 + }, + { + "epoch": 2.182825484764543, + "grad_norm": 0.69642573595047, + "learning_rate": 1.507537688442211e-06, + "loss": 1.1109, + "step": 197 + }, + { + "epoch": 2.193905817174515, + "grad_norm": 0.691315233707428, + "learning_rate": 1.0050251256281407e-06, + "loss": 1.1067, + "step": 198 + }, + { + "epoch": 2.2049861495844874, + "grad_norm": 0.6788798570632935, + "learning_rate": 5.025125628140703e-07, + "loss": 1.0564, + "step": 199 + }, + { + "epoch": 2.21606648199446, + "grad_norm": 0.6888399124145508, + "learning_rate": 0.0, + "loss": 1.0406, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.18753507500032e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_German_English/checkpoint-200/training_args.bin b/llama_German_English/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2d1a238875405d6c94389df89b1eb8769ed6f6f8 --- /dev/null +++ b/llama_German_English/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3fbffb80a479646f00b55bfb2b5a23ac4f71ea8d9e984c62e08b76b644a02fe +size 5624 diff --git a/llama_German_French/checkpoint-200/README.md b/llama_German_French/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_German_French/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_German_French/checkpoint-200/adapter_config.json b/llama_German_French/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..84a49652299ec105a4ad02bb6f4756d589e1f648 --- /dev/null +++ b/llama_German_French/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "gate_proj", + "q_proj", + "o_proj", + "up_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_German_French/checkpoint-200/adapter_model.safetensors b/llama_German_French/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c505b031e019241bf09203b53f64b1a99542efa3 --- /dev/null +++ b/llama_German_French/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb46c247fa21b0738f88437187f4682a030f798272627a1099c7f3ade3b43f1 +size 167832240 diff --git a/llama_German_French/checkpoint-200/optimizer.pt b/llama_German_French/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..de1852128807f36ca2fa67b75bd2c06242e0706b --- /dev/null +++ b/llama_German_French/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d911fd4030f5d0cc739b8b641bae506a42c2fd7757c9bb8195d43679e51e1ef +size 85723284 diff --git a/llama_German_French/checkpoint-200/rng_state.pth b/llama_German_French/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21f2ff1c1a816463781d51760f8156e041f5979 --- /dev/null +++ b/llama_German_French/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4981123ff3cf7bd5b7f76839e90e4776f747ca4c38dcb41876fa010c0dea8b23 +size 14244 diff --git a/llama_German_French/checkpoint-200/scheduler.pt b/llama_German_French/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_German_French/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_German_French/checkpoint-200/special_tokens_map.json b/llama_German_French/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_German_French/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_German_French/checkpoint-200/tokenizer.json b/llama_German_French/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_German_French/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_German_French/checkpoint-200/tokenizer_config.json b/llama_German_French/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_German_French/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_German_French/checkpoint-200/trainer_state.json b/llama_German_French/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7bd312d8622a5d35f1e65b7754a0e974a870bc3c --- /dev/null +++ b/llama_German_French/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.21606648199446, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0110803324099723, + "grad_norm": 0.9539678692817688, + "learning_rate": 0.0001, + "loss": 2.3989, + "step": 1 + }, + { + "epoch": 0.0221606648199446, + "grad_norm": 0.9507014751434326, + "learning_rate": 9.949748743718594e-05, + "loss": 2.3707, + "step": 2 + }, + { + "epoch": 0.0332409972299169, + "grad_norm": 0.9412204027175903, + "learning_rate": 9.899497487437186e-05, + "loss": 2.3376, + "step": 3 + }, + { + "epoch": 0.0443213296398892, + "grad_norm": 0.9634190797805786, + "learning_rate": 9.84924623115578e-05, + "loss": 2.2129, + "step": 4 + }, + { + "epoch": 0.055401662049861494, + "grad_norm": 0.9230924844741821, + "learning_rate": 9.798994974874372e-05, + "loss": 2.0629, + "step": 5 + }, + { + "epoch": 0.0664819944598338, + "grad_norm": 1.073400855064392, + "learning_rate": 9.748743718592965e-05, + "loss": 1.9289, + "step": 6 + }, + { + "epoch": 0.07756232686980609, + "grad_norm": 1.012299656867981, + "learning_rate": 9.698492462311559e-05, + "loss": 1.8658, + "step": 7 + }, + { + "epoch": 0.0886426592797784, + "grad_norm": 1.1373164653778076, + "learning_rate": 9.64824120603015e-05, + "loss": 1.6656, + "step": 8 + }, + { + "epoch": 0.0997229916897507, + "grad_norm": 0.7563988566398621, + "learning_rate": 9.597989949748745e-05, + "loss": 1.5606, + "step": 9 + }, + { + "epoch": 0.11080332409972299, + "grad_norm": 0.7940493822097778, + "learning_rate": 9.547738693467337e-05, + "loss": 1.445, + "step": 10 + }, + { + "epoch": 0.12188365650969529, + "grad_norm": 0.760132908821106, + "learning_rate": 9.49748743718593e-05, + "loss": 1.4138, + "step": 11 + }, + { + "epoch": 0.1329639889196676, + "grad_norm": 0.6393184065818787, + "learning_rate": 9.447236180904523e-05, + "loss": 1.416, + "step": 12 + }, + { + "epoch": 0.1440443213296399, + "grad_norm": 0.4566696584224701, + "learning_rate": 9.396984924623115e-05, + "loss": 1.316, + "step": 13 + }, + { + "epoch": 0.15512465373961218, + "grad_norm": 0.4536629021167755, + "learning_rate": 9.34673366834171e-05, + "loss": 1.336, + "step": 14 + }, + { + "epoch": 0.16620498614958448, + "grad_norm": 0.4461921453475952, + "learning_rate": 9.296482412060302e-05, + "loss": 1.1758, + "step": 15 + }, + { + "epoch": 0.1772853185595568, + "grad_norm": 0.48644697666168213, + "learning_rate": 9.246231155778895e-05, + "loss": 1.3097, + "step": 16 + }, + { + "epoch": 0.1883656509695291, + "grad_norm": 0.527540922164917, + "learning_rate": 9.195979899497488e-05, + "loss": 1.3245, + "step": 17 + }, + { + "epoch": 0.1994459833795014, + "grad_norm": 0.47114598751068115, + "learning_rate": 9.14572864321608e-05, + "loss": 1.2954, + "step": 18 + }, + { + "epoch": 0.21052631578947367, + "grad_norm": 0.48365527391433716, + "learning_rate": 9.095477386934675e-05, + "loss": 1.2919, + "step": 19 + }, + { + "epoch": 0.22160664819944598, + "grad_norm": 0.5727285742759705, + "learning_rate": 9.045226130653267e-05, + "loss": 1.367, + "step": 20 + }, + { + "epoch": 0.23268698060941828, + "grad_norm": 0.5801117420196533, + "learning_rate": 8.99497487437186e-05, + "loss": 1.1397, + "step": 21 + }, + { + "epoch": 0.24376731301939059, + "grad_norm": 0.5830456614494324, + "learning_rate": 8.944723618090453e-05, + "loss": 1.2724, + "step": 22 + }, + { + "epoch": 0.2548476454293629, + "grad_norm": 0.5610237121582031, + "learning_rate": 8.894472361809045e-05, + "loss": 1.3623, + "step": 23 + }, + { + "epoch": 0.2659279778393352, + "grad_norm": 0.5642468333244324, + "learning_rate": 8.84422110552764e-05, + "loss": 1.2727, + "step": 24 + }, + { + "epoch": 0.2770083102493075, + "grad_norm": 0.5839318633079529, + "learning_rate": 8.793969849246232e-05, + "loss": 1.3696, + "step": 25 + }, + { + "epoch": 0.2880886426592798, + "grad_norm": 0.6226404905319214, + "learning_rate": 8.743718592964825e-05, + "loss": 1.3249, + "step": 26 + }, + { + "epoch": 0.29916897506925205, + "grad_norm": 0.6644773483276367, + "learning_rate": 8.693467336683418e-05, + "loss": 1.3544, + "step": 27 + }, + { + "epoch": 0.31024930747922436, + "grad_norm": 1.2361204624176025, + "learning_rate": 8.64321608040201e-05, + "loss": 1.258, + "step": 28 + }, + { + "epoch": 0.32132963988919666, + "grad_norm": 0.6459571123123169, + "learning_rate": 8.592964824120603e-05, + "loss": 1.2721, + "step": 29 + }, + { + "epoch": 0.33240997229916897, + "grad_norm": 0.6955879330635071, + "learning_rate": 8.542713567839196e-05, + "loss": 1.3075, + "step": 30 + }, + { + "epoch": 0.34349030470914127, + "grad_norm": 0.647169291973114, + "learning_rate": 8.49246231155779e-05, + "loss": 1.1705, + "step": 31 + }, + { + "epoch": 0.3545706371191136, + "grad_norm": 0.7589477896690369, + "learning_rate": 8.442211055276383e-05, + "loss": 1.1657, + "step": 32 + }, + { + "epoch": 0.3656509695290859, + "grad_norm": 0.7796575427055359, + "learning_rate": 8.391959798994975e-05, + "loss": 1.24, + "step": 33 + }, + { + "epoch": 0.3767313019390582, + "grad_norm": 0.8446609377861023, + "learning_rate": 8.341708542713568e-05, + "loss": 1.2323, + "step": 34 + }, + { + "epoch": 0.3878116343490305, + "grad_norm": 0.743309736251831, + "learning_rate": 8.291457286432161e-05, + "loss": 1.1753, + "step": 35 + }, + { + "epoch": 0.3988919667590028, + "grad_norm": 0.8793002367019653, + "learning_rate": 8.241206030150754e-05, + "loss": 1.2718, + "step": 36 + }, + { + "epoch": 0.4099722991689751, + "grad_norm": 0.8249404430389404, + "learning_rate": 8.190954773869348e-05, + "loss": 1.2402, + "step": 37 + }, + { + "epoch": 0.42105263157894735, + "grad_norm": 0.7260106205940247, + "learning_rate": 8.14070351758794e-05, + "loss": 1.2394, + "step": 38 + }, + { + "epoch": 0.43213296398891965, + "grad_norm": 0.624657928943634, + "learning_rate": 8.090452261306533e-05, + "loss": 1.1727, + "step": 39 + }, + { + "epoch": 0.44321329639889195, + "grad_norm": 0.5977988839149475, + "learning_rate": 8.040201005025126e-05, + "loss": 1.177, + "step": 40 + }, + { + "epoch": 0.45429362880886426, + "grad_norm": 0.485361248254776, + "learning_rate": 7.989949748743719e-05, + "loss": 1.2467, + "step": 41 + }, + { + "epoch": 0.46537396121883656, + "grad_norm": 0.5094634294509888, + "learning_rate": 7.939698492462313e-05, + "loss": 1.2485, + "step": 42 + }, + { + "epoch": 0.47645429362880887, + "grad_norm": 0.5027374625205994, + "learning_rate": 7.889447236180904e-05, + "loss": 1.2194, + "step": 43 + }, + { + "epoch": 0.48753462603878117, + "grad_norm": 0.49862539768218994, + "learning_rate": 7.839195979899498e-05, + "loss": 1.1967, + "step": 44 + }, + { + "epoch": 0.4986149584487535, + "grad_norm": 0.49750617146492004, + "learning_rate": 7.788944723618091e-05, + "loss": 1.1373, + "step": 45 + }, + { + "epoch": 0.5096952908587258, + "grad_norm": 0.4608665704727173, + "learning_rate": 7.738693467336684e-05, + "loss": 1.1442, + "step": 46 + }, + { + "epoch": 0.5207756232686981, + "grad_norm": 0.4939401149749756, + "learning_rate": 7.688442211055277e-05, + "loss": 1.233, + "step": 47 + }, + { + "epoch": 0.5318559556786704, + "grad_norm": 0.45019057393074036, + "learning_rate": 7.638190954773869e-05, + "loss": 1.1596, + "step": 48 + }, + { + "epoch": 0.5429362880886427, + "grad_norm": 0.4512038826942444, + "learning_rate": 7.587939698492463e-05, + "loss": 1.2198, + "step": 49 + }, + { + "epoch": 0.554016620498615, + "grad_norm": 0.5084260106086731, + "learning_rate": 7.537688442211056e-05, + "loss": 1.1293, + "step": 50 + }, + { + "epoch": 0.5650969529085873, + "grad_norm": 0.45852744579315186, + "learning_rate": 7.487437185929649e-05, + "loss": 1.1258, + "step": 51 + }, + { + "epoch": 0.5761772853185596, + "grad_norm": 0.559191882610321, + "learning_rate": 7.437185929648241e-05, + "loss": 1.1825, + "step": 52 + }, + { + "epoch": 0.5872576177285319, + "grad_norm": 0.4626789689064026, + "learning_rate": 7.386934673366834e-05, + "loss": 1.1882, + "step": 53 + }, + { + "epoch": 0.5983379501385041, + "grad_norm": 0.4773663580417633, + "learning_rate": 7.336683417085427e-05, + "loss": 1.2021, + "step": 54 + }, + { + "epoch": 0.6094182825484764, + "grad_norm": 0.4846740663051605, + "learning_rate": 7.28643216080402e-05, + "loss": 1.2684, + "step": 55 + }, + { + "epoch": 0.6204986149584487, + "grad_norm": 0.48669999837875366, + "learning_rate": 7.236180904522614e-05, + "loss": 1.1904, + "step": 56 + }, + { + "epoch": 0.631578947368421, + "grad_norm": 0.5055761337280273, + "learning_rate": 7.185929648241206e-05, + "loss": 1.2323, + "step": 57 + }, + { + "epoch": 0.6426592797783933, + "grad_norm": 0.4473815858364105, + "learning_rate": 7.135678391959799e-05, + "loss": 1.2512, + "step": 58 + }, + { + "epoch": 0.6537396121883656, + "grad_norm": 0.4710114896297455, + "learning_rate": 7.085427135678392e-05, + "loss": 1.1553, + "step": 59 + }, + { + "epoch": 0.6648199445983379, + "grad_norm": 0.4998447299003601, + "learning_rate": 7.035175879396985e-05, + "loss": 1.0993, + "step": 60 + }, + { + "epoch": 0.6759002770083102, + "grad_norm": 0.44559937715530396, + "learning_rate": 6.984924623115579e-05, + "loss": 1.1707, + "step": 61 + }, + { + "epoch": 0.6869806094182825, + "grad_norm": 0.5074825286865234, + "learning_rate": 6.93467336683417e-05, + "loss": 1.2061, + "step": 62 + }, + { + "epoch": 0.6980609418282548, + "grad_norm": 0.43217408657073975, + "learning_rate": 6.884422110552764e-05, + "loss": 1.0772, + "step": 63 + }, + { + "epoch": 0.7091412742382271, + "grad_norm": 0.5155186057090759, + "learning_rate": 6.834170854271357e-05, + "loss": 1.2402, + "step": 64 + }, + { + "epoch": 0.7202216066481995, + "grad_norm": 0.4830930233001709, + "learning_rate": 6.78391959798995e-05, + "loss": 1.0951, + "step": 65 + }, + { + "epoch": 0.7313019390581718, + "grad_norm": 0.4575657248497009, + "learning_rate": 6.733668341708544e-05, + "loss": 1.129, + "step": 66 + }, + { + "epoch": 0.7423822714681441, + "grad_norm": 0.5293765068054199, + "learning_rate": 6.683417085427135e-05, + "loss": 1.0968, + "step": 67 + }, + { + "epoch": 0.7534626038781164, + "grad_norm": 0.502807080745697, + "learning_rate": 6.633165829145729e-05, + "loss": 1.1611, + "step": 68 + }, + { + "epoch": 0.7645429362880887, + "grad_norm": 0.4874890148639679, + "learning_rate": 6.582914572864322e-05, + "loss": 1.1821, + "step": 69 + }, + { + "epoch": 0.775623268698061, + "grad_norm": 0.45496171712875366, + "learning_rate": 6.532663316582915e-05, + "loss": 1.2181, + "step": 70 + }, + { + "epoch": 0.7867036011080333, + "grad_norm": 0.5249992609024048, + "learning_rate": 6.482412060301508e-05, + "loss": 1.2121, + "step": 71 + }, + { + "epoch": 0.7977839335180056, + "grad_norm": 0.5168310403823853, + "learning_rate": 6.4321608040201e-05, + "loss": 1.1298, + "step": 72 + }, + { + "epoch": 0.8088642659279779, + "grad_norm": 0.4819679856300354, + "learning_rate": 6.381909547738694e-05, + "loss": 1.1217, + "step": 73 + }, + { + "epoch": 0.8199445983379502, + "grad_norm": 0.4629118740558624, + "learning_rate": 6.331658291457287e-05, + "loss": 1.154, + "step": 74 + }, + { + "epoch": 0.8310249307479224, + "grad_norm": 0.4716959595680237, + "learning_rate": 6.28140703517588e-05, + "loss": 1.1675, + "step": 75 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.5164201855659485, + "learning_rate": 6.231155778894473e-05, + "loss": 1.2663, + "step": 76 + }, + { + "epoch": 0.853185595567867, + "grad_norm": 0.5236655473709106, + "learning_rate": 6.180904522613065e-05, + "loss": 1.1796, + "step": 77 + }, + { + "epoch": 0.8642659279778393, + "grad_norm": 0.49086111783981323, + "learning_rate": 6.130653266331658e-05, + "loss": 1.1118, + "step": 78 + }, + { + "epoch": 0.8753462603878116, + "grad_norm": 0.44578009843826294, + "learning_rate": 6.080402010050251e-05, + "loss": 1.1148, + "step": 79 + }, + { + "epoch": 0.8864265927977839, + "grad_norm": 0.5272083282470703, + "learning_rate": 6.030150753768844e-05, + "loss": 1.1604, + "step": 80 + }, + { + "epoch": 0.8975069252077562, + "grad_norm": 0.4991357624530792, + "learning_rate": 5.979899497487438e-05, + "loss": 1.1723, + "step": 81 + }, + { + "epoch": 0.9085872576177285, + "grad_norm": 0.527912437915802, + "learning_rate": 5.929648241206031e-05, + "loss": 1.1998, + "step": 82 + }, + { + "epoch": 0.9196675900277008, + "grad_norm": 0.4923700988292694, + "learning_rate": 5.879396984924623e-05, + "loss": 1.1185, + "step": 83 + }, + { + "epoch": 0.9307479224376731, + "grad_norm": 0.44895341992378235, + "learning_rate": 5.829145728643216e-05, + "loss": 1.1535, + "step": 84 + }, + { + "epoch": 0.9418282548476454, + "grad_norm": 0.4726865887641907, + "learning_rate": 5.778894472361809e-05, + "loss": 1.1536, + "step": 85 + }, + { + "epoch": 0.9529085872576177, + "grad_norm": 0.5040445327758789, + "learning_rate": 5.728643216080403e-05, + "loss": 1.2675, + "step": 86 + }, + { + "epoch": 0.96398891966759, + "grad_norm": 0.5474406480789185, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.1631, + "step": 87 + }, + { + "epoch": 0.9750692520775623, + "grad_norm": 0.5047523975372314, + "learning_rate": 5.628140703517588e-05, + "loss": 1.1449, + "step": 88 + }, + { + "epoch": 0.9861495844875346, + "grad_norm": 0.5345660448074341, + "learning_rate": 5.577889447236181e-05, + "loss": 1.1334, + "step": 89 + }, + { + "epoch": 0.997229916897507, + "grad_norm": 0.5445220470428467, + "learning_rate": 5.527638190954774e-05, + "loss": 1.1653, + "step": 90 + }, + { + "epoch": 1.0083102493074791, + "grad_norm": 1.281954050064087, + "learning_rate": 5.477386934673368e-05, + "loss": 1.9666, + "step": 91 + }, + { + "epoch": 1.0193905817174516, + "grad_norm": 0.4604099690914154, + "learning_rate": 5.4271356783919604e-05, + "loss": 1.1033, + "step": 92 + }, + { + "epoch": 1.0304709141274238, + "grad_norm": 0.5462485551834106, + "learning_rate": 5.376884422110553e-05, + "loss": 1.3182, + "step": 93 + }, + { + "epoch": 1.0415512465373962, + "grad_norm": 0.45567235350608826, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.0226, + "step": 94 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.4756138324737549, + "learning_rate": 5.276381909547739e-05, + "loss": 1.1378, + "step": 95 + }, + { + "epoch": 1.0637119113573408, + "grad_norm": 0.5044397711753845, + "learning_rate": 5.226130653266332e-05, + "loss": 1.0873, + "step": 96 + }, + { + "epoch": 1.074792243767313, + "grad_norm": 0.5245856642723083, + "learning_rate": 5.175879396984925e-05, + "loss": 1.145, + "step": 97 + }, + { + "epoch": 1.0858725761772854, + "grad_norm": 0.47712233662605286, + "learning_rate": 5.125628140703518e-05, + "loss": 1.0784, + "step": 98 + }, + { + "epoch": 1.0969529085872576, + "grad_norm": 0.5153785347938538, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.1586, + "step": 99 + }, + { + "epoch": 1.10803324099723, + "grad_norm": 0.5337215065956116, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.2157, + "step": 100 + }, + { + "epoch": 1.1191135734072022, + "grad_norm": 0.513725996017456, + "learning_rate": 4.974874371859297e-05, + "loss": 1.0565, + "step": 101 + }, + { + "epoch": 1.1301939058171746, + "grad_norm": 0.5424889326095581, + "learning_rate": 4.92462311557789e-05, + "loss": 1.1006, + "step": 102 + }, + { + "epoch": 1.1412742382271468, + "grad_norm": 0.5333667993545532, + "learning_rate": 4.874371859296483e-05, + "loss": 1.0599, + "step": 103 + }, + { + "epoch": 1.1523545706371192, + "grad_norm": 0.48299750685691833, + "learning_rate": 4.824120603015075e-05, + "loss": 0.9029, + "step": 104 + }, + { + "epoch": 1.1634349030470914, + "grad_norm": 0.5714932680130005, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.0728, + "step": 105 + }, + { + "epoch": 1.1745152354570636, + "grad_norm": 0.548978328704834, + "learning_rate": 4.723618090452262e-05, + "loss": 1.1746, + "step": 106 + }, + { + "epoch": 1.185595567867036, + "grad_norm": 0.5985714793205261, + "learning_rate": 4.673366834170855e-05, + "loss": 1.0902, + "step": 107 + }, + { + "epoch": 1.1966759002770084, + "grad_norm": 0.5157080292701721, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.0568, + "step": 108 + }, + { + "epoch": 1.2077562326869806, + "grad_norm": 0.575770914554596, + "learning_rate": 4.57286432160804e-05, + "loss": 1.1476, + "step": 109 + }, + { + "epoch": 1.2188365650969528, + "grad_norm": 0.5768939256668091, + "learning_rate": 4.522613065326633e-05, + "loss": 1.1444, + "step": 110 + }, + { + "epoch": 1.2299168975069252, + "grad_norm": 0.5460391640663147, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.1532, + "step": 111 + }, + { + "epoch": 1.2409972299168974, + "grad_norm": 0.5260967016220093, + "learning_rate": 4.42211055276382e-05, + "loss": 1.075, + "step": 112 + }, + { + "epoch": 1.2520775623268698, + "grad_norm": 0.5274809002876282, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.1017, + "step": 113 + }, + { + "epoch": 1.263157894736842, + "grad_norm": 0.6482009887695312, + "learning_rate": 4.321608040201005e-05, + "loss": 1.2008, + "step": 114 + }, + { + "epoch": 1.2742382271468145, + "grad_norm": 0.6299459934234619, + "learning_rate": 4.271356783919598e-05, + "loss": 1.085, + "step": 115 + }, + { + "epoch": 1.2853185595567866, + "grad_norm": 0.5909615159034729, + "learning_rate": 4.2211055276381914e-05, + "loss": 1.1303, + "step": 116 + }, + { + "epoch": 1.296398891966759, + "grad_norm": 0.5842643976211548, + "learning_rate": 4.170854271356784e-05, + "loss": 1.0929, + "step": 117 + }, + { + "epoch": 1.3074792243767313, + "grad_norm": 0.5436193346977234, + "learning_rate": 4.120603015075377e-05, + "loss": 1.0365, + "step": 118 + }, + { + "epoch": 1.3185595567867037, + "grad_norm": 0.5717719197273254, + "learning_rate": 4.07035175879397e-05, + "loss": 1.0925, + "step": 119 + }, + { + "epoch": 1.3296398891966759, + "grad_norm": 0.6517003774642944, + "learning_rate": 4.020100502512563e-05, + "loss": 1.1772, + "step": 120 + }, + { + "epoch": 1.3407202216066483, + "grad_norm": 0.5689294338226318, + "learning_rate": 3.969849246231156e-05, + "loss": 1.0195, + "step": 121 + }, + { + "epoch": 1.3518005540166205, + "grad_norm": 0.5608482360839844, + "learning_rate": 3.919597989949749e-05, + "loss": 0.991, + "step": 122 + }, + { + "epoch": 1.3628808864265927, + "grad_norm": 0.6248909831047058, + "learning_rate": 3.869346733668342e-05, + "loss": 1.2159, + "step": 123 + }, + { + "epoch": 1.373961218836565, + "grad_norm": 0.5770443081855774, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.1003, + "step": 124 + }, + { + "epoch": 1.3850415512465375, + "grad_norm": 0.6343099474906921, + "learning_rate": 3.768844221105528e-05, + "loss": 1.1453, + "step": 125 + }, + { + "epoch": 1.3961218836565097, + "grad_norm": 0.6156879663467407, + "learning_rate": 3.7185929648241204e-05, + "loss": 1.107, + "step": 126 + }, + { + "epoch": 1.4072022160664819, + "grad_norm": 0.6195124983787537, + "learning_rate": 3.668341708542714e-05, + "loss": 1.0073, + "step": 127 + }, + { + "epoch": 1.4182825484764543, + "grad_norm": 0.6385990977287292, + "learning_rate": 3.618090452261307e-05, + "loss": 1.1386, + "step": 128 + }, + { + "epoch": 1.4293628808864267, + "grad_norm": 0.6045225262641907, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.0795, + "step": 129 + }, + { + "epoch": 1.440443213296399, + "grad_norm": 0.6713436245918274, + "learning_rate": 3.517587939698493e-05, + "loss": 1.1575, + "step": 130 + }, + { + "epoch": 1.451523545706371, + "grad_norm": 0.575412929058075, + "learning_rate": 3.467336683417085e-05, + "loss": 0.9738, + "step": 131 + }, + { + "epoch": 1.4626038781163435, + "grad_norm": 0.6234104633331299, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.1082, + "step": 132 + }, + { + "epoch": 1.4736842105263157, + "grad_norm": 0.6303395628929138, + "learning_rate": 3.366834170854272e-05, + "loss": 1.0774, + "step": 133 + }, + { + "epoch": 1.4847645429362881, + "grad_norm": 0.6352152228355408, + "learning_rate": 3.3165829145728643e-05, + "loss": 1.0167, + "step": 134 + }, + { + "epoch": 1.4958448753462603, + "grad_norm": 0.615987241268158, + "learning_rate": 3.2663316582914576e-05, + "loss": 1.084, + "step": 135 + }, + { + "epoch": 1.5069252077562327, + "grad_norm": 0.6664732098579407, + "learning_rate": 3.21608040201005e-05, + "loss": 1.0985, + "step": 136 + }, + { + "epoch": 1.5180055401662051, + "grad_norm": 0.7383134961128235, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.1818, + "step": 137 + }, + { + "epoch": 1.5290858725761773, + "grad_norm": 0.6412088871002197, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.0773, + "step": 138 + }, + { + "epoch": 1.5401662049861495, + "grad_norm": 0.6358785033226013, + "learning_rate": 3.065326633165829e-05, + "loss": 1.0402, + "step": 139 + }, + { + "epoch": 1.5512465373961217, + "grad_norm": 0.6523603200912476, + "learning_rate": 3.015075376884422e-05, + "loss": 1.1136, + "step": 140 + }, + { + "epoch": 1.5623268698060941, + "grad_norm": 0.6411638259887695, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.051, + "step": 141 + }, + { + "epoch": 1.5734072022160666, + "grad_norm": 0.6304634809494019, + "learning_rate": 2.914572864321608e-05, + "loss": 1.0714, + "step": 142 + }, + { + "epoch": 1.5844875346260388, + "grad_norm": 0.6691243052482605, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.1276, + "step": 143 + }, + { + "epoch": 1.595567867036011, + "grad_norm": 0.590101957321167, + "learning_rate": 2.814070351758794e-05, + "loss": 0.9619, + "step": 144 + }, + { + "epoch": 1.6066481994459834, + "grad_norm": 0.5995690226554871, + "learning_rate": 2.763819095477387e-05, + "loss": 1.003, + "step": 145 + }, + { + "epoch": 1.6177285318559558, + "grad_norm": 0.6279429793357849, + "learning_rate": 2.7135678391959802e-05, + "loss": 1.0711, + "step": 146 + }, + { + "epoch": 1.628808864265928, + "grad_norm": 0.6602685451507568, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.1766, + "step": 147 + }, + { + "epoch": 1.6398891966759002, + "grad_norm": 0.7377718687057495, + "learning_rate": 2.613065326633166e-05, + "loss": 1.1155, + "step": 148 + }, + { + "epoch": 1.6509695290858726, + "grad_norm": 0.6458079814910889, + "learning_rate": 2.562814070351759e-05, + "loss": 1.0323, + "step": 149 + }, + { + "epoch": 1.662049861495845, + "grad_norm": 0.6258276700973511, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.0364, + "step": 150 + }, + { + "epoch": 1.6731301939058172, + "grad_norm": 0.6357448101043701, + "learning_rate": 2.462311557788945e-05, + "loss": 1.0378, + "step": 151 + }, + { + "epoch": 1.6842105263157894, + "grad_norm": 0.6035080552101135, + "learning_rate": 2.4120603015075376e-05, + "loss": 0.97, + "step": 152 + }, + { + "epoch": 1.6952908587257618, + "grad_norm": 0.6766805052757263, + "learning_rate": 2.361809045226131e-05, + "loss": 1.1183, + "step": 153 + }, + { + "epoch": 1.7063711911357342, + "grad_norm": 0.652312695980072, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.1006, + "step": 154 + }, + { + "epoch": 1.7174515235457064, + "grad_norm": 0.6161527037620544, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.0243, + "step": 155 + }, + { + "epoch": 1.7285318559556786, + "grad_norm": 0.6364408731460571, + "learning_rate": 2.21105527638191e-05, + "loss": 1.013, + "step": 156 + }, + { + "epoch": 1.739612188365651, + "grad_norm": 0.6595537066459656, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.1293, + "step": 157 + }, + { + "epoch": 1.7506925207756234, + "grad_norm": 0.6244936585426331, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.0434, + "step": 158 + }, + { + "epoch": 1.7617728531855956, + "grad_norm": 0.6980383396148682, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.0251, + "step": 159 + }, + { + "epoch": 1.7728531855955678, + "grad_norm": 0.6194802522659302, + "learning_rate": 2.0100502512562815e-05, + "loss": 1.0232, + "step": 160 + }, + { + "epoch": 1.78393351800554, + "grad_norm": 0.6191238760948181, + "learning_rate": 1.9597989949748744e-05, + "loss": 1.0466, + "step": 161 + }, + { + "epoch": 1.7950138504155124, + "grad_norm": 0.6697459816932678, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.1207, + "step": 162 + }, + { + "epoch": 1.8060941828254848, + "grad_norm": 0.6268838047981262, + "learning_rate": 1.8592964824120602e-05, + "loss": 0.9917, + "step": 163 + }, + { + "epoch": 1.817174515235457, + "grad_norm": 0.6360554099082947, + "learning_rate": 1.8090452261306535e-05, + "loss": 1.0161, + "step": 164 + }, + { + "epoch": 1.8282548476454292, + "grad_norm": 0.6684898138046265, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.056, + "step": 165 + }, + { + "epoch": 1.8393351800554016, + "grad_norm": 0.6515068411827087, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.0309, + "step": 166 + }, + { + "epoch": 1.850415512465374, + "grad_norm": 0.6543217301368713, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.0352, + "step": 167 + }, + { + "epoch": 1.8614958448753463, + "grad_norm": 0.6411666870117188, + "learning_rate": 1.608040201005025e-05, + "loss": 0.9145, + "step": 168 + }, + { + "epoch": 1.8725761772853184, + "grad_norm": 0.7090892791748047, + "learning_rate": 1.5577889447236183e-05, + "loss": 1.1159, + "step": 169 + }, + { + "epoch": 1.8836565096952909, + "grad_norm": 0.724311113357544, + "learning_rate": 1.507537688442211e-05, + "loss": 1.1269, + "step": 170 + }, + { + "epoch": 1.8947368421052633, + "grad_norm": 0.6784828305244446, + "learning_rate": 1.457286432160804e-05, + "loss": 1.056, + "step": 171 + }, + { + "epoch": 1.9058171745152355, + "grad_norm": 0.7494035363197327, + "learning_rate": 1.407035175879397e-05, + "loss": 1.0663, + "step": 172 + }, + { + "epoch": 1.9168975069252077, + "grad_norm": 0.6572526693344116, + "learning_rate": 1.3567839195979901e-05, + "loss": 0.9586, + "step": 173 + }, + { + "epoch": 1.92797783933518, + "grad_norm": 0.6908337473869324, + "learning_rate": 1.306532663316583e-05, + "loss": 1.1032, + "step": 174 + }, + { + "epoch": 1.9390581717451525, + "grad_norm": 0.6384634375572205, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.9993, + "step": 175 + }, + { + "epoch": 1.9501385041551247, + "grad_norm": 0.6988664269447327, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.1774, + "step": 176 + }, + { + "epoch": 1.9612188365650969, + "grad_norm": 0.7173413038253784, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.0903, + "step": 177 + }, + { + "epoch": 1.9722991689750693, + "grad_norm": 0.6163381338119507, + "learning_rate": 1.105527638190955e-05, + "loss": 0.9508, + "step": 178 + }, + { + "epoch": 1.9833795013850417, + "grad_norm": 0.7130718231201172, + "learning_rate": 1.0552763819095479e-05, + "loss": 1.1151, + "step": 179 + }, + { + "epoch": 1.994459833795014, + "grad_norm": 0.6283115744590759, + "learning_rate": 1.0050251256281408e-05, + "loss": 1.0122, + "step": 180 + }, + { + "epoch": 2.005540166204986, + "grad_norm": 1.7011470794677734, + "learning_rate": 9.547738693467337e-06, + "loss": 1.8724, + "step": 181 + }, + { + "epoch": 2.0166204986149583, + "grad_norm": 0.6880711913108826, + "learning_rate": 9.045226130653267e-06, + "loss": 1.0253, + "step": 182 + }, + { + "epoch": 2.027700831024931, + "grad_norm": 0.6449779272079468, + "learning_rate": 8.542713567839196e-06, + "loss": 0.9395, + "step": 183 + }, + { + "epoch": 2.038781163434903, + "grad_norm": 0.6878742575645447, + "learning_rate": 8.040201005025125e-06, + "loss": 1.1515, + "step": 184 + }, + { + "epoch": 2.0498614958448753, + "grad_norm": 0.6578611135482788, + "learning_rate": 7.537688442211055e-06, + "loss": 1.0202, + "step": 185 + }, + { + "epoch": 2.0609418282548475, + "grad_norm": 0.6302409768104553, + "learning_rate": 7.035175879396985e-06, + "loss": 0.9428, + "step": 186 + }, + { + "epoch": 2.07202216066482, + "grad_norm": 0.644244909286499, + "learning_rate": 6.532663316582915e-06, + "loss": 1.0047, + "step": 187 + }, + { + "epoch": 2.0831024930747923, + "grad_norm": 0.8153209090232849, + "learning_rate": 6.030150753768844e-06, + "loss": 1.0684, + "step": 188 + }, + { + "epoch": 2.0941828254847645, + "grad_norm": 0.6339110136032104, + "learning_rate": 5.527638190954775e-06, + "loss": 1.0228, + "step": 189 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.656551718711853, + "learning_rate": 5.025125628140704e-06, + "loss": 1.0447, + "step": 190 + }, + { + "epoch": 2.1163434903047094, + "grad_norm": 0.6212584972381592, + "learning_rate": 4.522613065326634e-06, + "loss": 0.9713, + "step": 191 + }, + { + "epoch": 2.1274238227146816, + "grad_norm": 0.7052453756332397, + "learning_rate": 4.020100502512563e-06, + "loss": 1.0032, + "step": 192 + }, + { + "epoch": 2.1385041551246537, + "grad_norm": 0.6406748294830322, + "learning_rate": 3.5175879396984926e-06, + "loss": 1.0942, + "step": 193 + }, + { + "epoch": 2.149584487534626, + "grad_norm": 0.6339934468269348, + "learning_rate": 3.015075376884422e-06, + "loss": 0.9696, + "step": 194 + }, + { + "epoch": 2.160664819944598, + "grad_norm": 0.6168909072875977, + "learning_rate": 2.512562814070352e-06, + "loss": 0.952, + "step": 195 + }, + { + "epoch": 2.1717451523545708, + "grad_norm": 0.670893669128418, + "learning_rate": 2.0100502512562813e-06, + "loss": 1.0214, + "step": 196 + }, + { + "epoch": 2.182825484764543, + "grad_norm": 0.6624979376792908, + "learning_rate": 1.507537688442211e-06, + "loss": 1.0655, + "step": 197 + }, + { + "epoch": 2.193905817174515, + "grad_norm": 0.6381769180297852, + "learning_rate": 1.0050251256281407e-06, + "loss": 1.0344, + "step": 198 + }, + { + "epoch": 2.2049861495844874, + "grad_norm": 0.6395440697669983, + "learning_rate": 5.025125628140703e-07, + "loss": 0.9908, + "step": 199 + }, + { + "epoch": 2.21606648199446, + "grad_norm": 0.6501817107200623, + "learning_rate": 0.0, + "loss": 0.9978, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.4497042030739456e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_German_French/checkpoint-200/training_args.bin b/llama_German_French/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fade439525d52529ac7a4a243e368105fbadb173 --- /dev/null +++ b/llama_German_French/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f094d141cf4134283de6be7078e78ff7ba91d237724a46b01a047d5c94fd6945 +size 5624 diff --git a/llama_German_Vietnamese/checkpoint-200/README.md b/llama_German_Vietnamese/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_German_Vietnamese/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_German_Vietnamese/checkpoint-200/adapter_config.json b/llama_German_Vietnamese/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..237fdddf2e7df3d3adc96557b0da5ba9bbb4e05c --- /dev/null +++ b/llama_German_Vietnamese/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "gate_proj", + "k_proj", + "up_proj", + "down_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_German_Vietnamese/checkpoint-200/adapter_model.safetensors b/llama_German_Vietnamese/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bd70c62fe0a7ee79e02dbe4085046dc961023e6 --- /dev/null +++ b/llama_German_Vietnamese/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f1f1dbc458f4da74b874fae6c2df870b9143a8f350679c8df2c5195af9c0280 +size 167832240 diff --git a/llama_German_Vietnamese/checkpoint-200/optimizer.pt b/llama_German_Vietnamese/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f1ef7bab684f4fe9915caa504ce3e7d991c6e4e --- /dev/null +++ b/llama_German_Vietnamese/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea2539fd47099f68786f088130971486d958310b956b6465cb4fc7af922b30f1 +size 85723284 diff --git a/llama_German_Vietnamese/checkpoint-200/rng_state.pth b/llama_German_Vietnamese/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f21f2ff1c1a816463781d51760f8156e041f5979 --- /dev/null +++ b/llama_German_Vietnamese/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4981123ff3cf7bd5b7f76839e90e4776f747ca4c38dcb41876fa010c0dea8b23 +size 14244 diff --git a/llama_German_Vietnamese/checkpoint-200/scheduler.pt b/llama_German_Vietnamese/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_German_Vietnamese/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_German_Vietnamese/checkpoint-200/special_tokens_map.json b/llama_German_Vietnamese/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_German_Vietnamese/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_German_Vietnamese/checkpoint-200/tokenizer.json b/llama_German_Vietnamese/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_German_Vietnamese/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_German_Vietnamese/checkpoint-200/tokenizer_config.json b/llama_German_Vietnamese/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_German_Vietnamese/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_German_Vietnamese/checkpoint-200/trainer_state.json b/llama_German_Vietnamese/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..93846e75558ca2a299bc2bb77d8e2287536ba303 --- /dev/null +++ b/llama_German_Vietnamese/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.21606648199446, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0110803324099723, + "grad_norm": 0.9601478576660156, + "learning_rate": 0.0001, + "loss": 2.5214, + "step": 1 + }, + { + "epoch": 0.0221606648199446, + "grad_norm": 0.9971591830253601, + "learning_rate": 9.949748743718594e-05, + "loss": 2.5439, + "step": 2 + }, + { + "epoch": 0.0332409972299169, + "grad_norm": 0.9417962431907654, + "learning_rate": 9.899497487437186e-05, + "loss": 2.4934, + "step": 3 + }, + { + "epoch": 0.0443213296398892, + "grad_norm": 1.0321747064590454, + "learning_rate": 9.84924623115578e-05, + "loss": 2.4263, + "step": 4 + }, + { + "epoch": 0.055401662049861494, + "grad_norm": 0.9931637644767761, + "learning_rate": 9.798994974874372e-05, + "loss": 2.2022, + "step": 5 + }, + { + "epoch": 0.0664819944598338, + "grad_norm": 1.0871055126190186, + "learning_rate": 9.748743718592965e-05, + "loss": 2.1079, + "step": 6 + }, + { + "epoch": 0.07756232686980609, + "grad_norm": 1.0963650941848755, + "learning_rate": 9.698492462311559e-05, + "loss": 2.0265, + "step": 7 + }, + { + "epoch": 0.0886426592797784, + "grad_norm": 1.1211330890655518, + "learning_rate": 9.64824120603015e-05, + "loss": 1.8401, + "step": 8 + }, + { + "epoch": 0.0997229916897507, + "grad_norm": 0.7635355591773987, + "learning_rate": 9.597989949748745e-05, + "loss": 1.6885, + "step": 9 + }, + { + "epoch": 0.11080332409972299, + "grad_norm": 0.9378063082695007, + "learning_rate": 9.547738693467337e-05, + "loss": 1.5966, + "step": 10 + }, + { + "epoch": 0.12188365650969529, + "grad_norm": 0.814569890499115, + "learning_rate": 9.49748743718593e-05, + "loss": 1.5616, + "step": 11 + }, + { + "epoch": 0.1329639889196676, + "grad_norm": 0.671045184135437, + "learning_rate": 9.447236180904523e-05, + "loss": 1.4996, + "step": 12 + }, + { + "epoch": 0.1440443213296399, + "grad_norm": 0.5119243860244751, + "learning_rate": 9.396984924623115e-05, + "loss": 1.471, + "step": 13 + }, + { + "epoch": 0.15512465373961218, + "grad_norm": 0.47041207551956177, + "learning_rate": 9.34673366834171e-05, + "loss": 1.4445, + "step": 14 + }, + { + "epoch": 0.16620498614958448, + "grad_norm": 0.48001161217689514, + "learning_rate": 9.296482412060302e-05, + "loss": 1.3295, + "step": 15 + }, + { + "epoch": 0.1772853185595568, + "grad_norm": 0.578236997127533, + "learning_rate": 9.246231155778895e-05, + "loss": 1.4751, + "step": 16 + }, + { + "epoch": 0.1883656509695291, + "grad_norm": 0.5084776282310486, + "learning_rate": 9.195979899497488e-05, + "loss": 1.4156, + "step": 17 + }, + { + "epoch": 0.1994459833795014, + "grad_norm": 0.5236825346946716, + "learning_rate": 9.14572864321608e-05, + "loss": 1.4412, + "step": 18 + }, + { + "epoch": 0.21052631578947367, + "grad_norm": 0.4692622125148773, + "learning_rate": 9.095477386934675e-05, + "loss": 1.3813, + "step": 19 + }, + { + "epoch": 0.22160664819944598, + "grad_norm": 0.5485289096832275, + "learning_rate": 9.045226130653267e-05, + "loss": 1.4734, + "step": 20 + }, + { + "epoch": 0.23268698060941828, + "grad_norm": 0.5981004238128662, + "learning_rate": 8.99497487437186e-05, + "loss": 1.2878, + "step": 21 + }, + { + "epoch": 0.24376731301939059, + "grad_norm": 0.5485694408416748, + "learning_rate": 8.944723618090453e-05, + "loss": 1.3564, + "step": 22 + }, + { + "epoch": 0.2548476454293629, + "grad_norm": 0.5757837295532227, + "learning_rate": 8.894472361809045e-05, + "loss": 1.443, + "step": 23 + }, + { + "epoch": 0.2659279778393352, + "grad_norm": 0.5952128767967224, + "learning_rate": 8.84422110552764e-05, + "loss": 1.3952, + "step": 24 + }, + { + "epoch": 0.2770083102493075, + "grad_norm": 0.6220591068267822, + "learning_rate": 8.793969849246232e-05, + "loss": 1.5112, + "step": 25 + }, + { + "epoch": 0.2880886426592798, + "grad_norm": 0.6600030064582825, + "learning_rate": 8.743718592964825e-05, + "loss": 1.4488, + "step": 26 + }, + { + "epoch": 0.29916897506925205, + "grad_norm": 0.6519652009010315, + "learning_rate": 8.693467336683418e-05, + "loss": 1.4591, + "step": 27 + }, + { + "epoch": 0.31024930747922436, + "grad_norm": 0.6863296627998352, + "learning_rate": 8.64321608040201e-05, + "loss": 1.3665, + "step": 28 + }, + { + "epoch": 0.32132963988919666, + "grad_norm": 0.6871782541275024, + "learning_rate": 8.592964824120603e-05, + "loss": 1.3526, + "step": 29 + }, + { + "epoch": 0.33240997229916897, + "grad_norm": 0.7255661487579346, + "learning_rate": 8.542713567839196e-05, + "loss": 1.3958, + "step": 30 + }, + { + "epoch": 0.34349030470914127, + "grad_norm": 0.7219669818878174, + "learning_rate": 8.49246231155779e-05, + "loss": 1.3009, + "step": 31 + }, + { + "epoch": 0.3545706371191136, + "grad_norm": 0.8132200241088867, + "learning_rate": 8.442211055276383e-05, + "loss": 1.2597, + "step": 32 + }, + { + "epoch": 0.3656509695290859, + "grad_norm": 0.7818196415901184, + "learning_rate": 8.391959798994975e-05, + "loss": 1.3707, + "step": 33 + }, + { + "epoch": 0.3767313019390582, + "grad_norm": 0.8026142716407776, + "learning_rate": 8.341708542713568e-05, + "loss": 1.3096, + "step": 34 + }, + { + "epoch": 0.3878116343490305, + "grad_norm": 0.7700684666633606, + "learning_rate": 8.291457286432161e-05, + "loss": 1.2834, + "step": 35 + }, + { + "epoch": 0.3988919667590028, + "grad_norm": 0.921628475189209, + "learning_rate": 8.241206030150754e-05, + "loss": 1.4043, + "step": 36 + }, + { + "epoch": 0.4099722991689751, + "grad_norm": 0.8844993114471436, + "learning_rate": 8.190954773869348e-05, + "loss": 1.3644, + "step": 37 + }, + { + "epoch": 0.42105263157894735, + "grad_norm": 0.817577064037323, + "learning_rate": 8.14070351758794e-05, + "loss": 1.3475, + "step": 38 + }, + { + "epoch": 0.43213296398891965, + "grad_norm": 0.7010540962219238, + "learning_rate": 8.090452261306533e-05, + "loss": 1.2523, + "step": 39 + }, + { + "epoch": 0.44321329639889195, + "grad_norm": 0.6243841648101807, + "learning_rate": 8.040201005025126e-05, + "loss": 1.2866, + "step": 40 + }, + { + "epoch": 0.45429362880886426, + "grad_norm": 0.5377902984619141, + "learning_rate": 7.989949748743719e-05, + "loss": 1.306, + "step": 41 + }, + { + "epoch": 0.46537396121883656, + "grad_norm": 0.5656118988990784, + "learning_rate": 7.939698492462313e-05, + "loss": 1.3025, + "step": 42 + }, + { + "epoch": 0.47645429362880887, + "grad_norm": 0.5747938752174377, + "learning_rate": 7.889447236180904e-05, + "loss": 1.3078, + "step": 43 + }, + { + "epoch": 0.48753462603878117, + "grad_norm": 0.541191041469574, + "learning_rate": 7.839195979899498e-05, + "loss": 1.3143, + "step": 44 + }, + { + "epoch": 0.4986149584487535, + "grad_norm": 0.5412452220916748, + "learning_rate": 7.788944723618091e-05, + "loss": 1.2474, + "step": 45 + }, + { + "epoch": 0.5096952908587258, + "grad_norm": 0.5455557107925415, + "learning_rate": 7.738693467336684e-05, + "loss": 1.2612, + "step": 46 + }, + { + "epoch": 0.5207756232686981, + "grad_norm": 0.580496609210968, + "learning_rate": 7.688442211055277e-05, + "loss": 1.3573, + "step": 47 + }, + { + "epoch": 0.5318559556786704, + "grad_norm": 0.5404570698738098, + "learning_rate": 7.638190954773869e-05, + "loss": 1.2945, + "step": 48 + }, + { + "epoch": 0.5429362880886427, + "grad_norm": 0.5350461006164551, + "learning_rate": 7.587939698492463e-05, + "loss": 1.3042, + "step": 49 + }, + { + "epoch": 0.554016620498615, + "grad_norm": 0.5313552021980286, + "learning_rate": 7.537688442211056e-05, + "loss": 1.198, + "step": 50 + }, + { + "epoch": 0.5650969529085873, + "grad_norm": 0.5592322945594788, + "learning_rate": 7.487437185929649e-05, + "loss": 1.2152, + "step": 51 + }, + { + "epoch": 0.5761772853185596, + "grad_norm": 0.5533973574638367, + "learning_rate": 7.437185929648241e-05, + "loss": 1.2972, + "step": 52 + }, + { + "epoch": 0.5872576177285319, + "grad_norm": 0.8928530216217041, + "learning_rate": 7.386934673366834e-05, + "loss": 1.3444, + "step": 53 + }, + { + "epoch": 0.5983379501385041, + "grad_norm": 0.5723676085472107, + "learning_rate": 7.336683417085427e-05, + "loss": 1.2919, + "step": 54 + }, + { + "epoch": 0.6094182825484764, + "grad_norm": 0.5337915420532227, + "learning_rate": 7.28643216080402e-05, + "loss": 1.2934, + "step": 55 + }, + { + "epoch": 0.6204986149584487, + "grad_norm": 0.5501229166984558, + "learning_rate": 7.236180904522614e-05, + "loss": 1.2849, + "step": 56 + }, + { + "epoch": 0.631578947368421, + "grad_norm": 0.5692880153656006, + "learning_rate": 7.185929648241206e-05, + "loss": 1.3019, + "step": 57 + }, + { + "epoch": 0.6426592797783933, + "grad_norm": 0.534240186214447, + "learning_rate": 7.135678391959799e-05, + "loss": 1.3367, + "step": 58 + }, + { + "epoch": 0.6537396121883656, + "grad_norm": 0.5338261127471924, + "learning_rate": 7.085427135678392e-05, + "loss": 1.2605, + "step": 59 + }, + { + "epoch": 0.6648199445983379, + "grad_norm": 0.5499207377433777, + "learning_rate": 7.035175879396985e-05, + "loss": 1.1653, + "step": 60 + }, + { + "epoch": 0.6759002770083102, + "grad_norm": 0.5046166181564331, + "learning_rate": 6.984924623115579e-05, + "loss": 1.267, + "step": 61 + }, + { + "epoch": 0.6869806094182825, + "grad_norm": 0.5236312747001648, + "learning_rate": 6.93467336683417e-05, + "loss": 1.3165, + "step": 62 + }, + { + "epoch": 0.6980609418282548, + "grad_norm": 0.4572203755378723, + "learning_rate": 6.884422110552764e-05, + "loss": 1.1461, + "step": 63 + }, + { + "epoch": 0.7091412742382271, + "grad_norm": 0.588129997253418, + "learning_rate": 6.834170854271357e-05, + "loss": 1.3179, + "step": 64 + }, + { + "epoch": 0.7202216066481995, + "grad_norm": 0.5468114018440247, + "learning_rate": 6.78391959798995e-05, + "loss": 1.195, + "step": 65 + }, + { + "epoch": 0.7313019390581718, + "grad_norm": 0.5059835910797119, + "learning_rate": 6.733668341708544e-05, + "loss": 1.2038, + "step": 66 + }, + { + "epoch": 0.7423822714681441, + "grad_norm": 0.539843738079071, + "learning_rate": 6.683417085427135e-05, + "loss": 1.1811, + "step": 67 + }, + { + "epoch": 0.7534626038781164, + "grad_norm": 0.5525516271591187, + "learning_rate": 6.633165829145729e-05, + "loss": 1.2704, + "step": 68 + }, + { + "epoch": 0.7645429362880887, + "grad_norm": 0.544106125831604, + "learning_rate": 6.582914572864322e-05, + "loss": 1.2563, + "step": 69 + }, + { + "epoch": 0.775623268698061, + "grad_norm": 0.5261914134025574, + "learning_rate": 6.532663316582915e-05, + "loss": 1.307, + "step": 70 + }, + { + "epoch": 0.7867036011080333, + "grad_norm": 0.5528128147125244, + "learning_rate": 6.482412060301508e-05, + "loss": 1.2808, + "step": 71 + }, + { + "epoch": 0.7977839335180056, + "grad_norm": 0.5334076285362244, + "learning_rate": 6.4321608040201e-05, + "loss": 1.2469, + "step": 72 + }, + { + "epoch": 0.8088642659279779, + "grad_norm": 0.5195380449295044, + "learning_rate": 6.381909547738694e-05, + "loss": 1.2046, + "step": 73 + }, + { + "epoch": 0.8199445983379502, + "grad_norm": 0.5215188264846802, + "learning_rate": 6.331658291457287e-05, + "loss": 1.2784, + "step": 74 + }, + { + "epoch": 0.8310249307479224, + "grad_norm": 0.5307009220123291, + "learning_rate": 6.28140703517588e-05, + "loss": 1.2436, + "step": 75 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.5916880965232849, + "learning_rate": 6.231155778894473e-05, + "loss": 1.3524, + "step": 76 + }, + { + "epoch": 0.853185595567867, + "grad_norm": 0.5962271690368652, + "learning_rate": 6.180904522613065e-05, + "loss": 1.2624, + "step": 77 + }, + { + "epoch": 0.8642659279778393, + "grad_norm": 0.5542305111885071, + "learning_rate": 6.130653266331658e-05, + "loss": 1.2514, + "step": 78 + }, + { + "epoch": 0.8753462603878116, + "grad_norm": 0.5269355773925781, + "learning_rate": 6.080402010050251e-05, + "loss": 1.2438, + "step": 79 + }, + { + "epoch": 0.8864265927977839, + "grad_norm": 0.596505880355835, + "learning_rate": 6.030150753768844e-05, + "loss": 1.262, + "step": 80 + }, + { + "epoch": 0.8975069252077562, + "grad_norm": 0.5472405552864075, + "learning_rate": 5.979899497487438e-05, + "loss": 1.2682, + "step": 81 + }, + { + "epoch": 0.9085872576177285, + "grad_norm": 0.5715760588645935, + "learning_rate": 5.929648241206031e-05, + "loss": 1.3046, + "step": 82 + }, + { + "epoch": 0.9196675900277008, + "grad_norm": 0.5978801250457764, + "learning_rate": 5.879396984924623e-05, + "loss": 1.2553, + "step": 83 + }, + { + "epoch": 0.9307479224376731, + "grad_norm": 0.5009713768959045, + "learning_rate": 5.829145728643216e-05, + "loss": 1.2309, + "step": 84 + }, + { + "epoch": 0.9418282548476454, + "grad_norm": 0.5573865175247192, + "learning_rate": 5.778894472361809e-05, + "loss": 1.2746, + "step": 85 + }, + { + "epoch": 0.9529085872576177, + "grad_norm": 0.5725341439247131, + "learning_rate": 5.728643216080403e-05, + "loss": 1.3578, + "step": 86 + }, + { + "epoch": 0.96398891966759, + "grad_norm": 0.610304057598114, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.2823, + "step": 87 + }, + { + "epoch": 0.9750692520775623, + "grad_norm": 0.5619968175888062, + "learning_rate": 5.628140703517588e-05, + "loss": 1.257, + "step": 88 + }, + { + "epoch": 0.9861495844875346, + "grad_norm": 0.593148410320282, + "learning_rate": 5.577889447236181e-05, + "loss": 1.2417, + "step": 89 + }, + { + "epoch": 0.997229916897507, + "grad_norm": 0.5581191182136536, + "learning_rate": 5.527638190954774e-05, + "loss": 1.2476, + "step": 90 + }, + { + "epoch": 1.0083102493074791, + "grad_norm": 1.3162742853164673, + "learning_rate": 5.477386934673368e-05, + "loss": 2.0878, + "step": 91 + }, + { + "epoch": 1.0193905817174516, + "grad_norm": 0.5107030868530273, + "learning_rate": 5.4271356783919604e-05, + "loss": 1.2154, + "step": 92 + }, + { + "epoch": 1.0304709141274238, + "grad_norm": 0.5894776582717896, + "learning_rate": 5.376884422110553e-05, + "loss": 1.394, + "step": 93 + }, + { + "epoch": 1.0415512465373962, + "grad_norm": 0.48722517490386963, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.1112, + "step": 94 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.529351532459259, + "learning_rate": 5.276381909547739e-05, + "loss": 1.2253, + "step": 95 + }, + { + "epoch": 1.0637119113573408, + "grad_norm": 0.5319796204566956, + "learning_rate": 5.226130653266332e-05, + "loss": 1.1533, + "step": 96 + }, + { + "epoch": 1.074792243767313, + "grad_norm": 0.5794050693511963, + "learning_rate": 5.175879396984925e-05, + "loss": 1.2671, + "step": 97 + }, + { + "epoch": 1.0858725761772854, + "grad_norm": 0.5293458700180054, + "learning_rate": 5.125628140703518e-05, + "loss": 1.167, + "step": 98 + }, + { + "epoch": 1.0969529085872576, + "grad_norm": 0.5559932589530945, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.2224, + "step": 99 + }, + { + "epoch": 1.10803324099723, + "grad_norm": 0.593885064125061, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.2955, + "step": 100 + }, + { + "epoch": 1.1191135734072022, + "grad_norm": 0.5666723847389221, + "learning_rate": 4.974874371859297e-05, + "loss": 1.1399, + "step": 101 + }, + { + "epoch": 1.1301939058171746, + "grad_norm": 0.6024338603019714, + "learning_rate": 4.92462311557789e-05, + "loss": 1.2082, + "step": 102 + }, + { + "epoch": 1.1412742382271468, + "grad_norm": 0.5638694763183594, + "learning_rate": 4.874371859296483e-05, + "loss": 1.1431, + "step": 103 + }, + { + "epoch": 1.1523545706371192, + "grad_norm": 0.5496792197227478, + "learning_rate": 4.824120603015075e-05, + "loss": 1.0157, + "step": 104 + }, + { + "epoch": 1.1634349030470914, + "grad_norm": 0.6527867317199707, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.1533, + "step": 105 + }, + { + "epoch": 1.1745152354570636, + "grad_norm": 0.6266101002693176, + "learning_rate": 4.723618090452262e-05, + "loss": 1.2551, + "step": 106 + }, + { + "epoch": 1.185595567867036, + "grad_norm": 0.6470689177513123, + "learning_rate": 4.673366834170855e-05, + "loss": 1.212, + "step": 107 + }, + { + "epoch": 1.1966759002770084, + "grad_norm": 0.6015138030052185, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.1269, + "step": 108 + }, + { + "epoch": 1.2077562326869806, + "grad_norm": 0.6057308912277222, + "learning_rate": 4.57286432160804e-05, + "loss": 1.2424, + "step": 109 + }, + { + "epoch": 1.2188365650969528, + "grad_norm": 0.6024768352508545, + "learning_rate": 4.522613065326633e-05, + "loss": 1.188, + "step": 110 + }, + { + "epoch": 1.2299168975069252, + "grad_norm": 0.5917758941650391, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.1956, + "step": 111 + }, + { + "epoch": 1.2409972299168974, + "grad_norm": 0.5909789800643921, + "learning_rate": 4.42211055276382e-05, + "loss": 1.1521, + "step": 112 + }, + { + "epoch": 1.2520775623268698, + "grad_norm": 0.6082826852798462, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.2157, + "step": 113 + }, + { + "epoch": 1.263157894736842, + "grad_norm": 0.7129999995231628, + "learning_rate": 4.321608040201005e-05, + "loss": 1.2732, + "step": 114 + }, + { + "epoch": 1.2742382271468145, + "grad_norm": 0.6589417457580566, + "learning_rate": 4.271356783919598e-05, + "loss": 1.1874, + "step": 115 + }, + { + "epoch": 1.2853185595567866, + "grad_norm": 0.634284496307373, + "learning_rate": 4.2211055276381914e-05, + "loss": 1.2013, + "step": 116 + }, + { + "epoch": 1.296398891966759, + "grad_norm": 0.6580467820167542, + "learning_rate": 4.170854271356784e-05, + "loss": 1.1884, + "step": 117 + }, + { + "epoch": 1.3074792243767313, + "grad_norm": 0.5867501497268677, + "learning_rate": 4.120603015075377e-05, + "loss": 1.0882, + "step": 118 + }, + { + "epoch": 1.3185595567867037, + "grad_norm": 0.6408854722976685, + "learning_rate": 4.07035175879397e-05, + "loss": 1.1805, + "step": 119 + }, + { + "epoch": 1.3296398891966759, + "grad_norm": 0.6668931245803833, + "learning_rate": 4.020100502512563e-05, + "loss": 1.2314, + "step": 120 + }, + { + "epoch": 1.3407202216066483, + "grad_norm": 0.6118000149726868, + "learning_rate": 3.969849246231156e-05, + "loss": 1.0767, + "step": 121 + }, + { + "epoch": 1.3518005540166205, + "grad_norm": 0.6585280895233154, + "learning_rate": 3.919597989949749e-05, + "loss": 1.0985, + "step": 122 + }, + { + "epoch": 1.3628808864265927, + "grad_norm": 0.6733730435371399, + "learning_rate": 3.869346733668342e-05, + "loss": 1.3112, + "step": 123 + }, + { + "epoch": 1.373961218836565, + "grad_norm": 0.6803246736526489, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.194, + "step": 124 + }, + { + "epoch": 1.3850415512465375, + "grad_norm": 0.7020207047462463, + "learning_rate": 3.768844221105528e-05, + "loss": 1.2318, + "step": 125 + }, + { + "epoch": 1.3961218836565097, + "grad_norm": 0.6999279260635376, + "learning_rate": 3.7185929648241204e-05, + "loss": 1.2101, + "step": 126 + }, + { + "epoch": 1.4072022160664819, + "grad_norm": 0.7011158466339111, + "learning_rate": 3.668341708542714e-05, + "loss": 1.056, + "step": 127 + }, + { + "epoch": 1.4182825484764543, + "grad_norm": 0.7185156345367432, + "learning_rate": 3.618090452261307e-05, + "loss": 1.202, + "step": 128 + }, + { + "epoch": 1.4293628808864267, + "grad_norm": 0.6784414649009705, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.1488, + "step": 129 + }, + { + "epoch": 1.440443213296399, + "grad_norm": 0.7275694012641907, + "learning_rate": 3.517587939698493e-05, + "loss": 1.225, + "step": 130 + }, + { + "epoch": 1.451523545706371, + "grad_norm": 0.6645621061325073, + "learning_rate": 3.467336683417085e-05, + "loss": 1.0712, + "step": 131 + }, + { + "epoch": 1.4626038781163435, + "grad_norm": 0.7038294076919556, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.219, + "step": 132 + }, + { + "epoch": 1.4736842105263157, + "grad_norm": 0.7240763902664185, + "learning_rate": 3.366834170854272e-05, + "loss": 1.144, + "step": 133 + }, + { + "epoch": 1.4847645429362881, + "grad_norm": 0.6860496997833252, + "learning_rate": 3.3165829145728643e-05, + "loss": 1.1299, + "step": 134 + }, + { + "epoch": 1.4958448753462603, + "grad_norm": 0.6575061678886414, + "learning_rate": 3.2663316582914576e-05, + "loss": 1.182, + "step": 135 + }, + { + "epoch": 1.5069252077562327, + "grad_norm": 0.705866813659668, + "learning_rate": 3.21608040201005e-05, + "loss": 1.2145, + "step": 136 + }, + { + "epoch": 1.5180055401662051, + "grad_norm": 0.8369534015655518, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.2471, + "step": 137 + }, + { + "epoch": 1.5290858725761773, + "grad_norm": 0.7173314690589905, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.1579, + "step": 138 + }, + { + "epoch": 1.5401662049861495, + "grad_norm": 0.714084804058075, + "learning_rate": 3.065326633165829e-05, + "loss": 1.1485, + "step": 139 + }, + { + "epoch": 1.5512465373961217, + "grad_norm": 0.7206579446792603, + "learning_rate": 3.015075376884422e-05, + "loss": 1.2206, + "step": 140 + }, + { + "epoch": 1.5623268698060941, + "grad_norm": 0.7310248017311096, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.1414, + "step": 141 + }, + { + "epoch": 1.5734072022160666, + "grad_norm": 0.7568632364273071, + "learning_rate": 2.914572864321608e-05, + "loss": 1.1521, + "step": 142 + }, + { + "epoch": 1.5844875346260388, + "grad_norm": 0.7479029297828674, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.2343, + "step": 143 + }, + { + "epoch": 1.595567867036011, + "grad_norm": 0.6636027097702026, + "learning_rate": 2.814070351758794e-05, + "loss": 1.0381, + "step": 144 + }, + { + "epoch": 1.6066481994459834, + "grad_norm": 0.6452569961547852, + "learning_rate": 2.763819095477387e-05, + "loss": 1.0732, + "step": 145 + }, + { + "epoch": 1.6177285318559558, + "grad_norm": 0.7449268698692322, + "learning_rate": 2.7135678391959802e-05, + "loss": 1.1638, + "step": 146 + }, + { + "epoch": 1.628808864265928, + "grad_norm": 0.7598639130592346, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.2569, + "step": 147 + }, + { + "epoch": 1.6398891966759002, + "grad_norm": 0.75065678358078, + "learning_rate": 2.613065326633166e-05, + "loss": 1.1693, + "step": 148 + }, + { + "epoch": 1.6509695290858726, + "grad_norm": 0.70372474193573, + "learning_rate": 2.562814070351759e-05, + "loss": 1.1345, + "step": 149 + }, + { + "epoch": 1.662049861495845, + "grad_norm": 0.6834449172019958, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.1039, + "step": 150 + }, + { + "epoch": 1.6731301939058172, + "grad_norm": 0.717481255531311, + "learning_rate": 2.462311557788945e-05, + "loss": 1.1275, + "step": 151 + }, + { + "epoch": 1.6842105263157894, + "grad_norm": 0.6743510961532593, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.0413, + "step": 152 + }, + { + "epoch": 1.6952908587257618, + "grad_norm": 0.7231684327125549, + "learning_rate": 2.361809045226131e-05, + "loss": 1.1701, + "step": 153 + }, + { + "epoch": 1.7063711911357342, + "grad_norm": 0.7410204410552979, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.2034, + "step": 154 + }, + { + "epoch": 1.7174515235457064, + "grad_norm": 0.7088721990585327, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.1152, + "step": 155 + }, + { + "epoch": 1.7285318559556786, + "grad_norm": 0.72236168384552, + "learning_rate": 2.21105527638191e-05, + "loss": 1.1143, + "step": 156 + }, + { + "epoch": 1.739612188365651, + "grad_norm": 0.736068844795227, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.1958, + "step": 157 + }, + { + "epoch": 1.7506925207756234, + "grad_norm": 0.6970533132553101, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.1141, + "step": 158 + }, + { + "epoch": 1.7617728531855956, + "grad_norm": 0.7857602834701538, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.1372, + "step": 159 + }, + { + "epoch": 1.7728531855955678, + "grad_norm": 0.6954653859138489, + "learning_rate": 2.0100502512562815e-05, + "loss": 1.1134, + "step": 160 + }, + { + "epoch": 1.78393351800554, + "grad_norm": 0.7295299172401428, + "learning_rate": 1.9597989949748744e-05, + "loss": 1.1519, + "step": 161 + }, + { + "epoch": 1.7950138504155124, + "grad_norm": 0.7494513392448425, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.2252, + "step": 162 + }, + { + "epoch": 1.8060941828254848, + "grad_norm": 0.7257880568504333, + "learning_rate": 1.8592964824120602e-05, + "loss": 1.0802, + "step": 163 + }, + { + "epoch": 1.817174515235457, + "grad_norm": 0.7184067964553833, + "learning_rate": 1.8090452261306535e-05, + "loss": 1.0713, + "step": 164 + }, + { + "epoch": 1.8282548476454292, + "grad_norm": 0.7629296779632568, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.1353, + "step": 165 + }, + { + "epoch": 1.8393351800554016, + "grad_norm": 0.7616356611251831, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.0991, + "step": 166 + }, + { + "epoch": 1.850415512465374, + "grad_norm": 0.7364541292190552, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.1087, + "step": 167 + }, + { + "epoch": 1.8614958448753463, + "grad_norm": 0.7199831604957581, + "learning_rate": 1.608040201005025e-05, + "loss": 1.0106, + "step": 168 + }, + { + "epoch": 1.8725761772853184, + "grad_norm": 0.7781419157981873, + "learning_rate": 1.5577889447236183e-05, + "loss": 1.1996, + "step": 169 + }, + { + "epoch": 1.8836565096952909, + "grad_norm": 0.7689887285232544, + "learning_rate": 1.507537688442211e-05, + "loss": 1.202, + "step": 170 + }, + { + "epoch": 1.8947368421052633, + "grad_norm": 0.7493013739585876, + "learning_rate": 1.457286432160804e-05, + "loss": 1.1383, + "step": 171 + }, + { + "epoch": 1.9058171745152355, + "grad_norm": 0.7968258261680603, + "learning_rate": 1.407035175879397e-05, + "loss": 1.1608, + "step": 172 + }, + { + "epoch": 1.9168975069252077, + "grad_norm": 0.764722466468811, + "learning_rate": 1.3567839195979901e-05, + "loss": 1.0487, + "step": 173 + }, + { + "epoch": 1.92797783933518, + "grad_norm": 0.7502325773239136, + "learning_rate": 1.306532663316583e-05, + "loss": 1.158, + "step": 174 + }, + { + "epoch": 1.9390581717451525, + "grad_norm": 0.7293640375137329, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.0846, + "step": 175 + }, + { + "epoch": 1.9501385041551247, + "grad_norm": 0.8207630515098572, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.244, + "step": 176 + }, + { + "epoch": 1.9612188365650969, + "grad_norm": 0.7213388085365295, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.144, + "step": 177 + }, + { + "epoch": 1.9722991689750693, + "grad_norm": 0.7004494071006775, + "learning_rate": 1.105527638190955e-05, + "loss": 1.0342, + "step": 178 + }, + { + "epoch": 1.9833795013850417, + "grad_norm": 0.7714151740074158, + "learning_rate": 1.0552763819095479e-05, + "loss": 1.1949, + "step": 179 + }, + { + "epoch": 1.994459833795014, + "grad_norm": 0.7237851023674011, + "learning_rate": 1.0050251256281408e-05, + "loss": 1.1064, + "step": 180 + }, + { + "epoch": 2.005540166204986, + "grad_norm": 1.8300179243087769, + "learning_rate": 9.547738693467337e-06, + "loss": 1.964, + "step": 181 + }, + { + "epoch": 2.0166204986149583, + "grad_norm": 0.727444589138031, + "learning_rate": 9.045226130653267e-06, + "loss": 1.1091, + "step": 182 + }, + { + "epoch": 2.027700831024931, + "grad_norm": 0.7298402786254883, + "learning_rate": 8.542713567839196e-06, + "loss": 1.0274, + "step": 183 + }, + { + "epoch": 2.038781163434903, + "grad_norm": 0.7863662242889404, + "learning_rate": 8.040201005025125e-06, + "loss": 1.2199, + "step": 184 + }, + { + "epoch": 2.0498614958448753, + "grad_norm": 0.7304998636245728, + "learning_rate": 7.537688442211055e-06, + "loss": 1.1346, + "step": 185 + }, + { + "epoch": 2.0609418282548475, + "grad_norm": 0.713515043258667, + "learning_rate": 7.035175879396985e-06, + "loss": 1.0402, + "step": 186 + }, + { + "epoch": 2.07202216066482, + "grad_norm": 0.741409182548523, + "learning_rate": 6.532663316582915e-06, + "loss": 1.0777, + "step": 187 + }, + { + "epoch": 2.0831024930747923, + "grad_norm": 0.7387590408325195, + "learning_rate": 6.030150753768844e-06, + "loss": 1.1149, + "step": 188 + }, + { + "epoch": 2.0941828254847645, + "grad_norm": 0.7313185334205627, + "learning_rate": 5.527638190954775e-06, + "loss": 1.0968, + "step": 189 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.7165120840072632, + "learning_rate": 5.025125628140704e-06, + "loss": 1.126, + "step": 190 + }, + { + "epoch": 2.1163434903047094, + "grad_norm": 0.7364940643310547, + "learning_rate": 4.522613065326634e-06, + "loss": 1.0637, + "step": 191 + }, + { + "epoch": 2.1274238227146816, + "grad_norm": 0.7294583320617676, + "learning_rate": 4.020100502512563e-06, + "loss": 1.0529, + "step": 192 + }, + { + "epoch": 2.1385041551246537, + "grad_norm": 0.7198062539100647, + "learning_rate": 3.5175879396984926e-06, + "loss": 1.1796, + "step": 193 + }, + { + "epoch": 2.149584487534626, + "grad_norm": 0.7021225690841675, + "learning_rate": 3.015075376884422e-06, + "loss": 1.0443, + "step": 194 + }, + { + "epoch": 2.160664819944598, + "grad_norm": 0.6765656471252441, + "learning_rate": 2.512562814070352e-06, + "loss": 1.025, + "step": 195 + }, + { + "epoch": 2.1717451523545708, + "grad_norm": 0.7222888469696045, + "learning_rate": 2.0100502512562813e-06, + "loss": 1.0863, + "step": 196 + }, + { + "epoch": 2.182825484764543, + "grad_norm": 0.7656276226043701, + "learning_rate": 1.507537688442211e-06, + "loss": 1.1742, + "step": 197 + }, + { + "epoch": 2.193905817174515, + "grad_norm": 0.723197340965271, + "learning_rate": 1.0050251256281407e-06, + "loss": 1.1128, + "step": 198 + }, + { + "epoch": 2.2049861495844874, + "grad_norm": 0.7231385111808777, + "learning_rate": 5.025125628140703e-07, + "loss": 1.0786, + "step": 199 + }, + { + "epoch": 2.21606648199446, + "grad_norm": 0.713428258895874, + "learning_rate": 0.0, + "loss": 1.0523, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.384526434766029e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_German_Vietnamese/checkpoint-200/training_args.bin b/llama_German_Vietnamese/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1761246eeb99323a2d4268c53c160fb543644dbb --- /dev/null +++ b/llama_German_Vietnamese/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b06a1abf95f8488cc91273e81a1fd64a9a294f4d0542593b0acd563e7526e1e1 +size 5624 diff --git a/llama_Vietnamese_Chinese/checkpoint-200/README.md b/llama_Vietnamese_Chinese/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_Vietnamese_Chinese/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_Vietnamese_Chinese/checkpoint-200/adapter_config.json b/llama_Vietnamese_Chinese/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2e852ab801d19437dec2a6a2a96ec97e363ae85a --- /dev/null +++ b/llama_Vietnamese_Chinese/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "up_proj", + "k_proj", + "gate_proj", + "v_proj", + "q_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_Vietnamese_Chinese/checkpoint-200/adapter_model.safetensors b/llama_Vietnamese_Chinese/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..572af40083586c5884fc3a3fa480aac1824059b0 --- /dev/null +++ b/llama_Vietnamese_Chinese/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2432e704123300fa0e85adcc4e9b6c93bd9f0ed6c5b57fc3e9d84610b3337e2 +size 167832240 diff --git a/llama_Vietnamese_Chinese/checkpoint-200/optimizer.pt b/llama_Vietnamese_Chinese/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4e7c9a060c8235148eb0ccf90b31c36a8e820a9 --- /dev/null +++ b/llama_Vietnamese_Chinese/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d67cd75628a857937c1b896e20fb19e4d539f1f005a455b16deba36679bd69d2 +size 85723284 diff --git a/llama_Vietnamese_Chinese/checkpoint-200/rng_state.pth b/llama_Vietnamese_Chinese/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/llama_Vietnamese_Chinese/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/llama_Vietnamese_Chinese/checkpoint-200/scheduler.pt b/llama_Vietnamese_Chinese/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_Vietnamese_Chinese/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_Vietnamese_Chinese/checkpoint-200/special_tokens_map.json b/llama_Vietnamese_Chinese/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_Vietnamese_Chinese/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_Vietnamese_Chinese/checkpoint-200/tokenizer.json b/llama_Vietnamese_Chinese/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_Vietnamese_Chinese/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_Vietnamese_Chinese/checkpoint-200/tokenizer_config.json b/llama_Vietnamese_Chinese/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_Vietnamese_Chinese/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_Vietnamese_Chinese/checkpoint-200/trainer_state.json b/llama_Vietnamese_Chinese/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d372fcc63737ab7f1f05814f3c8cf1003687768c --- /dev/null +++ b/llama_Vietnamese_Chinese/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7036059806508356, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.003518029903254178, + "grad_norm": 1.260473608970642, + "learning_rate": 0.0001, + "loss": 2.9049, + "step": 1 + }, + { + "epoch": 0.007036059806508356, + "grad_norm": 1.2573540210723877, + "learning_rate": 9.949748743718594e-05, + "loss": 2.9229, + "step": 2 + }, + { + "epoch": 0.010554089709762533, + "grad_norm": 1.1971232891082764, + "learning_rate": 9.899497487437186e-05, + "loss": 2.7224, + "step": 3 + }, + { + "epoch": 0.014072119613016711, + "grad_norm": 1.3032786846160889, + "learning_rate": 9.84924623115578e-05, + "loss": 2.5683, + "step": 4 + }, + { + "epoch": 0.01759014951627089, + "grad_norm": 1.358597755432129, + "learning_rate": 9.798994974874372e-05, + "loss": 2.4848, + "step": 5 + }, + { + "epoch": 0.021108179419525065, + "grad_norm": 1.439229130744934, + "learning_rate": 9.748743718592965e-05, + "loss": 2.2625, + "step": 6 + }, + { + "epoch": 0.024626209322779244, + "grad_norm": 1.313475251197815, + "learning_rate": 9.698492462311559e-05, + "loss": 2.0213, + "step": 7 + }, + { + "epoch": 0.028144239226033423, + "grad_norm": 1.3972655534744263, + "learning_rate": 9.64824120603015e-05, + "loss": 2.0339, + "step": 8 + }, + { + "epoch": 0.0316622691292876, + "grad_norm": 1.2032045125961304, + "learning_rate": 9.597989949748745e-05, + "loss": 1.7561, + "step": 9 + }, + { + "epoch": 0.03518029903254178, + "grad_norm": 1.1419575214385986, + "learning_rate": 9.547738693467337e-05, + "loss": 1.6908, + "step": 10 + }, + { + "epoch": 0.03869832893579595, + "grad_norm": 1.0084314346313477, + "learning_rate": 9.49748743718593e-05, + "loss": 1.4594, + "step": 11 + }, + { + "epoch": 0.04221635883905013, + "grad_norm": 0.8950707912445068, + "learning_rate": 9.447236180904523e-05, + "loss": 1.4036, + "step": 12 + }, + { + "epoch": 0.04573438874230431, + "grad_norm": 0.735814094543457, + "learning_rate": 9.396984924623115e-05, + "loss": 1.3852, + "step": 13 + }, + { + "epoch": 0.04925241864555849, + "grad_norm": 0.7151777148246765, + "learning_rate": 9.34673366834171e-05, + "loss": 1.4866, + "step": 14 + }, + { + "epoch": 0.052770448548812667, + "grad_norm": 0.6819817423820496, + "learning_rate": 9.296482412060302e-05, + "loss": 1.342, + "step": 15 + }, + { + "epoch": 0.056288478452066845, + "grad_norm": 0.6794399619102478, + "learning_rate": 9.246231155778895e-05, + "loss": 1.3458, + "step": 16 + }, + { + "epoch": 0.05980650835532102, + "grad_norm": 0.7077609896659851, + "learning_rate": 9.195979899497488e-05, + "loss": 1.3624, + "step": 17 + }, + { + "epoch": 0.0633245382585752, + "grad_norm": 0.6987395286560059, + "learning_rate": 9.14572864321608e-05, + "loss": 1.2826, + "step": 18 + }, + { + "epoch": 0.06684256816182937, + "grad_norm": 0.7677096724510193, + "learning_rate": 9.095477386934675e-05, + "loss": 1.3643, + "step": 19 + }, + { + "epoch": 0.07036059806508356, + "grad_norm": 0.7091855406761169, + "learning_rate": 9.045226130653267e-05, + "loss": 1.2746, + "step": 20 + }, + { + "epoch": 0.07387862796833773, + "grad_norm": 0.7494800090789795, + "learning_rate": 8.99497487437186e-05, + "loss": 1.3333, + "step": 21 + }, + { + "epoch": 0.0773966578715919, + "grad_norm": 0.7067513465881348, + "learning_rate": 8.944723618090453e-05, + "loss": 1.1819, + "step": 22 + }, + { + "epoch": 0.08091468777484609, + "grad_norm": 0.7177988290786743, + "learning_rate": 8.894472361809045e-05, + "loss": 1.248, + "step": 23 + }, + { + "epoch": 0.08443271767810026, + "grad_norm": 0.8215416669845581, + "learning_rate": 8.84422110552764e-05, + "loss": 1.2657, + "step": 24 + }, + { + "epoch": 0.08795074758135445, + "grad_norm": 0.7781690955162048, + "learning_rate": 8.793969849246232e-05, + "loss": 1.2697, + "step": 25 + }, + { + "epoch": 0.09146877748460862, + "grad_norm": 0.7476558685302734, + "learning_rate": 8.743718592964825e-05, + "loss": 1.1899, + "step": 26 + }, + { + "epoch": 0.09498680738786279, + "grad_norm": 0.7552306056022644, + "learning_rate": 8.693467336683418e-05, + "loss": 1.2209, + "step": 27 + }, + { + "epoch": 0.09850483729111698, + "grad_norm": 0.8574095368385315, + "learning_rate": 8.64321608040201e-05, + "loss": 1.2418, + "step": 28 + }, + { + "epoch": 0.10202286719437115, + "grad_norm": 0.8529037833213806, + "learning_rate": 8.592964824120603e-05, + "loss": 1.2028, + "step": 29 + }, + { + "epoch": 0.10554089709762533, + "grad_norm": 0.9519858360290527, + "learning_rate": 8.542713567839196e-05, + "loss": 1.2501, + "step": 30 + }, + { + "epoch": 0.1090589270008795, + "grad_norm": 0.929736852645874, + "learning_rate": 8.49246231155779e-05, + "loss": 1.162, + "step": 31 + }, + { + "epoch": 0.11257695690413369, + "grad_norm": 1.0004149675369263, + "learning_rate": 8.442211055276383e-05, + "loss": 1.297, + "step": 32 + }, + { + "epoch": 0.11609498680738786, + "grad_norm": 0.9739089608192444, + "learning_rate": 8.391959798994975e-05, + "loss": 1.2161, + "step": 33 + }, + { + "epoch": 0.11961301671064203, + "grad_norm": 1.010574460029602, + "learning_rate": 8.341708542713568e-05, + "loss": 1.2653, + "step": 34 + }, + { + "epoch": 0.12313104661389622, + "grad_norm": 1.1182818412780762, + "learning_rate": 8.291457286432161e-05, + "loss": 1.2406, + "step": 35 + }, + { + "epoch": 0.1266490765171504, + "grad_norm": 1.0420522689819336, + "learning_rate": 8.241206030150754e-05, + "loss": 1.1295, + "step": 36 + }, + { + "epoch": 0.13016710642040458, + "grad_norm": 0.9873694777488708, + "learning_rate": 8.190954773869348e-05, + "loss": 1.1819, + "step": 37 + }, + { + "epoch": 0.13368513632365875, + "grad_norm": 0.9985349774360657, + "learning_rate": 8.14070351758794e-05, + "loss": 1.0707, + "step": 38 + }, + { + "epoch": 0.13720316622691292, + "grad_norm": 0.9374362826347351, + "learning_rate": 8.090452261306533e-05, + "loss": 1.1942, + "step": 39 + }, + { + "epoch": 0.14072119613016712, + "grad_norm": 0.9439392685890198, + "learning_rate": 8.040201005025126e-05, + "loss": 1.249, + "step": 40 + }, + { + "epoch": 0.1442392260334213, + "grad_norm": 0.7446234822273254, + "learning_rate": 7.989949748743719e-05, + "loss": 1.2198, + "step": 41 + }, + { + "epoch": 0.14775725593667546, + "grad_norm": 0.6614280939102173, + "learning_rate": 7.939698492462313e-05, + "loss": 1.147, + "step": 42 + }, + { + "epoch": 0.15127528583992964, + "grad_norm": 0.7048504948616028, + "learning_rate": 7.889447236180904e-05, + "loss": 1.2024, + "step": 43 + }, + { + "epoch": 0.1547933157431838, + "grad_norm": 0.6736462116241455, + "learning_rate": 7.839195979899498e-05, + "loss": 1.1622, + "step": 44 + }, + { + "epoch": 0.158311345646438, + "grad_norm": 0.651723325252533, + "learning_rate": 7.788944723618091e-05, + "loss": 1.115, + "step": 45 + }, + { + "epoch": 0.16182937554969218, + "grad_norm": 0.6364433169364929, + "learning_rate": 7.738693467336684e-05, + "loss": 1.1094, + "step": 46 + }, + { + "epoch": 0.16534740545294635, + "grad_norm": 0.6916666626930237, + "learning_rate": 7.688442211055277e-05, + "loss": 1.0492, + "step": 47 + }, + { + "epoch": 0.16886543535620052, + "grad_norm": 0.6645711064338684, + "learning_rate": 7.638190954773869e-05, + "loss": 1.1187, + "step": 48 + }, + { + "epoch": 0.1723834652594547, + "grad_norm": 0.6615894436836243, + "learning_rate": 7.587939698492463e-05, + "loss": 1.0937, + "step": 49 + }, + { + "epoch": 0.1759014951627089, + "grad_norm": 0.6639358997344971, + "learning_rate": 7.537688442211056e-05, + "loss": 1.1598, + "step": 50 + }, + { + "epoch": 0.17941952506596306, + "grad_norm": 0.7072437405586243, + "learning_rate": 7.487437185929649e-05, + "loss": 1.0835, + "step": 51 + }, + { + "epoch": 0.18293755496921724, + "grad_norm": 0.6130443811416626, + "learning_rate": 7.437185929648241e-05, + "loss": 1.1116, + "step": 52 + }, + { + "epoch": 0.1864555848724714, + "grad_norm": 0.6709645986557007, + "learning_rate": 7.386934673366834e-05, + "loss": 1.0814, + "step": 53 + }, + { + "epoch": 0.18997361477572558, + "grad_norm": 0.724520742893219, + "learning_rate": 7.336683417085427e-05, + "loss": 1.1467, + "step": 54 + }, + { + "epoch": 0.19349164467897978, + "grad_norm": 0.7026971578598022, + "learning_rate": 7.28643216080402e-05, + "loss": 1.1671, + "step": 55 + }, + { + "epoch": 0.19700967458223395, + "grad_norm": 0.6167840957641602, + "learning_rate": 7.236180904522614e-05, + "loss": 1.0342, + "step": 56 + }, + { + "epoch": 0.20052770448548812, + "grad_norm": 0.6177359819412231, + "learning_rate": 7.185929648241206e-05, + "loss": 0.9947, + "step": 57 + }, + { + "epoch": 0.2040457343887423, + "grad_norm": 0.6578599810600281, + "learning_rate": 7.135678391959799e-05, + "loss": 1.083, + "step": 58 + }, + { + "epoch": 0.2075637642919965, + "grad_norm": 0.6553827524185181, + "learning_rate": 7.085427135678392e-05, + "loss": 1.1451, + "step": 59 + }, + { + "epoch": 0.21108179419525067, + "grad_norm": 0.6147003769874573, + "learning_rate": 7.035175879396985e-05, + "loss": 1.1026, + "step": 60 + }, + { + "epoch": 0.21459982409850484, + "grad_norm": 0.6656669974327087, + "learning_rate": 6.984924623115579e-05, + "loss": 1.0638, + "step": 61 + }, + { + "epoch": 0.218117854001759, + "grad_norm": 0.6732162237167358, + "learning_rate": 6.93467336683417e-05, + "loss": 1.1639, + "step": 62 + }, + { + "epoch": 0.22163588390501318, + "grad_norm": 0.6583305597305298, + "learning_rate": 6.884422110552764e-05, + "loss": 1.0694, + "step": 63 + }, + { + "epoch": 0.22515391380826738, + "grad_norm": 0.573226809501648, + "learning_rate": 6.834170854271357e-05, + "loss": 1.0237, + "step": 64 + }, + { + "epoch": 0.22867194371152155, + "grad_norm": 0.6894761919975281, + "learning_rate": 6.78391959798995e-05, + "loss": 1.0884, + "step": 65 + }, + { + "epoch": 0.23218997361477572, + "grad_norm": 0.6620854735374451, + "learning_rate": 6.733668341708544e-05, + "loss": 1.0515, + "step": 66 + }, + { + "epoch": 0.2357080035180299, + "grad_norm": 0.695426344871521, + "learning_rate": 6.683417085427135e-05, + "loss": 1.1149, + "step": 67 + }, + { + "epoch": 0.23922603342128407, + "grad_norm": 0.6343066096305847, + "learning_rate": 6.633165829145729e-05, + "loss": 1.0736, + "step": 68 + }, + { + "epoch": 0.24274406332453827, + "grad_norm": 0.6478216052055359, + "learning_rate": 6.582914572864322e-05, + "loss": 1.0576, + "step": 69 + }, + { + "epoch": 0.24626209322779244, + "grad_norm": 0.7264822125434875, + "learning_rate": 6.532663316582915e-05, + "loss": 1.16, + "step": 70 + }, + { + "epoch": 0.2497801231310466, + "grad_norm": 0.7927188277244568, + "learning_rate": 6.482412060301508e-05, + "loss": 1.0784, + "step": 71 + }, + { + "epoch": 0.2532981530343008, + "grad_norm": 0.6734123826026917, + "learning_rate": 6.4321608040201e-05, + "loss": 1.1155, + "step": 72 + }, + { + "epoch": 0.256816182937555, + "grad_norm": 0.6928442120552063, + "learning_rate": 6.381909547738694e-05, + "loss": 1.1, + "step": 73 + }, + { + "epoch": 0.26033421284080915, + "grad_norm": 0.6205620765686035, + "learning_rate": 6.331658291457287e-05, + "loss": 1.0557, + "step": 74 + }, + { + "epoch": 0.2638522427440633, + "grad_norm": 0.6895455718040466, + "learning_rate": 6.28140703517588e-05, + "loss": 1.0893, + "step": 75 + }, + { + "epoch": 0.2673702726473175, + "grad_norm": 0.7075064778327942, + "learning_rate": 6.231155778894473e-05, + "loss": 1.0281, + "step": 76 + }, + { + "epoch": 0.27088830255057167, + "grad_norm": 0.8777890801429749, + "learning_rate": 6.180904522613065e-05, + "loss": 1.1, + "step": 77 + }, + { + "epoch": 0.27440633245382584, + "grad_norm": 0.7415732145309448, + "learning_rate": 6.130653266331658e-05, + "loss": 1.1266, + "step": 78 + }, + { + "epoch": 0.27792436235708, + "grad_norm": 0.6941065192222595, + "learning_rate": 6.080402010050251e-05, + "loss": 1.1073, + "step": 79 + }, + { + "epoch": 0.28144239226033424, + "grad_norm": 0.713752269744873, + "learning_rate": 6.030150753768844e-05, + "loss": 1.0299, + "step": 80 + }, + { + "epoch": 0.2849604221635884, + "grad_norm": 0.672386884689331, + "learning_rate": 5.979899497487438e-05, + "loss": 1.1285, + "step": 81 + }, + { + "epoch": 0.2884784520668426, + "grad_norm": 0.6600875854492188, + "learning_rate": 5.929648241206031e-05, + "loss": 1.0618, + "step": 82 + }, + { + "epoch": 0.29199648197009676, + "grad_norm": 0.7304966449737549, + "learning_rate": 5.879396984924623e-05, + "loss": 1.068, + "step": 83 + }, + { + "epoch": 0.2955145118733509, + "grad_norm": 0.7191479206085205, + "learning_rate": 5.829145728643216e-05, + "loss": 1.0915, + "step": 84 + }, + { + "epoch": 0.2990325417766051, + "grad_norm": 0.6817315220832825, + "learning_rate": 5.778894472361809e-05, + "loss": 1.0081, + "step": 85 + }, + { + "epoch": 0.30255057167985927, + "grad_norm": 0.7097010016441345, + "learning_rate": 5.728643216080403e-05, + "loss": 1.0442, + "step": 86 + }, + { + "epoch": 0.30606860158311344, + "grad_norm": 0.7585952281951904, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.0238, + "step": 87 + }, + { + "epoch": 0.3095866314863676, + "grad_norm": 0.7607995271682739, + "learning_rate": 5.628140703517588e-05, + "loss": 1.0959, + "step": 88 + }, + { + "epoch": 0.3131046613896218, + "grad_norm": 0.67258220911026, + "learning_rate": 5.577889447236181e-05, + "loss": 0.9929, + "step": 89 + }, + { + "epoch": 0.316622691292876, + "grad_norm": 0.75568026304245, + "learning_rate": 5.527638190954774e-05, + "loss": 1.105, + "step": 90 + }, + { + "epoch": 0.3201407211961302, + "grad_norm": 0.8852781057357788, + "learning_rate": 5.477386934673368e-05, + "loss": 1.083, + "step": 91 + }, + { + "epoch": 0.32365875109938436, + "grad_norm": 0.6639973521232605, + "learning_rate": 5.4271356783919604e-05, + "loss": 1.073, + "step": 92 + }, + { + "epoch": 0.32717678100263853, + "grad_norm": 0.7528688311576843, + "learning_rate": 5.376884422110553e-05, + "loss": 1.0957, + "step": 93 + }, + { + "epoch": 0.3306948109058927, + "grad_norm": 0.7375084757804871, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.0804, + "step": 94 + }, + { + "epoch": 0.33421284080914687, + "grad_norm": 0.8116129040718079, + "learning_rate": 5.276381909547739e-05, + "loss": 1.0797, + "step": 95 + }, + { + "epoch": 0.33773087071240104, + "grad_norm": 0.7964279055595398, + "learning_rate": 5.226130653266332e-05, + "loss": 1.1213, + "step": 96 + }, + { + "epoch": 0.3412489006156552, + "grad_norm": 0.765575110912323, + "learning_rate": 5.175879396984925e-05, + "loss": 1.0384, + "step": 97 + }, + { + "epoch": 0.3447669305189094, + "grad_norm": 0.6614196300506592, + "learning_rate": 5.125628140703518e-05, + "loss": 1.0332, + "step": 98 + }, + { + "epoch": 0.3482849604221636, + "grad_norm": 0.7407499551773071, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.0688, + "step": 99 + }, + { + "epoch": 0.3518029903254178, + "grad_norm": 0.8672274947166443, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.0742, + "step": 100 + }, + { + "epoch": 0.35532102022867196, + "grad_norm": 0.6899972558021545, + "learning_rate": 4.974874371859297e-05, + "loss": 0.9776, + "step": 101 + }, + { + "epoch": 0.35883905013192613, + "grad_norm": 0.7466877698898315, + "learning_rate": 4.92462311557789e-05, + "loss": 1.0293, + "step": 102 + }, + { + "epoch": 0.3623570800351803, + "grad_norm": 0.7986593842506409, + "learning_rate": 4.874371859296483e-05, + "loss": 1.0399, + "step": 103 + }, + { + "epoch": 0.3658751099384345, + "grad_norm": 0.6813223958015442, + "learning_rate": 4.824120603015075e-05, + "loss": 1.063, + "step": 104 + }, + { + "epoch": 0.36939313984168864, + "grad_norm": 0.7377122044563293, + "learning_rate": 4.7738693467336685e-05, + "loss": 0.9959, + "step": 105 + }, + { + "epoch": 0.3729111697449428, + "grad_norm": 0.7429965138435364, + "learning_rate": 4.723618090452262e-05, + "loss": 1.0617, + "step": 106 + }, + { + "epoch": 0.376429199648197, + "grad_norm": 0.8200985193252563, + "learning_rate": 4.673366834170855e-05, + "loss": 1.069, + "step": 107 + }, + { + "epoch": 0.37994722955145116, + "grad_norm": 0.734062910079956, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.1513, + "step": 108 + }, + { + "epoch": 0.3834652594547054, + "grad_norm": 0.8677653670310974, + "learning_rate": 4.57286432160804e-05, + "loss": 1.1646, + "step": 109 + }, + { + "epoch": 0.38698328935795956, + "grad_norm": 0.7318121790885925, + "learning_rate": 4.522613065326633e-05, + "loss": 1.0443, + "step": 110 + }, + { + "epoch": 0.39050131926121373, + "grad_norm": 0.8211216330528259, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.1295, + "step": 111 + }, + { + "epoch": 0.3940193491644679, + "grad_norm": 0.6949535608291626, + "learning_rate": 4.42211055276382e-05, + "loss": 1.0175, + "step": 112 + }, + { + "epoch": 0.3975373790677221, + "grad_norm": 0.7230639457702637, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.0341, + "step": 113 + }, + { + "epoch": 0.40105540897097625, + "grad_norm": 0.793847918510437, + "learning_rate": 4.321608040201005e-05, + "loss": 1.0576, + "step": 114 + }, + { + "epoch": 0.4045734388742304, + "grad_norm": 0.7108281850814819, + "learning_rate": 4.271356783919598e-05, + "loss": 1.0636, + "step": 115 + }, + { + "epoch": 0.4080914687774846, + "grad_norm": 0.7297809720039368, + "learning_rate": 4.2211055276381914e-05, + "loss": 1.0821, + "step": 116 + }, + { + "epoch": 0.41160949868073876, + "grad_norm": 0.6856512427330017, + "learning_rate": 4.170854271356784e-05, + "loss": 0.9826, + "step": 117 + }, + { + "epoch": 0.415127528583993, + "grad_norm": 0.7112051248550415, + "learning_rate": 4.120603015075377e-05, + "loss": 1.0463, + "step": 118 + }, + { + "epoch": 0.41864555848724716, + "grad_norm": 0.6769644021987915, + "learning_rate": 4.07035175879397e-05, + "loss": 1.0091, + "step": 119 + }, + { + "epoch": 0.42216358839050133, + "grad_norm": 0.7250102758407593, + "learning_rate": 4.020100502512563e-05, + "loss": 1.0686, + "step": 120 + }, + { + "epoch": 0.4256816182937555, + "grad_norm": 0.7410470843315125, + "learning_rate": 3.969849246231156e-05, + "loss": 1.0755, + "step": 121 + }, + { + "epoch": 0.4291996481970097, + "grad_norm": 0.7236255407333374, + "learning_rate": 3.919597989949749e-05, + "loss": 1.0721, + "step": 122 + }, + { + "epoch": 0.43271767810026385, + "grad_norm": 0.7625666856765747, + "learning_rate": 3.869346733668342e-05, + "loss": 0.966, + "step": 123 + }, + { + "epoch": 0.436235708003518, + "grad_norm": 0.7245182394981384, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.0801, + "step": 124 + }, + { + "epoch": 0.4397537379067722, + "grad_norm": 0.7869658470153809, + "learning_rate": 3.768844221105528e-05, + "loss": 1.022, + "step": 125 + }, + { + "epoch": 0.44327176781002636, + "grad_norm": 0.7516188621520996, + "learning_rate": 3.7185929648241204e-05, + "loss": 1.0499, + "step": 126 + }, + { + "epoch": 0.4467897977132806, + "grad_norm": 0.7964783906936646, + "learning_rate": 3.668341708542714e-05, + "loss": 1.0321, + "step": 127 + }, + { + "epoch": 0.45030782761653476, + "grad_norm": 0.8271761536598206, + "learning_rate": 3.618090452261307e-05, + "loss": 1.0488, + "step": 128 + }, + { + "epoch": 0.45382585751978893, + "grad_norm": 0.69193434715271, + "learning_rate": 3.5678391959798995e-05, + "loss": 0.9999, + "step": 129 + }, + { + "epoch": 0.4573438874230431, + "grad_norm": 0.7824375033378601, + "learning_rate": 3.517587939698493e-05, + "loss": 1.0199, + "step": 130 + }, + { + "epoch": 0.4608619173262973, + "grad_norm": 0.7616211771965027, + "learning_rate": 3.467336683417085e-05, + "loss": 0.9752, + "step": 131 + }, + { + "epoch": 0.46437994722955145, + "grad_norm": 0.7464612126350403, + "learning_rate": 3.4170854271356785e-05, + "loss": 0.9756, + "step": 132 + }, + { + "epoch": 0.4678979771328056, + "grad_norm": 0.7916256189346313, + "learning_rate": 3.366834170854272e-05, + "loss": 1.1048, + "step": 133 + }, + { + "epoch": 0.4714160070360598, + "grad_norm": 0.7534184455871582, + "learning_rate": 3.3165829145728643e-05, + "loss": 0.9938, + "step": 134 + }, + { + "epoch": 0.47493403693931396, + "grad_norm": 0.6909853219985962, + "learning_rate": 3.2663316582914576e-05, + "loss": 0.9762, + "step": 135 + }, + { + "epoch": 0.47845206684256814, + "grad_norm": 0.7753147482872009, + "learning_rate": 3.21608040201005e-05, + "loss": 1.105, + "step": 136 + }, + { + "epoch": 0.48197009674582236, + "grad_norm": 0.7884505391120911, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.0783, + "step": 137 + }, + { + "epoch": 0.48548812664907653, + "grad_norm": 0.7265881896018982, + "learning_rate": 3.1155778894472366e-05, + "loss": 0.9864, + "step": 138 + }, + { + "epoch": 0.4890061565523307, + "grad_norm": 0.7939391732215881, + "learning_rate": 3.065326633165829e-05, + "loss": 1.1004, + "step": 139 + }, + { + "epoch": 0.4925241864555849, + "grad_norm": 0.739389955997467, + "learning_rate": 3.015075376884422e-05, + "loss": 1.0617, + "step": 140 + }, + { + "epoch": 0.49604221635883905, + "grad_norm": 0.8098007440567017, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.0949, + "step": 141 + }, + { + "epoch": 0.4995602462620932, + "grad_norm": 0.8120628595352173, + "learning_rate": 2.914572864321608e-05, + "loss": 1.0509, + "step": 142 + }, + { + "epoch": 0.5030782761653474, + "grad_norm": 0.8424797654151917, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.1095, + "step": 143 + }, + { + "epoch": 0.5065963060686016, + "grad_norm": 0.7528412938117981, + "learning_rate": 2.814070351758794e-05, + "loss": 0.9922, + "step": 144 + }, + { + "epoch": 0.5101143359718557, + "grad_norm": 0.7280577421188354, + "learning_rate": 2.763819095477387e-05, + "loss": 1.0284, + "step": 145 + }, + { + "epoch": 0.51363236587511, + "grad_norm": 0.7835600972175598, + "learning_rate": 2.7135678391959802e-05, + "loss": 1.0266, + "step": 146 + }, + { + "epoch": 0.5171503957783641, + "grad_norm": 0.7442212104797363, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.0308, + "step": 147 + }, + { + "epoch": 0.5206684256816183, + "grad_norm": 0.7954034209251404, + "learning_rate": 2.613065326633166e-05, + "loss": 1.1187, + "step": 148 + }, + { + "epoch": 0.5241864555848724, + "grad_norm": 0.6770613193511963, + "learning_rate": 2.562814070351759e-05, + "loss": 0.9689, + "step": 149 + }, + { + "epoch": 0.5277044854881267, + "grad_norm": 0.7365478277206421, + "learning_rate": 2.5125628140703518e-05, + "loss": 0.9841, + "step": 150 + }, + { + "epoch": 0.5312225153913809, + "grad_norm": 0.7578640580177307, + "learning_rate": 2.462311557788945e-05, + "loss": 1.041, + "step": 151 + }, + { + "epoch": 0.534740545294635, + "grad_norm": 0.7007668614387512, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.0407, + "step": 152 + }, + { + "epoch": 0.5382585751978892, + "grad_norm": 0.7602474689483643, + "learning_rate": 2.361809045226131e-05, + "loss": 0.9901, + "step": 153 + }, + { + "epoch": 0.5417766051011433, + "grad_norm": 0.8691968321800232, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.0138, + "step": 154 + }, + { + "epoch": 0.5452946350043976, + "grad_norm": 0.7328104376792908, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.0706, + "step": 155 + }, + { + "epoch": 0.5488126649076517, + "grad_norm": 0.7762755751609802, + "learning_rate": 2.21105527638191e-05, + "loss": 1.0248, + "step": 156 + }, + { + "epoch": 0.5523306948109059, + "grad_norm": 0.854016900062561, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.0046, + "step": 157 + }, + { + "epoch": 0.55584872471416, + "grad_norm": 0.7478740215301514, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.0434, + "step": 158 + }, + { + "epoch": 0.5593667546174143, + "grad_norm": 0.8043814301490784, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.0655, + "step": 159 + }, + { + "epoch": 0.5628847845206685, + "grad_norm": 0.8493765592575073, + "learning_rate": 2.0100502512562815e-05, + "loss": 1.0395, + "step": 160 + }, + { + "epoch": 0.5664028144239226, + "grad_norm": 0.783991813659668, + "learning_rate": 1.9597989949748744e-05, + "loss": 0.9494, + "step": 161 + }, + { + "epoch": 0.5699208443271768, + "grad_norm": 0.7609344124794006, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.055, + "step": 162 + }, + { + "epoch": 0.5734388742304309, + "grad_norm": 0.766476035118103, + "learning_rate": 1.8592964824120602e-05, + "loss": 1.0782, + "step": 163 + }, + { + "epoch": 0.5769569041336852, + "grad_norm": 0.7780715227127075, + "learning_rate": 1.8090452261306535e-05, + "loss": 0.9793, + "step": 164 + }, + { + "epoch": 0.5804749340369393, + "grad_norm": 0.7344515919685364, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.0129, + "step": 165 + }, + { + "epoch": 0.5839929639401935, + "grad_norm": 0.7865444421768188, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.0503, + "step": 166 + }, + { + "epoch": 0.5875109938434476, + "grad_norm": 0.8012449741363525, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.0298, + "step": 167 + }, + { + "epoch": 0.5910290237467019, + "grad_norm": 0.8140902519226074, + "learning_rate": 1.608040201005025e-05, + "loss": 1.1027, + "step": 168 + }, + { + "epoch": 0.594547053649956, + "grad_norm": 0.9053994417190552, + "learning_rate": 1.5577889447236183e-05, + "loss": 1.0591, + "step": 169 + }, + { + "epoch": 0.5980650835532102, + "grad_norm": 0.7367292642593384, + "learning_rate": 1.507537688442211e-05, + "loss": 1.0475, + "step": 170 + }, + { + "epoch": 0.6015831134564644, + "grad_norm": 0.8504379391670227, + "learning_rate": 1.457286432160804e-05, + "loss": 0.9989, + "step": 171 + }, + { + "epoch": 0.6051011433597185, + "grad_norm": 0.7499436736106873, + "learning_rate": 1.407035175879397e-05, + "loss": 1.0329, + "step": 172 + }, + { + "epoch": 0.6086191732629728, + "grad_norm": 0.8187640309333801, + "learning_rate": 1.3567839195979901e-05, + "loss": 1.0425, + "step": 173 + }, + { + "epoch": 0.6121372031662269, + "grad_norm": 0.7070643305778503, + "learning_rate": 1.306532663316583e-05, + "loss": 0.9766, + "step": 174 + }, + { + "epoch": 0.6156552330694811, + "grad_norm": 0.8162341713905334, + "learning_rate": 1.2562814070351759e-05, + "loss": 0.9974, + "step": 175 + }, + { + "epoch": 0.6191732629727352, + "grad_norm": 0.7759721875190735, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.0475, + "step": 176 + }, + { + "epoch": 0.6226912928759895, + "grad_norm": 0.7885333299636841, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.0531, + "step": 177 + }, + { + "epoch": 0.6262093227792436, + "grad_norm": 0.7671830654144287, + "learning_rate": 1.105527638190955e-05, + "loss": 0.9974, + "step": 178 + }, + { + "epoch": 0.6297273526824978, + "grad_norm": 0.7737442851066589, + "learning_rate": 1.0552763819095479e-05, + "loss": 1.0145, + "step": 179 + }, + { + "epoch": 0.633245382585752, + "grad_norm": 0.8488346338272095, + "learning_rate": 1.0050251256281408e-05, + "loss": 1.024, + "step": 180 + }, + { + "epoch": 0.6367634124890061, + "grad_norm": 0.7485771775245667, + "learning_rate": 9.547738693467337e-06, + "loss": 1.0519, + "step": 181 + }, + { + "epoch": 0.6402814423922604, + "grad_norm": 0.8044915795326233, + "learning_rate": 9.045226130653267e-06, + "loss": 0.977, + "step": 182 + }, + { + "epoch": 0.6437994722955145, + "grad_norm": 0.8789907693862915, + "learning_rate": 8.542713567839196e-06, + "loss": 1.0284, + "step": 183 + }, + { + "epoch": 0.6473175021987687, + "grad_norm": 0.7542572617530823, + "learning_rate": 8.040201005025125e-06, + "loss": 1.0125, + "step": 184 + }, + { + "epoch": 0.6508355321020228, + "grad_norm": 0.7793267965316772, + "learning_rate": 7.537688442211055e-06, + "loss": 1.0383, + "step": 185 + }, + { + "epoch": 0.6543535620052771, + "grad_norm": 0.774917721748352, + "learning_rate": 7.035175879396985e-06, + "loss": 1.0392, + "step": 186 + }, + { + "epoch": 0.6578715919085312, + "grad_norm": 0.8436054587364197, + "learning_rate": 6.532663316582915e-06, + "loss": 1.0772, + "step": 187 + }, + { + "epoch": 0.6613896218117854, + "grad_norm": 0.7968306541442871, + "learning_rate": 6.030150753768844e-06, + "loss": 1.0723, + "step": 188 + }, + { + "epoch": 0.6649076517150396, + "grad_norm": 0.8724409341812134, + "learning_rate": 5.527638190954775e-06, + "loss": 1.0429, + "step": 189 + }, + { + "epoch": 0.6684256816182937, + "grad_norm": 0.9110769033432007, + "learning_rate": 5.025125628140704e-06, + "loss": 1.0439, + "step": 190 + }, + { + "epoch": 0.671943711521548, + "grad_norm": 0.8945828080177307, + "learning_rate": 4.522613065326634e-06, + "loss": 1.0797, + "step": 191 + }, + { + "epoch": 0.6754617414248021, + "grad_norm": 0.8030518889427185, + "learning_rate": 4.020100502512563e-06, + "loss": 1.0457, + "step": 192 + }, + { + "epoch": 0.6789797713280563, + "grad_norm": 0.8692275285720825, + "learning_rate": 3.5175879396984926e-06, + "loss": 1.0895, + "step": 193 + }, + { + "epoch": 0.6824978012313104, + "grad_norm": 0.7445128560066223, + "learning_rate": 3.015075376884422e-06, + "loss": 0.9904, + "step": 194 + }, + { + "epoch": 0.6860158311345647, + "grad_norm": 0.7754868865013123, + "learning_rate": 2.512562814070352e-06, + "loss": 1.0576, + "step": 195 + }, + { + "epoch": 0.6895338610378188, + "grad_norm": 0.8235899806022644, + "learning_rate": 2.0100502512562813e-06, + "loss": 0.9928, + "step": 196 + }, + { + "epoch": 0.693051890941073, + "grad_norm": 0.8219490051269531, + "learning_rate": 1.507537688442211e-06, + "loss": 1.0847, + "step": 197 + }, + { + "epoch": 0.6965699208443272, + "grad_norm": 0.7800722122192383, + "learning_rate": 1.0050251256281407e-06, + "loss": 1.0303, + "step": 198 + }, + { + "epoch": 0.7000879507475813, + "grad_norm": 0.8147994875907898, + "learning_rate": 5.025125628140703e-07, + "loss": 1.045, + "step": 199 + }, + { + "epoch": 0.7036059806508356, + "grad_norm": 0.7462975978851318, + "learning_rate": 0.0, + "loss": 0.9956, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.871454142739251e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_Vietnamese_Chinese/checkpoint-200/training_args.bin b/llama_Vietnamese_Chinese/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..170a1890aae16bdb5aeb793ef80fe414f6cc7c06 --- /dev/null +++ b/llama_Vietnamese_Chinese/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5920c1d910635ce9ccdcc12db27c336dc5880803b81f5493bc3fb45dd7a8d51 +size 5624 diff --git a/llama_Vietnamese_English/checkpoint-200/README.md b/llama_Vietnamese_English/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_Vietnamese_English/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_Vietnamese_English/checkpoint-200/adapter_config.json b/llama_Vietnamese_English/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..03af368a611b1fe7dd495e2a18d9db75cc1171d2 --- /dev/null +++ b/llama_Vietnamese_English/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "down_proj", + "gate_proj", + "up_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_Vietnamese_English/checkpoint-200/adapter_model.safetensors b/llama_Vietnamese_English/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9cdaeb3a4a175e1387b4b2eecb72e1024b80288 --- /dev/null +++ b/llama_Vietnamese_English/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fef86f02f2b9822d1dcef80687c1c354af173af2b4809c91c8ef1ecf5482fc3a +size 167832240 diff --git a/llama_Vietnamese_English/checkpoint-200/optimizer.pt b/llama_Vietnamese_English/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac2ef3b9c3288d02893b1720d57e454a43b86395 --- /dev/null +++ b/llama_Vietnamese_English/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f665532ddf4c645b42bb3e4b169ec476320077b674920452fcafe203bed1561 +size 85723284 diff --git a/llama_Vietnamese_English/checkpoint-200/rng_state.pth b/llama_Vietnamese_English/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/llama_Vietnamese_English/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/llama_Vietnamese_English/checkpoint-200/scheduler.pt b/llama_Vietnamese_English/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_Vietnamese_English/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_Vietnamese_English/checkpoint-200/special_tokens_map.json b/llama_Vietnamese_English/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_Vietnamese_English/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_Vietnamese_English/checkpoint-200/tokenizer.json b/llama_Vietnamese_English/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_Vietnamese_English/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_Vietnamese_English/checkpoint-200/tokenizer_config.json b/llama_Vietnamese_English/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_Vietnamese_English/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_Vietnamese_English/checkpoint-200/trainer_state.json b/llama_Vietnamese_English/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b1d3cdf7681d6737d39f45d74c872ca878059ac1 --- /dev/null +++ b/llama_Vietnamese_English/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7036059806508356, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.003518029903254178, + "grad_norm": 1.4669007062911987, + "learning_rate": 0.0001, + "loss": 2.9718, + "step": 1 + }, + { + "epoch": 0.007036059806508356, + "grad_norm": 1.5240416526794434, + "learning_rate": 9.949748743718594e-05, + "loss": 3.0249, + "step": 2 + }, + { + "epoch": 0.010554089709762533, + "grad_norm": 1.3310328722000122, + "learning_rate": 9.899497487437186e-05, + "loss": 2.7545, + "step": 3 + }, + { + "epoch": 0.014072119613016711, + "grad_norm": 1.4892698526382446, + "learning_rate": 9.84924623115578e-05, + "loss": 2.6703, + "step": 4 + }, + { + "epoch": 0.01759014951627089, + "grad_norm": 1.4727792739868164, + "learning_rate": 9.798994974874372e-05, + "loss": 2.4731, + "step": 5 + }, + { + "epoch": 0.021108179419525065, + "grad_norm": 1.4451979398727417, + "learning_rate": 9.748743718592965e-05, + "loss": 2.2243, + "step": 6 + }, + { + "epoch": 0.024626209322779244, + "grad_norm": 1.3103245496749878, + "learning_rate": 9.698492462311559e-05, + "loss": 2.0194, + "step": 7 + }, + { + "epoch": 0.028144239226033423, + "grad_norm": 1.4852089881896973, + "learning_rate": 9.64824120603015e-05, + "loss": 1.9349, + "step": 8 + }, + { + "epoch": 0.0316622691292876, + "grad_norm": 1.5170249938964844, + "learning_rate": 9.597989949748745e-05, + "loss": 1.7582, + "step": 9 + }, + { + "epoch": 0.03518029903254178, + "grad_norm": 1.3428442478179932, + "learning_rate": 9.547738693467337e-05, + "loss": 1.6313, + "step": 10 + }, + { + "epoch": 0.03869832893579595, + "grad_norm": 1.0400348901748657, + "learning_rate": 9.49748743718593e-05, + "loss": 1.4358, + "step": 11 + }, + { + "epoch": 0.04221635883905013, + "grad_norm": 0.9891974329948425, + "learning_rate": 9.447236180904523e-05, + "loss": 1.3738, + "step": 12 + }, + { + "epoch": 0.04573438874230431, + "grad_norm": 0.6980912685394287, + "learning_rate": 9.396984924623115e-05, + "loss": 1.425, + "step": 13 + }, + { + "epoch": 0.04925241864555849, + "grad_norm": 0.6836680769920349, + "learning_rate": 9.34673366834171e-05, + "loss": 1.4575, + "step": 14 + }, + { + "epoch": 0.052770448548812667, + "grad_norm": 0.9314870238304138, + "learning_rate": 9.296482412060302e-05, + "loss": 1.3206, + "step": 15 + }, + { + "epoch": 0.056288478452066845, + "grad_norm": 0.6797922253608704, + "learning_rate": 9.246231155778895e-05, + "loss": 1.3724, + "step": 16 + }, + { + "epoch": 0.05980650835532102, + "grad_norm": 0.6958814263343811, + "learning_rate": 9.195979899497488e-05, + "loss": 1.3661, + "step": 17 + }, + { + "epoch": 0.0633245382585752, + "grad_norm": 0.7188398241996765, + "learning_rate": 9.14572864321608e-05, + "loss": 1.3242, + "step": 18 + }, + { + "epoch": 0.06684256816182937, + "grad_norm": 0.8997742533683777, + "learning_rate": 9.095477386934675e-05, + "loss": 1.4049, + "step": 19 + }, + { + "epoch": 0.07036059806508356, + "grad_norm": 0.8283623456954956, + "learning_rate": 9.045226130653267e-05, + "loss": 1.3121, + "step": 20 + }, + { + "epoch": 0.07387862796833773, + "grad_norm": 0.8064684867858887, + "learning_rate": 8.99497487437186e-05, + "loss": 1.3451, + "step": 21 + }, + { + "epoch": 0.0773966578715919, + "grad_norm": 0.8180544972419739, + "learning_rate": 8.944723618090453e-05, + "loss": 1.2111, + "step": 22 + }, + { + "epoch": 0.08091468777484609, + "grad_norm": 0.8000004887580872, + "learning_rate": 8.894472361809045e-05, + "loss": 1.2933, + "step": 23 + }, + { + "epoch": 0.08443271767810026, + "grad_norm": 0.8804137706756592, + "learning_rate": 8.84422110552764e-05, + "loss": 1.3173, + "step": 24 + }, + { + "epoch": 0.08795074758135445, + "grad_norm": 0.8556327819824219, + "learning_rate": 8.793969849246232e-05, + "loss": 1.321, + "step": 25 + }, + { + "epoch": 0.09146877748460862, + "grad_norm": 0.827410876750946, + "learning_rate": 8.743718592964825e-05, + "loss": 1.2195, + "step": 26 + }, + { + "epoch": 0.09498680738786279, + "grad_norm": 0.9081262946128845, + "learning_rate": 8.693467336683418e-05, + "loss": 1.2451, + "step": 27 + }, + { + "epoch": 0.09850483729111698, + "grad_norm": 0.9331269860267639, + "learning_rate": 8.64321608040201e-05, + "loss": 1.2204, + "step": 28 + }, + { + "epoch": 0.10202286719437115, + "grad_norm": 1.0290558338165283, + "learning_rate": 8.592964824120603e-05, + "loss": 1.2379, + "step": 29 + }, + { + "epoch": 0.10554089709762533, + "grad_norm": 1.1296031475067139, + "learning_rate": 8.542713567839196e-05, + "loss": 1.2412, + "step": 30 + }, + { + "epoch": 0.1090589270008795, + "grad_norm": 1.1690081357955933, + "learning_rate": 8.49246231155779e-05, + "loss": 1.1888, + "step": 31 + }, + { + "epoch": 0.11257695690413369, + "grad_norm": 1.1313647031784058, + "learning_rate": 8.442211055276383e-05, + "loss": 1.2961, + "step": 32 + }, + { + "epoch": 0.11609498680738786, + "grad_norm": 1.1976656913757324, + "learning_rate": 8.391959798994975e-05, + "loss": 1.2387, + "step": 33 + }, + { + "epoch": 0.11961301671064203, + "grad_norm": 1.20232355594635, + "learning_rate": 8.341708542713568e-05, + "loss": 1.3125, + "step": 34 + }, + { + "epoch": 0.12313104661389622, + "grad_norm": 1.2482579946517944, + "learning_rate": 8.291457286432161e-05, + "loss": 1.322, + "step": 35 + }, + { + "epoch": 0.1266490765171504, + "grad_norm": 1.0197736024856567, + "learning_rate": 8.241206030150754e-05, + "loss": 1.1192, + "step": 36 + }, + { + "epoch": 0.13016710642040458, + "grad_norm": 0.9190375208854675, + "learning_rate": 8.190954773869348e-05, + "loss": 1.2522, + "step": 37 + }, + { + "epoch": 0.13368513632365875, + "grad_norm": 0.7511453032493591, + "learning_rate": 8.14070351758794e-05, + "loss": 1.0525, + "step": 38 + }, + { + "epoch": 0.13720316622691292, + "grad_norm": 0.7151877880096436, + "learning_rate": 8.090452261306533e-05, + "loss": 1.1839, + "step": 39 + }, + { + "epoch": 0.14072119613016712, + "grad_norm": 0.6375951766967773, + "learning_rate": 8.040201005025126e-05, + "loss": 1.2203, + "step": 40 + }, + { + "epoch": 0.1442392260334213, + "grad_norm": 0.6267354488372803, + "learning_rate": 7.989949748743719e-05, + "loss": 1.1996, + "step": 41 + }, + { + "epoch": 0.14775725593667546, + "grad_norm": 0.5620112419128418, + "learning_rate": 7.939698492462313e-05, + "loss": 1.1745, + "step": 42 + }, + { + "epoch": 0.15127528583992964, + "grad_norm": 0.6898969411849976, + "learning_rate": 7.889447236180904e-05, + "loss": 1.2377, + "step": 43 + }, + { + "epoch": 0.1547933157431838, + "grad_norm": 0.5548388957977295, + "learning_rate": 7.839195979899498e-05, + "loss": 1.1654, + "step": 44 + }, + { + "epoch": 0.158311345646438, + "grad_norm": 0.5869529843330383, + "learning_rate": 7.788944723618091e-05, + "loss": 1.1669, + "step": 45 + }, + { + "epoch": 0.16182937554969218, + "grad_norm": 0.6272417902946472, + "learning_rate": 7.738693467336684e-05, + "loss": 1.132, + "step": 46 + }, + { + "epoch": 0.16534740545294635, + "grad_norm": 0.6158267855644226, + "learning_rate": 7.688442211055277e-05, + "loss": 1.0767, + "step": 47 + }, + { + "epoch": 0.16886543535620052, + "grad_norm": 0.661561906337738, + "learning_rate": 7.638190954773869e-05, + "loss": 1.1867, + "step": 48 + }, + { + "epoch": 0.1723834652594547, + "grad_norm": 0.5605206489562988, + "learning_rate": 7.587939698492463e-05, + "loss": 1.1243, + "step": 49 + }, + { + "epoch": 0.1759014951627089, + "grad_norm": 0.6338799595832825, + "learning_rate": 7.537688442211056e-05, + "loss": 1.1635, + "step": 50 + }, + { + "epoch": 0.17941952506596306, + "grad_norm": 0.7251884937286377, + "learning_rate": 7.487437185929649e-05, + "loss": 1.1462, + "step": 51 + }, + { + "epoch": 0.18293755496921724, + "grad_norm": 0.5688169598579407, + "learning_rate": 7.437185929648241e-05, + "loss": 1.1351, + "step": 52 + }, + { + "epoch": 0.1864555848724714, + "grad_norm": 0.6056070923805237, + "learning_rate": 7.386934673366834e-05, + "loss": 1.1352, + "step": 53 + }, + { + "epoch": 0.18997361477572558, + "grad_norm": 0.8283679485321045, + "learning_rate": 7.336683417085427e-05, + "loss": 1.2222, + "step": 54 + }, + { + "epoch": 0.19349164467897978, + "grad_norm": 0.6316900253295898, + "learning_rate": 7.28643216080402e-05, + "loss": 1.2023, + "step": 55 + }, + { + "epoch": 0.19700967458223395, + "grad_norm": 0.6092143058776855, + "learning_rate": 7.236180904522614e-05, + "loss": 1.0762, + "step": 56 + }, + { + "epoch": 0.20052770448548812, + "grad_norm": 0.5600019097328186, + "learning_rate": 7.185929648241206e-05, + "loss": 1.0127, + "step": 57 + }, + { + "epoch": 0.2040457343887423, + "grad_norm": 0.6157863736152649, + "learning_rate": 7.135678391959799e-05, + "loss": 1.1016, + "step": 58 + }, + { + "epoch": 0.2075637642919965, + "grad_norm": 0.6391822099685669, + "learning_rate": 7.085427135678392e-05, + "loss": 1.2009, + "step": 59 + }, + { + "epoch": 0.21108179419525067, + "grad_norm": 0.5637600421905518, + "learning_rate": 7.035175879396985e-05, + "loss": 1.1419, + "step": 60 + }, + { + "epoch": 0.21459982409850484, + "grad_norm": 0.6826542019844055, + "learning_rate": 6.984924623115579e-05, + "loss": 1.1084, + "step": 61 + }, + { + "epoch": 0.218117854001759, + "grad_norm": 0.6475107073783875, + "learning_rate": 6.93467336683417e-05, + "loss": 1.2033, + "step": 62 + }, + { + "epoch": 0.22163588390501318, + "grad_norm": 0.5701493620872498, + "learning_rate": 6.884422110552764e-05, + "loss": 1.1425, + "step": 63 + }, + { + "epoch": 0.22515391380826738, + "grad_norm": 0.5416231155395508, + "learning_rate": 6.834170854271357e-05, + "loss": 1.0869, + "step": 64 + }, + { + "epoch": 0.22867194371152155, + "grad_norm": 0.611254870891571, + "learning_rate": 6.78391959798995e-05, + "loss": 1.1344, + "step": 65 + }, + { + "epoch": 0.23218997361477572, + "grad_norm": 0.5644116401672363, + "learning_rate": 6.733668341708544e-05, + "loss": 1.0655, + "step": 66 + }, + { + "epoch": 0.2357080035180299, + "grad_norm": 0.5953249931335449, + "learning_rate": 6.683417085427135e-05, + "loss": 1.1267, + "step": 67 + }, + { + "epoch": 0.23922603342128407, + "grad_norm": 0.5902895331382751, + "learning_rate": 6.633165829145729e-05, + "loss": 1.1207, + "step": 68 + }, + { + "epoch": 0.24274406332453827, + "grad_norm": 0.571882426738739, + "learning_rate": 6.582914572864322e-05, + "loss": 1.0945, + "step": 69 + }, + { + "epoch": 0.24626209322779244, + "grad_norm": 0.6372458934783936, + "learning_rate": 6.532663316582915e-05, + "loss": 1.1933, + "step": 70 + }, + { + "epoch": 0.2497801231310466, + "grad_norm": 0.6739147901535034, + "learning_rate": 6.482412060301508e-05, + "loss": 1.1202, + "step": 71 + }, + { + "epoch": 0.2532981530343008, + "grad_norm": 0.6515147686004639, + "learning_rate": 6.4321608040201e-05, + "loss": 1.1685, + "step": 72 + }, + { + "epoch": 0.256816182937555, + "grad_norm": 0.5706716775894165, + "learning_rate": 6.381909547738694e-05, + "loss": 1.1084, + "step": 73 + }, + { + "epoch": 0.26033421284080915, + "grad_norm": 0.595585286617279, + "learning_rate": 6.331658291457287e-05, + "loss": 1.1218, + "step": 74 + }, + { + "epoch": 0.2638522427440633, + "grad_norm": 0.6020475625991821, + "learning_rate": 6.28140703517588e-05, + "loss": 1.1282, + "step": 75 + }, + { + "epoch": 0.2673702726473175, + "grad_norm": 0.628376305103302, + "learning_rate": 6.231155778894473e-05, + "loss": 1.1067, + "step": 76 + }, + { + "epoch": 0.27088830255057167, + "grad_norm": 0.6371076107025146, + "learning_rate": 6.180904522613065e-05, + "loss": 1.1466, + "step": 77 + }, + { + "epoch": 0.27440633245382584, + "grad_norm": 0.6206318140029907, + "learning_rate": 6.130653266331658e-05, + "loss": 1.0801, + "step": 78 + }, + { + "epoch": 0.27792436235708, + "grad_norm": 0.6293841600418091, + "learning_rate": 6.080402010050251e-05, + "loss": 1.1644, + "step": 79 + }, + { + "epoch": 0.28144239226033424, + "grad_norm": 0.6434080600738525, + "learning_rate": 6.030150753768844e-05, + "loss": 1.0589, + "step": 80 + }, + { + "epoch": 0.2849604221635884, + "grad_norm": 0.5857638120651245, + "learning_rate": 5.979899497487438e-05, + "loss": 1.1711, + "step": 81 + }, + { + "epoch": 0.2884784520668426, + "grad_norm": 0.6163449883460999, + "learning_rate": 5.929648241206031e-05, + "loss": 1.1627, + "step": 82 + }, + { + "epoch": 0.29199648197009676, + "grad_norm": 0.6543634533882141, + "learning_rate": 5.879396984924623e-05, + "loss": 1.0909, + "step": 83 + }, + { + "epoch": 0.2955145118733509, + "grad_norm": 0.6609559059143066, + "learning_rate": 5.829145728643216e-05, + "loss": 1.1505, + "step": 84 + }, + { + "epoch": 0.2990325417766051, + "grad_norm": 0.5798302292823792, + "learning_rate": 5.778894472361809e-05, + "loss": 1.0834, + "step": 85 + }, + { + "epoch": 0.30255057167985927, + "grad_norm": 0.6974066495895386, + "learning_rate": 5.728643216080403e-05, + "loss": 1.0965, + "step": 86 + }, + { + "epoch": 0.30606860158311344, + "grad_norm": 0.67149817943573, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.09, + "step": 87 + }, + { + "epoch": 0.3095866314863676, + "grad_norm": 0.5761735439300537, + "learning_rate": 5.628140703517588e-05, + "loss": 1.1436, + "step": 88 + }, + { + "epoch": 0.3131046613896218, + "grad_norm": 0.6142584681510925, + "learning_rate": 5.577889447236181e-05, + "loss": 1.0489, + "step": 89 + }, + { + "epoch": 0.316622691292876, + "grad_norm": 0.6407614946365356, + "learning_rate": 5.527638190954774e-05, + "loss": 1.1449, + "step": 90 + }, + { + "epoch": 0.3201407211961302, + "grad_norm": 0.6835021376609802, + "learning_rate": 5.477386934673368e-05, + "loss": 1.1332, + "step": 91 + }, + { + "epoch": 0.32365875109938436, + "grad_norm": 0.5755856037139893, + "learning_rate": 5.4271356783919604e-05, + "loss": 1.1195, + "step": 92 + }, + { + "epoch": 0.32717678100263853, + "grad_norm": 0.6232398748397827, + "learning_rate": 5.376884422110553e-05, + "loss": 1.1696, + "step": 93 + }, + { + "epoch": 0.3306948109058927, + "grad_norm": 0.6193405389785767, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.1106, + "step": 94 + }, + { + "epoch": 0.33421284080914687, + "grad_norm": 0.6834057569503784, + "learning_rate": 5.276381909547739e-05, + "loss": 1.1349, + "step": 95 + }, + { + "epoch": 0.33773087071240104, + "grad_norm": 0.7168384790420532, + "learning_rate": 5.226130653266332e-05, + "loss": 1.2054, + "step": 96 + }, + { + "epoch": 0.3412489006156552, + "grad_norm": 0.6553971767425537, + "learning_rate": 5.175879396984925e-05, + "loss": 1.0975, + "step": 97 + }, + { + "epoch": 0.3447669305189094, + "grad_norm": 0.6329600811004639, + "learning_rate": 5.125628140703518e-05, + "loss": 1.1212, + "step": 98 + }, + { + "epoch": 0.3482849604221636, + "grad_norm": 0.6656339764595032, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.1451, + "step": 99 + }, + { + "epoch": 0.3518029903254178, + "grad_norm": 0.6817747950553894, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.084, + "step": 100 + }, + { + "epoch": 0.35532102022867196, + "grad_norm": 0.6384849548339844, + "learning_rate": 4.974874371859297e-05, + "loss": 1.047, + "step": 101 + }, + { + "epoch": 0.35883905013192613, + "grad_norm": 0.6342082023620605, + "learning_rate": 4.92462311557789e-05, + "loss": 1.1122, + "step": 102 + }, + { + "epoch": 0.3623570800351803, + "grad_norm": 0.6114000082015991, + "learning_rate": 4.874371859296483e-05, + "loss": 1.1094, + "step": 103 + }, + { + "epoch": 0.3658751099384345, + "grad_norm": 0.6310352683067322, + "learning_rate": 4.824120603015075e-05, + "loss": 1.1508, + "step": 104 + }, + { + "epoch": 0.36939313984168864, + "grad_norm": 0.6773234605789185, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.0511, + "step": 105 + }, + { + "epoch": 0.3729111697449428, + "grad_norm": 0.6625077724456787, + "learning_rate": 4.723618090452262e-05, + "loss": 1.1422, + "step": 106 + }, + { + "epoch": 0.376429199648197, + "grad_norm": 0.6125949025154114, + "learning_rate": 4.673366834170855e-05, + "loss": 1.1189, + "step": 107 + }, + { + "epoch": 0.37994722955145116, + "grad_norm": 0.684280514717102, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.2249, + "step": 108 + }, + { + "epoch": 0.3834652594547054, + "grad_norm": 0.8305927515029907, + "learning_rate": 4.57286432160804e-05, + "loss": 1.1758, + "step": 109 + }, + { + "epoch": 0.38698328935795956, + "grad_norm": 0.6081312894821167, + "learning_rate": 4.522613065326633e-05, + "loss": 1.0853, + "step": 110 + }, + { + "epoch": 0.39050131926121373, + "grad_norm": 0.716929018497467, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.1903, + "step": 111 + }, + { + "epoch": 0.3940193491644679, + "grad_norm": 0.5968315005302429, + "learning_rate": 4.42211055276382e-05, + "loss": 1.0717, + "step": 112 + }, + { + "epoch": 0.3975373790677221, + "grad_norm": 0.6502510905265808, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.0629, + "step": 113 + }, + { + "epoch": 0.40105540897097625, + "grad_norm": 0.6408775448799133, + "learning_rate": 4.321608040201005e-05, + "loss": 1.0937, + "step": 114 + }, + { + "epoch": 0.4045734388742304, + "grad_norm": 0.6137213110923767, + "learning_rate": 4.271356783919598e-05, + "loss": 1.0853, + "step": 115 + }, + { + "epoch": 0.4080914687774846, + "grad_norm": 0.6401947736740112, + "learning_rate": 4.2211055276381914e-05, + "loss": 1.1542, + "step": 116 + }, + { + "epoch": 0.41160949868073876, + "grad_norm": 0.6332412362098694, + "learning_rate": 4.170854271356784e-05, + "loss": 1.0731, + "step": 117 + }, + { + "epoch": 0.415127528583993, + "grad_norm": 0.6274076700210571, + "learning_rate": 4.120603015075377e-05, + "loss": 1.0707, + "step": 118 + }, + { + "epoch": 0.41864555848724716, + "grad_norm": 0.632633626461029, + "learning_rate": 4.07035175879397e-05, + "loss": 1.108, + "step": 119 + }, + { + "epoch": 0.42216358839050133, + "grad_norm": 0.6979479193687439, + "learning_rate": 4.020100502512563e-05, + "loss": 1.1483, + "step": 120 + }, + { + "epoch": 0.4256816182937555, + "grad_norm": 0.7355033755302429, + "learning_rate": 3.969849246231156e-05, + "loss": 1.1358, + "step": 121 + }, + { + "epoch": 0.4291996481970097, + "grad_norm": 0.6254828572273254, + "learning_rate": 3.919597989949749e-05, + "loss": 1.1753, + "step": 122 + }, + { + "epoch": 0.43271767810026385, + "grad_norm": 0.6851824522018433, + "learning_rate": 3.869346733668342e-05, + "loss": 1.0128, + "step": 123 + }, + { + "epoch": 0.436235708003518, + "grad_norm": 0.6097928285598755, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.1235, + "step": 124 + }, + { + "epoch": 0.4397537379067722, + "grad_norm": 0.6748325824737549, + "learning_rate": 3.768844221105528e-05, + "loss": 1.0452, + "step": 125 + }, + { + "epoch": 0.44327176781002636, + "grad_norm": 0.6666128039360046, + "learning_rate": 3.7185929648241204e-05, + "loss": 1.1075, + "step": 126 + }, + { + "epoch": 0.4467897977132806, + "grad_norm": 0.7474984526634216, + "learning_rate": 3.668341708542714e-05, + "loss": 1.0695, + "step": 127 + }, + { + "epoch": 0.45030782761653476, + "grad_norm": 0.6925339698791504, + "learning_rate": 3.618090452261307e-05, + "loss": 1.1024, + "step": 128 + }, + { + "epoch": 0.45382585751978893, + "grad_norm": 0.6140123009681702, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.0788, + "step": 129 + }, + { + "epoch": 0.4573438874230431, + "grad_norm": 0.6771907806396484, + "learning_rate": 3.517587939698493e-05, + "loss": 1.0913, + "step": 130 + }, + { + "epoch": 0.4608619173262973, + "grad_norm": 0.6700430512428284, + "learning_rate": 3.467336683417085e-05, + "loss": 1.0566, + "step": 131 + }, + { + "epoch": 0.46437994722955145, + "grad_norm": 0.6931480169296265, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.059, + "step": 132 + }, + { + "epoch": 0.4678979771328056, + "grad_norm": 0.6608771085739136, + "learning_rate": 3.366834170854272e-05, + "loss": 1.119, + "step": 133 + }, + { + "epoch": 0.4714160070360598, + "grad_norm": 0.6470663547515869, + "learning_rate": 3.3165829145728643e-05, + "loss": 1.0662, + "step": 134 + }, + { + "epoch": 0.47493403693931396, + "grad_norm": 0.5729122757911682, + "learning_rate": 3.2663316582914576e-05, + "loss": 0.9999, + "step": 135 + }, + { + "epoch": 0.47845206684256814, + "grad_norm": 0.6993862390518188, + "learning_rate": 3.21608040201005e-05, + "loss": 1.1819, + "step": 136 + }, + { + "epoch": 0.48197009674582236, + "grad_norm": 0.6929494738578796, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.1719, + "step": 137 + }, + { + "epoch": 0.48548812664907653, + "grad_norm": 0.6951282620429993, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.0716, + "step": 138 + }, + { + "epoch": 0.4890061565523307, + "grad_norm": 0.6766693592071533, + "learning_rate": 3.065326633165829e-05, + "loss": 1.1589, + "step": 139 + }, + { + "epoch": 0.4925241864555849, + "grad_norm": 0.6500269174575806, + "learning_rate": 3.015075376884422e-05, + "loss": 1.1122, + "step": 140 + }, + { + "epoch": 0.49604221635883905, + "grad_norm": 0.7741857171058655, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.1594, + "step": 141 + }, + { + "epoch": 0.4995602462620932, + "grad_norm": 0.6630749106407166, + "learning_rate": 2.914572864321608e-05, + "loss": 1.0615, + "step": 142 + }, + { + "epoch": 0.5030782761653474, + "grad_norm": 0.7230671048164368, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.1521, + "step": 143 + }, + { + "epoch": 0.5065963060686016, + "grad_norm": 0.6624138355255127, + "learning_rate": 2.814070351758794e-05, + "loss": 1.0347, + "step": 144 + }, + { + "epoch": 0.5101143359718557, + "grad_norm": 0.6560067534446716, + "learning_rate": 2.763819095477387e-05, + "loss": 1.1214, + "step": 145 + }, + { + "epoch": 0.51363236587511, + "grad_norm": 0.6742956638336182, + "learning_rate": 2.7135678391959802e-05, + "loss": 1.0956, + "step": 146 + }, + { + "epoch": 0.5171503957783641, + "grad_norm": 0.706284761428833, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.1058, + "step": 147 + }, + { + "epoch": 0.5206684256816183, + "grad_norm": 0.6924006938934326, + "learning_rate": 2.613065326633166e-05, + "loss": 1.186, + "step": 148 + }, + { + "epoch": 0.5241864555848724, + "grad_norm": 0.6287305951118469, + "learning_rate": 2.562814070351759e-05, + "loss": 1.0422, + "step": 149 + }, + { + "epoch": 0.5277044854881267, + "grad_norm": 0.6957104206085205, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.0896, + "step": 150 + }, + { + "epoch": 0.5312225153913809, + "grad_norm": 0.7039506435394287, + "learning_rate": 2.462311557788945e-05, + "loss": 1.0818, + "step": 151 + }, + { + "epoch": 0.534740545294635, + "grad_norm": 0.6502148509025574, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.112, + "step": 152 + }, + { + "epoch": 0.5382585751978892, + "grad_norm": 0.6823992133140564, + "learning_rate": 2.361809045226131e-05, + "loss": 1.0298, + "step": 153 + }, + { + "epoch": 0.5417766051011433, + "grad_norm": 0.7539629936218262, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.0618, + "step": 154 + }, + { + "epoch": 0.5452946350043976, + "grad_norm": 0.6974697113037109, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.1702, + "step": 155 + }, + { + "epoch": 0.5488126649076517, + "grad_norm": 0.7035180330276489, + "learning_rate": 2.21105527638191e-05, + "loss": 1.0714, + "step": 156 + }, + { + "epoch": 0.5523306948109059, + "grad_norm": 0.9007865786552429, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.0565, + "step": 157 + }, + { + "epoch": 0.55584872471416, + "grad_norm": 0.7083996534347534, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.1425, + "step": 158 + }, + { + "epoch": 0.5593667546174143, + "grad_norm": 0.7241733074188232, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.1211, + "step": 159 + }, + { + "epoch": 0.5628847845206685, + "grad_norm": 0.7474963068962097, + "learning_rate": 2.0100502512562815e-05, + "loss": 1.0546, + "step": 160 + }, + { + "epoch": 0.5664028144239226, + "grad_norm": 0.7051181793212891, + "learning_rate": 1.9597989949748744e-05, + "loss": 0.9878, + "step": 161 + }, + { + "epoch": 0.5699208443271768, + "grad_norm": 0.7359694242477417, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.1283, + "step": 162 + }, + { + "epoch": 0.5734388742304309, + "grad_norm": 0.6908060908317566, + "learning_rate": 1.8592964824120602e-05, + "loss": 1.1287, + "step": 163 + }, + { + "epoch": 0.5769569041336852, + "grad_norm": 0.7220682501792908, + "learning_rate": 1.8090452261306535e-05, + "loss": 1.0424, + "step": 164 + }, + { + "epoch": 0.5804749340369393, + "grad_norm": 0.7415404319763184, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.0749, + "step": 165 + }, + { + "epoch": 0.5839929639401935, + "grad_norm": 0.7168678641319275, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.1308, + "step": 166 + }, + { + "epoch": 0.5875109938434476, + "grad_norm": 0.653301477432251, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.0777, + "step": 167 + }, + { + "epoch": 0.5910290237467019, + "grad_norm": 0.7567819952964783, + "learning_rate": 1.608040201005025e-05, + "loss": 1.1476, + "step": 168 + }, + { + "epoch": 0.594547053649956, + "grad_norm": 0.7353144288063049, + "learning_rate": 1.5577889447236183e-05, + "loss": 1.0961, + "step": 169 + }, + { + "epoch": 0.5980650835532102, + "grad_norm": 0.6990388035774231, + "learning_rate": 1.507537688442211e-05, + "loss": 1.1619, + "step": 170 + }, + { + "epoch": 0.6015831134564644, + "grad_norm": 0.7032533288002014, + "learning_rate": 1.457286432160804e-05, + "loss": 1.0619, + "step": 171 + }, + { + "epoch": 0.6051011433597185, + "grad_norm": 0.6197975873947144, + "learning_rate": 1.407035175879397e-05, + "loss": 1.0953, + "step": 172 + }, + { + "epoch": 0.6086191732629728, + "grad_norm": 0.746258556842804, + "learning_rate": 1.3567839195979901e-05, + "loss": 1.1201, + "step": 173 + }, + { + "epoch": 0.6121372031662269, + "grad_norm": 0.6444905996322632, + "learning_rate": 1.306532663316583e-05, + "loss": 1.0241, + "step": 174 + }, + { + "epoch": 0.6156552330694811, + "grad_norm": 0.7037890553474426, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.0739, + "step": 175 + }, + { + "epoch": 0.6191732629727352, + "grad_norm": 0.7138697504997253, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.1102, + "step": 176 + }, + { + "epoch": 0.6226912928759895, + "grad_norm": 0.7358911037445068, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.1945, + "step": 177 + }, + { + "epoch": 0.6262093227792436, + "grad_norm": 0.7306352853775024, + "learning_rate": 1.105527638190955e-05, + "loss": 1.0887, + "step": 178 + }, + { + "epoch": 0.6297273526824978, + "grad_norm": 0.7626399993896484, + "learning_rate": 1.0552763819095479e-05, + "loss": 1.0918, + "step": 179 + }, + { + "epoch": 0.633245382585752, + "grad_norm": 0.7157562375068665, + "learning_rate": 1.0050251256281408e-05, + "loss": 1.0794, + "step": 180 + }, + { + "epoch": 0.6367634124890061, + "grad_norm": 0.674655556678772, + "learning_rate": 9.547738693467337e-06, + "loss": 1.1632, + "step": 181 + }, + { + "epoch": 0.6402814423922604, + "grad_norm": 0.7276845574378967, + "learning_rate": 9.045226130653267e-06, + "loss": 1.0664, + "step": 182 + }, + { + "epoch": 0.6437994722955145, + "grad_norm": 0.7614260315895081, + "learning_rate": 8.542713567839196e-06, + "loss": 1.1185, + "step": 183 + }, + { + "epoch": 0.6473175021987687, + "grad_norm": 0.691209614276886, + "learning_rate": 8.040201005025125e-06, + "loss": 1.0648, + "step": 184 + }, + { + "epoch": 0.6508355321020228, + "grad_norm": 0.6736161708831787, + "learning_rate": 7.537688442211055e-06, + "loss": 1.11, + "step": 185 + }, + { + "epoch": 0.6543535620052771, + "grad_norm": 0.6875973343849182, + "learning_rate": 7.035175879396985e-06, + "loss": 1.1085, + "step": 186 + }, + { + "epoch": 0.6578715919085312, + "grad_norm": 0.6715053915977478, + "learning_rate": 6.532663316582915e-06, + "loss": 1.1391, + "step": 187 + }, + { + "epoch": 0.6613896218117854, + "grad_norm": 0.7241913080215454, + "learning_rate": 6.030150753768844e-06, + "loss": 1.193, + "step": 188 + }, + { + "epoch": 0.6649076517150396, + "grad_norm": 0.722939133644104, + "learning_rate": 5.527638190954775e-06, + "loss": 1.1218, + "step": 189 + }, + { + "epoch": 0.6684256816182937, + "grad_norm": 0.7348630428314209, + "learning_rate": 5.025125628140704e-06, + "loss": 1.0771, + "step": 190 + }, + { + "epoch": 0.671943711521548, + "grad_norm": 0.72852623462677, + "learning_rate": 4.522613065326634e-06, + "loss": 1.1196, + "step": 191 + }, + { + "epoch": 0.6754617414248021, + "grad_norm": 0.7617117762565613, + "learning_rate": 4.020100502512563e-06, + "loss": 1.1313, + "step": 192 + }, + { + "epoch": 0.6789797713280563, + "grad_norm": 0.8029654622077942, + "learning_rate": 3.5175879396984926e-06, + "loss": 1.1405, + "step": 193 + }, + { + "epoch": 0.6824978012313104, + "grad_norm": 0.6885625123977661, + "learning_rate": 3.015075376884422e-06, + "loss": 1.0565, + "step": 194 + }, + { + "epoch": 0.6860158311345647, + "grad_norm": 0.7057883143424988, + "learning_rate": 2.512562814070352e-06, + "loss": 1.1625, + "step": 195 + }, + { + "epoch": 0.6895338610378188, + "grad_norm": 0.7429342269897461, + "learning_rate": 2.0100502512562813e-06, + "loss": 1.044, + "step": 196 + }, + { + "epoch": 0.693051890941073, + "grad_norm": 0.7036694884300232, + "learning_rate": 1.507537688442211e-06, + "loss": 1.0991, + "step": 197 + }, + { + "epoch": 0.6965699208443272, + "grad_norm": 0.6950182318687439, + "learning_rate": 1.0050251256281407e-06, + "loss": 1.1014, + "step": 198 + }, + { + "epoch": 0.7000879507475813, + "grad_norm": 0.7009806632995605, + "learning_rate": 5.025125628140703e-07, + "loss": 1.1108, + "step": 199 + }, + { + "epoch": 0.7036059806508356, + "grad_norm": 0.6382765769958496, + "learning_rate": 0.0, + "loss": 1.0479, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.5191482454605824e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_Vietnamese_English/checkpoint-200/training_args.bin b/llama_Vietnamese_English/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8d177d2a025123f79bd57e5fd10884a0217f7844 --- /dev/null +++ b/llama_Vietnamese_English/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db0e684ae9b89568150793cd71fc77d55d5755640494e4814ebc866d7c4cbce +size 5624 diff --git a/llama_Vietnamese_French/checkpoint-200/README.md b/llama_Vietnamese_French/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_Vietnamese_French/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_Vietnamese_French/checkpoint-200/adapter_config.json b/llama_Vietnamese_French/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1a9278e9bb969536072b10c0e4aff55a617e5d9c --- /dev/null +++ b/llama_Vietnamese_French/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "gate_proj", + "q_proj", + "down_proj", + "up_proj", + "o_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_Vietnamese_French/checkpoint-200/adapter_model.safetensors b/llama_Vietnamese_French/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13984b765275dafc211d13ac22f59276508538ee --- /dev/null +++ b/llama_Vietnamese_French/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba78cc593dcc77e30dbc2f13049d6d1145641a8a341c1edcafa6a16572711c87 +size 167832240 diff --git a/llama_Vietnamese_French/checkpoint-200/optimizer.pt b/llama_Vietnamese_French/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c43fb4c4d2b1b6b99a7134aa5d3cdb985fffbc3d --- /dev/null +++ b/llama_Vietnamese_French/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c419c00c0d08518590f3b53badfea11d2f5e7f3294a3855b780d60446f671366 +size 85723284 diff --git a/llama_Vietnamese_French/checkpoint-200/rng_state.pth b/llama_Vietnamese_French/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/llama_Vietnamese_French/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/llama_Vietnamese_French/checkpoint-200/scheduler.pt b/llama_Vietnamese_French/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_Vietnamese_French/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_Vietnamese_French/checkpoint-200/special_tokens_map.json b/llama_Vietnamese_French/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_Vietnamese_French/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_Vietnamese_French/checkpoint-200/tokenizer.json b/llama_Vietnamese_French/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_Vietnamese_French/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_Vietnamese_French/checkpoint-200/tokenizer_config.json b/llama_Vietnamese_French/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_Vietnamese_French/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_Vietnamese_French/checkpoint-200/trainer_state.json b/llama_Vietnamese_French/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c0a907ce42cc51f91a28b322718f6f525eaee51e --- /dev/null +++ b/llama_Vietnamese_French/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7036059806508356, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.003518029903254178, + "grad_norm": 1.3099981546401978, + "learning_rate": 0.0001, + "loss": 2.8051, + "step": 1 + }, + { + "epoch": 0.007036059806508356, + "grad_norm": 1.419170618057251, + "learning_rate": 9.949748743718594e-05, + "loss": 2.9216, + "step": 2 + }, + { + "epoch": 0.010554089709762533, + "grad_norm": 1.1821376085281372, + "learning_rate": 9.899497487437186e-05, + "loss": 2.5986, + "step": 3 + }, + { + "epoch": 0.014072119613016711, + "grad_norm": 1.344305157661438, + "learning_rate": 9.84924623115578e-05, + "loss": 2.5351, + "step": 4 + }, + { + "epoch": 0.01759014951627089, + "grad_norm": 1.4239567518234253, + "learning_rate": 9.798994974874372e-05, + "loss": 2.3687, + "step": 5 + }, + { + "epoch": 0.021108179419525065, + "grad_norm": 1.4257348775863647, + "learning_rate": 9.748743718592965e-05, + "loss": 2.0895, + "step": 6 + }, + { + "epoch": 0.024626209322779244, + "grad_norm": 1.305893063545227, + "learning_rate": 9.698492462311559e-05, + "loss": 1.9073, + "step": 7 + }, + { + "epoch": 0.028144239226033423, + "grad_norm": 1.2924154996871948, + "learning_rate": 9.64824120603015e-05, + "loss": 1.8633, + "step": 8 + }, + { + "epoch": 0.0316622691292876, + "grad_norm": 1.2596007585525513, + "learning_rate": 9.597989949748745e-05, + "loss": 1.6994, + "step": 9 + }, + { + "epoch": 0.03518029903254178, + "grad_norm": 1.309537649154663, + "learning_rate": 9.547738693467337e-05, + "loss": 1.5786, + "step": 10 + }, + { + "epoch": 0.03869832893579595, + "grad_norm": 1.0517432689666748, + "learning_rate": 9.49748743718593e-05, + "loss": 1.3837, + "step": 11 + }, + { + "epoch": 0.04221635883905013, + "grad_norm": 0.8410754799842834, + "learning_rate": 9.447236180904523e-05, + "loss": 1.3317, + "step": 12 + }, + { + "epoch": 0.04573438874230431, + "grad_norm": 0.5960184335708618, + "learning_rate": 9.396984924623115e-05, + "loss": 1.3083, + "step": 13 + }, + { + "epoch": 0.04925241864555849, + "grad_norm": 0.6199663877487183, + "learning_rate": 9.34673366834171e-05, + "loss": 1.3976, + "step": 14 + }, + { + "epoch": 0.052770448548812667, + "grad_norm": 0.6572591662406921, + "learning_rate": 9.296482412060302e-05, + "loss": 1.2445, + "step": 15 + }, + { + "epoch": 0.056288478452066845, + "grad_norm": 0.6266248822212219, + "learning_rate": 9.246231155778895e-05, + "loss": 1.3037, + "step": 16 + }, + { + "epoch": 0.05980650835532102, + "grad_norm": 0.6370198726654053, + "learning_rate": 9.195979899497488e-05, + "loss": 1.2624, + "step": 17 + }, + { + "epoch": 0.0633245382585752, + "grad_norm": 0.6880143284797668, + "learning_rate": 9.14572864321608e-05, + "loss": 1.2399, + "step": 18 + }, + { + "epoch": 0.06684256816182937, + "grad_norm": 0.8517648577690125, + "learning_rate": 9.095477386934675e-05, + "loss": 1.3088, + "step": 19 + }, + { + "epoch": 0.07036059806508356, + "grad_norm": 0.7807110548019409, + "learning_rate": 9.045226130653267e-05, + "loss": 1.2596, + "step": 20 + }, + { + "epoch": 0.07387862796833773, + "grad_norm": 0.7446685433387756, + "learning_rate": 8.99497487437186e-05, + "loss": 1.2561, + "step": 21 + }, + { + "epoch": 0.0773966578715919, + "grad_norm": 0.7602100968360901, + "learning_rate": 8.944723618090453e-05, + "loss": 1.1424, + "step": 22 + }, + { + "epoch": 0.08091468777484609, + "grad_norm": 0.7028201818466187, + "learning_rate": 8.894472361809045e-05, + "loss": 1.2534, + "step": 23 + }, + { + "epoch": 0.08443271767810026, + "grad_norm": 0.8267808556556702, + "learning_rate": 8.84422110552764e-05, + "loss": 1.2624, + "step": 24 + }, + { + "epoch": 0.08795074758135445, + "grad_norm": 0.7587990164756775, + "learning_rate": 8.793969849246232e-05, + "loss": 1.2473, + "step": 25 + }, + { + "epoch": 0.09146877748460862, + "grad_norm": 0.7513137459754944, + "learning_rate": 8.743718592964825e-05, + "loss": 1.1537, + "step": 26 + }, + { + "epoch": 0.09498680738786279, + "grad_norm": 0.7992008328437805, + "learning_rate": 8.693467336683418e-05, + "loss": 1.1854, + "step": 27 + }, + { + "epoch": 0.09850483729111698, + "grad_norm": 0.839791476726532, + "learning_rate": 8.64321608040201e-05, + "loss": 1.1757, + "step": 28 + }, + { + "epoch": 0.10202286719437115, + "grad_norm": 0.8871113657951355, + "learning_rate": 8.592964824120603e-05, + "loss": 1.1712, + "step": 29 + }, + { + "epoch": 0.10554089709762533, + "grad_norm": 0.9801968932151794, + "learning_rate": 8.542713567839196e-05, + "loss": 1.2115, + "step": 30 + }, + { + "epoch": 0.1090589270008795, + "grad_norm": 1.0081522464752197, + "learning_rate": 8.49246231155779e-05, + "loss": 1.1353, + "step": 31 + }, + { + "epoch": 0.11257695690413369, + "grad_norm": 1.0660901069641113, + "learning_rate": 8.442211055276383e-05, + "loss": 1.283, + "step": 32 + }, + { + "epoch": 0.11609498680738786, + "grad_norm": 1.060646891593933, + "learning_rate": 8.391959798994975e-05, + "loss": 1.1808, + "step": 33 + }, + { + "epoch": 0.11961301671064203, + "grad_norm": 1.0443567037582397, + "learning_rate": 8.341708542713568e-05, + "loss": 1.2391, + "step": 34 + }, + { + "epoch": 0.12313104661389622, + "grad_norm": 1.1405633687973022, + "learning_rate": 8.291457286432161e-05, + "loss": 1.2314, + "step": 35 + }, + { + "epoch": 0.1266490765171504, + "grad_norm": 1.0178916454315186, + "learning_rate": 8.241206030150754e-05, + "loss": 1.0565, + "step": 36 + }, + { + "epoch": 0.13016710642040458, + "grad_norm": 0.8882055878639221, + "learning_rate": 8.190954773869348e-05, + "loss": 1.1768, + "step": 37 + }, + { + "epoch": 0.13368513632365875, + "grad_norm": 0.8350165486335754, + "learning_rate": 8.14070351758794e-05, + "loss": 1.0259, + "step": 38 + }, + { + "epoch": 0.13720316622691292, + "grad_norm": 0.7027862071990967, + "learning_rate": 8.090452261306533e-05, + "loss": 1.145, + "step": 39 + }, + { + "epoch": 0.14072119613016712, + "grad_norm": 0.7188254594802856, + "learning_rate": 8.040201005025126e-05, + "loss": 1.1662, + "step": 40 + }, + { + "epoch": 0.1442392260334213, + "grad_norm": 0.6351075172424316, + "learning_rate": 7.989949748743719e-05, + "loss": 1.1641, + "step": 41 + }, + { + "epoch": 0.14775725593667546, + "grad_norm": 0.5557705760002136, + "learning_rate": 7.939698492462313e-05, + "loss": 1.1442, + "step": 42 + }, + { + "epoch": 0.15127528583992964, + "grad_norm": 0.6325380802154541, + "learning_rate": 7.889447236180904e-05, + "loss": 1.1855, + "step": 43 + }, + { + "epoch": 0.1547933157431838, + "grad_norm": 0.5265153646469116, + "learning_rate": 7.839195979899498e-05, + "loss": 1.115, + "step": 44 + }, + { + "epoch": 0.158311345646438, + "grad_norm": 0.5598963499069214, + "learning_rate": 7.788944723618091e-05, + "loss": 1.0898, + "step": 45 + }, + { + "epoch": 0.16182937554969218, + "grad_norm": 0.5514755249023438, + "learning_rate": 7.738693467336684e-05, + "loss": 1.0811, + "step": 46 + }, + { + "epoch": 0.16534740545294635, + "grad_norm": 0.6218781471252441, + "learning_rate": 7.688442211055277e-05, + "loss": 1.0203, + "step": 47 + }, + { + "epoch": 0.16886543535620052, + "grad_norm": 0.607247531414032, + "learning_rate": 7.638190954773869e-05, + "loss": 1.0839, + "step": 48 + }, + { + "epoch": 0.1723834652594547, + "grad_norm": 0.5506616234779358, + "learning_rate": 7.587939698492463e-05, + "loss": 1.0826, + "step": 49 + }, + { + "epoch": 0.1759014951627089, + "grad_norm": 0.647196352481842, + "learning_rate": 7.537688442211056e-05, + "loss": 1.1292, + "step": 50 + }, + { + "epoch": 0.17941952506596306, + "grad_norm": 0.797627866268158, + "learning_rate": 7.487437185929649e-05, + "loss": 1.0895, + "step": 51 + }, + { + "epoch": 0.18293755496921724, + "grad_norm": 0.5852944254875183, + "learning_rate": 7.437185929648241e-05, + "loss": 1.0946, + "step": 52 + }, + { + "epoch": 0.1864555848724714, + "grad_norm": 0.5789614319801331, + "learning_rate": 7.386934673366834e-05, + "loss": 1.1042, + "step": 53 + }, + { + "epoch": 0.18997361477572558, + "grad_norm": 0.7171733975410461, + "learning_rate": 7.336683417085427e-05, + "loss": 1.1673, + "step": 54 + }, + { + "epoch": 0.19349164467897978, + "grad_norm": 0.696244478225708, + "learning_rate": 7.28643216080402e-05, + "loss": 1.194, + "step": 55 + }, + { + "epoch": 0.19700967458223395, + "grad_norm": 0.5782675743103027, + "learning_rate": 7.236180904522614e-05, + "loss": 1.0215, + "step": 56 + }, + { + "epoch": 0.20052770448548812, + "grad_norm": 0.56525719165802, + "learning_rate": 7.185929648241206e-05, + "loss": 0.9575, + "step": 57 + }, + { + "epoch": 0.2040457343887423, + "grad_norm": 0.5938221216201782, + "learning_rate": 7.135678391959799e-05, + "loss": 1.0871, + "step": 58 + }, + { + "epoch": 0.2075637642919965, + "grad_norm": 0.6361908316612244, + "learning_rate": 7.085427135678392e-05, + "loss": 1.1663, + "step": 59 + }, + { + "epoch": 0.21108179419525067, + "grad_norm": 0.5714772939682007, + "learning_rate": 7.035175879396985e-05, + "loss": 1.0725, + "step": 60 + }, + { + "epoch": 0.21459982409850484, + "grad_norm": 0.6428077220916748, + "learning_rate": 6.984924623115579e-05, + "loss": 1.0742, + "step": 61 + }, + { + "epoch": 0.218117854001759, + "grad_norm": 0.5870627164840698, + "learning_rate": 6.93467336683417e-05, + "loss": 1.1117, + "step": 62 + }, + { + "epoch": 0.22163588390501318, + "grad_norm": 0.5735807418823242, + "learning_rate": 6.884422110552764e-05, + "loss": 1.1055, + "step": 63 + }, + { + "epoch": 0.22515391380826738, + "grad_norm": 0.5287720561027527, + "learning_rate": 6.834170854271357e-05, + "loss": 1.0209, + "step": 64 + }, + { + "epoch": 0.22867194371152155, + "grad_norm": 0.5561414361000061, + "learning_rate": 6.78391959798995e-05, + "loss": 1.0687, + "step": 65 + }, + { + "epoch": 0.23218997361477572, + "grad_norm": 0.5748591423034668, + "learning_rate": 6.733668341708544e-05, + "loss": 1.0486, + "step": 66 + }, + { + "epoch": 0.2357080035180299, + "grad_norm": 0.5794631242752075, + "learning_rate": 6.683417085427135e-05, + "loss": 1.0788, + "step": 67 + }, + { + "epoch": 0.23922603342128407, + "grad_norm": 0.549004077911377, + "learning_rate": 6.633165829145729e-05, + "loss": 1.0527, + "step": 68 + }, + { + "epoch": 0.24274406332453827, + "grad_norm": 0.5533168911933899, + "learning_rate": 6.582914572864322e-05, + "loss": 1.0368, + "step": 69 + }, + { + "epoch": 0.24626209322779244, + "grad_norm": 0.6204813122749329, + "learning_rate": 6.532663316582915e-05, + "loss": 1.1146, + "step": 70 + }, + { + "epoch": 0.2497801231310466, + "grad_norm": 0.6446285843849182, + "learning_rate": 6.482412060301508e-05, + "loss": 1.0497, + "step": 71 + }, + { + "epoch": 0.2532981530343008, + "grad_norm": 0.6504891514778137, + "learning_rate": 6.4321608040201e-05, + "loss": 1.1445, + "step": 72 + }, + { + "epoch": 0.256816182937555, + "grad_norm": 0.5598528981208801, + "learning_rate": 6.381909547738694e-05, + "loss": 1.0714, + "step": 73 + }, + { + "epoch": 0.26033421284080915, + "grad_norm": 0.5397841930389404, + "learning_rate": 6.331658291457287e-05, + "loss": 1.0545, + "step": 74 + }, + { + "epoch": 0.2638522427440633, + "grad_norm": 0.572364330291748, + "learning_rate": 6.28140703517588e-05, + "loss": 1.0755, + "step": 75 + }, + { + "epoch": 0.2673702726473175, + "grad_norm": 0.5797960758209229, + "learning_rate": 6.231155778894473e-05, + "loss": 1.0245, + "step": 76 + }, + { + "epoch": 0.27088830255057167, + "grad_norm": 0.6335171461105347, + "learning_rate": 6.180904522613065e-05, + "loss": 1.108, + "step": 77 + }, + { + "epoch": 0.27440633245382584, + "grad_norm": 0.6319923400878906, + "learning_rate": 6.130653266331658e-05, + "loss": 1.0469, + "step": 78 + }, + { + "epoch": 0.27792436235708, + "grad_norm": 0.5728479623794556, + "learning_rate": 6.080402010050251e-05, + "loss": 1.0828, + "step": 79 + }, + { + "epoch": 0.28144239226033424, + "grad_norm": 0.6093153953552246, + "learning_rate": 6.030150753768844e-05, + "loss": 1.0147, + "step": 80 + }, + { + "epoch": 0.2849604221635884, + "grad_norm": 0.5884273648262024, + "learning_rate": 5.979899497487438e-05, + "loss": 1.1164, + "step": 81 + }, + { + "epoch": 0.2884784520668426, + "grad_norm": 0.571577787399292, + "learning_rate": 5.929648241206031e-05, + "loss": 1.1087, + "step": 82 + }, + { + "epoch": 0.29199648197009676, + "grad_norm": 0.7632778286933899, + "learning_rate": 5.879396984924623e-05, + "loss": 1.0113, + "step": 83 + }, + { + "epoch": 0.2955145118733509, + "grad_norm": 0.6267596483230591, + "learning_rate": 5.829145728643216e-05, + "loss": 1.0931, + "step": 84 + }, + { + "epoch": 0.2990325417766051, + "grad_norm": 0.5532920360565186, + "learning_rate": 5.778894472361809e-05, + "loss": 1.0201, + "step": 85 + }, + { + "epoch": 0.30255057167985927, + "grad_norm": 0.6293766498565674, + "learning_rate": 5.728643216080403e-05, + "loss": 1.0352, + "step": 86 + }, + { + "epoch": 0.30606860158311344, + "grad_norm": 0.6214576363563538, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.0369, + "step": 87 + }, + { + "epoch": 0.3095866314863676, + "grad_norm": 0.5308843851089478, + "learning_rate": 5.628140703517588e-05, + "loss": 1.0555, + "step": 88 + }, + { + "epoch": 0.3131046613896218, + "grad_norm": 0.5727891325950623, + "learning_rate": 5.577889447236181e-05, + "loss": 0.9891, + "step": 89 + }, + { + "epoch": 0.316622691292876, + "grad_norm": 0.6149362921714783, + "learning_rate": 5.527638190954774e-05, + "loss": 1.0972, + "step": 90 + }, + { + "epoch": 0.3201407211961302, + "grad_norm": 0.6297855973243713, + "learning_rate": 5.477386934673368e-05, + "loss": 1.0708, + "step": 91 + }, + { + "epoch": 0.32365875109938436, + "grad_norm": 0.573946475982666, + "learning_rate": 5.4271356783919604e-05, + "loss": 1.0533, + "step": 92 + }, + { + "epoch": 0.32717678100263853, + "grad_norm": 0.5881888270378113, + "learning_rate": 5.376884422110553e-05, + "loss": 1.0988, + "step": 93 + }, + { + "epoch": 0.3306948109058927, + "grad_norm": 0.5887706279754639, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.0445, + "step": 94 + }, + { + "epoch": 0.33421284080914687, + "grad_norm": 0.6907920241355896, + "learning_rate": 5.276381909547739e-05, + "loss": 1.1062, + "step": 95 + }, + { + "epoch": 0.33773087071240104, + "grad_norm": 0.6591424942016602, + "learning_rate": 5.226130653266332e-05, + "loss": 1.1621, + "step": 96 + }, + { + "epoch": 0.3412489006156552, + "grad_norm": 0.6175463795661926, + "learning_rate": 5.175879396984925e-05, + "loss": 1.0579, + "step": 97 + }, + { + "epoch": 0.3447669305189094, + "grad_norm": 0.6269632577896118, + "learning_rate": 5.125628140703518e-05, + "loss": 1.0859, + "step": 98 + }, + { + "epoch": 0.3482849604221636, + "grad_norm": 0.6850054860115051, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.0723, + "step": 99 + }, + { + "epoch": 0.3518029903254178, + "grad_norm": 0.6422050595283508, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.0087, + "step": 100 + }, + { + "epoch": 0.35532102022867196, + "grad_norm": 0.5717617273330688, + "learning_rate": 4.974874371859297e-05, + "loss": 0.9635, + "step": 101 + }, + { + "epoch": 0.35883905013192613, + "grad_norm": 0.5820629000663757, + "learning_rate": 4.92462311557789e-05, + "loss": 1.0405, + "step": 102 + }, + { + "epoch": 0.3623570800351803, + "grad_norm": 0.5893539786338806, + "learning_rate": 4.874371859296483e-05, + "loss": 1.0752, + "step": 103 + }, + { + "epoch": 0.3658751099384345, + "grad_norm": 0.6108641624450684, + "learning_rate": 4.824120603015075e-05, + "loss": 1.0714, + "step": 104 + }, + { + "epoch": 0.36939313984168864, + "grad_norm": 0.5932585000991821, + "learning_rate": 4.7738693467336685e-05, + "loss": 0.9836, + "step": 105 + }, + { + "epoch": 0.3729111697449428, + "grad_norm": 0.67912757396698, + "learning_rate": 4.723618090452262e-05, + "loss": 1.0986, + "step": 106 + }, + { + "epoch": 0.376429199648197, + "grad_norm": 0.6052365899085999, + "learning_rate": 4.673366834170855e-05, + "loss": 1.085, + "step": 107 + }, + { + "epoch": 0.37994722955145116, + "grad_norm": 0.6279844641685486, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.1691, + "step": 108 + }, + { + "epoch": 0.3834652594547054, + "grad_norm": 0.7127268314361572, + "learning_rate": 4.57286432160804e-05, + "loss": 1.109, + "step": 109 + }, + { + "epoch": 0.38698328935795956, + "grad_norm": 0.6054704189300537, + "learning_rate": 4.522613065326633e-05, + "loss": 1.0202, + "step": 110 + }, + { + "epoch": 0.39050131926121373, + "grad_norm": 0.689251184463501, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.1302, + "step": 111 + }, + { + "epoch": 0.3940193491644679, + "grad_norm": 0.5943912863731384, + "learning_rate": 4.42211055276382e-05, + "loss": 1.0227, + "step": 112 + }, + { + "epoch": 0.3975373790677221, + "grad_norm": 0.6490674018859863, + "learning_rate": 4.3718592964824124e-05, + "loss": 0.9969, + "step": 113 + }, + { + "epoch": 0.40105540897097625, + "grad_norm": 0.6700933575630188, + "learning_rate": 4.321608040201005e-05, + "loss": 1.0294, + "step": 114 + }, + { + "epoch": 0.4045734388742304, + "grad_norm": 0.598827064037323, + "learning_rate": 4.271356783919598e-05, + "loss": 1.0252, + "step": 115 + }, + { + "epoch": 0.4080914687774846, + "grad_norm": 0.6202813386917114, + "learning_rate": 4.2211055276381914e-05, + "loss": 1.1204, + "step": 116 + }, + { + "epoch": 0.41160949868073876, + "grad_norm": 0.6458904147148132, + "learning_rate": 4.170854271356784e-05, + "loss": 1.0188, + "step": 117 + }, + { + "epoch": 0.415127528583993, + "grad_norm": 0.6134166121482849, + "learning_rate": 4.120603015075377e-05, + "loss": 0.9824, + "step": 118 + }, + { + "epoch": 0.41864555848724716, + "grad_norm": 0.6141886115074158, + "learning_rate": 4.07035175879397e-05, + "loss": 1.0266, + "step": 119 + }, + { + "epoch": 0.42216358839050133, + "grad_norm": 0.6362076997756958, + "learning_rate": 4.020100502512563e-05, + "loss": 1.081, + "step": 120 + }, + { + "epoch": 0.4256816182937555, + "grad_norm": 0.6695643067359924, + "learning_rate": 3.969849246231156e-05, + "loss": 1.0661, + "step": 121 + }, + { + "epoch": 0.4291996481970097, + "grad_norm": 0.59818434715271, + "learning_rate": 3.919597989949749e-05, + "loss": 1.0997, + "step": 122 + }, + { + "epoch": 0.43271767810026385, + "grad_norm": 0.6445558667182922, + "learning_rate": 3.869346733668342e-05, + "loss": 0.9797, + "step": 123 + }, + { + "epoch": 0.436235708003518, + "grad_norm": 0.6482073664665222, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.0827, + "step": 124 + }, + { + "epoch": 0.4397537379067722, + "grad_norm": 0.7097709774971008, + "learning_rate": 3.768844221105528e-05, + "loss": 0.9915, + "step": 125 + }, + { + "epoch": 0.44327176781002636, + "grad_norm": 0.6752858757972717, + "learning_rate": 3.7185929648241204e-05, + "loss": 1.0699, + "step": 126 + }, + { + "epoch": 0.4467897977132806, + "grad_norm": 0.7337484359741211, + "learning_rate": 3.668341708542714e-05, + "loss": 1.0176, + "step": 127 + }, + { + "epoch": 0.45030782761653476, + "grad_norm": 0.711415708065033, + "learning_rate": 3.618090452261307e-05, + "loss": 1.0419, + "step": 128 + }, + { + "epoch": 0.45382585751978893, + "grad_norm": 0.6305501461029053, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.042, + "step": 129 + }, + { + "epoch": 0.4573438874230431, + "grad_norm": 0.6175320744514465, + "learning_rate": 3.517587939698493e-05, + "loss": 0.9975, + "step": 130 + }, + { + "epoch": 0.4608619173262973, + "grad_norm": 0.6159985065460205, + "learning_rate": 3.467336683417085e-05, + "loss": 1.0048, + "step": 131 + }, + { + "epoch": 0.46437994722955145, + "grad_norm": 0.6344972848892212, + "learning_rate": 3.4170854271356785e-05, + "loss": 0.9882, + "step": 132 + }, + { + "epoch": 0.4678979771328056, + "grad_norm": 0.6693101525306702, + "learning_rate": 3.366834170854272e-05, + "loss": 1.0668, + "step": 133 + }, + { + "epoch": 0.4714160070360598, + "grad_norm": 0.6465882062911987, + "learning_rate": 3.3165829145728643e-05, + "loss": 1.0226, + "step": 134 + }, + { + "epoch": 0.47493403693931396, + "grad_norm": 0.5668264031410217, + "learning_rate": 3.2663316582914576e-05, + "loss": 0.9576, + "step": 135 + }, + { + "epoch": 0.47845206684256814, + "grad_norm": 0.6542699337005615, + "learning_rate": 3.21608040201005e-05, + "loss": 1.1298, + "step": 136 + }, + { + "epoch": 0.48197009674582236, + "grad_norm": 0.6975705027580261, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.1102, + "step": 137 + }, + { + "epoch": 0.48548812664907653, + "grad_norm": 0.619424045085907, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.019, + "step": 138 + }, + { + "epoch": 0.4890061565523307, + "grad_norm": 0.6337068676948547, + "learning_rate": 3.065326633165829e-05, + "loss": 1.0859, + "step": 139 + }, + { + "epoch": 0.4925241864555849, + "grad_norm": 0.6132563948631287, + "learning_rate": 3.015075376884422e-05, + "loss": 1.0584, + "step": 140 + }, + { + "epoch": 0.49604221635883905, + "grad_norm": 0.7202109694480896, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.0862, + "step": 141 + }, + { + "epoch": 0.4995602462620932, + "grad_norm": 0.6384497284889221, + "learning_rate": 2.914572864321608e-05, + "loss": 1.013, + "step": 142 + }, + { + "epoch": 0.5030782761653474, + "grad_norm": 0.7006785273551941, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.0592, + "step": 143 + }, + { + "epoch": 0.5065963060686016, + "grad_norm": 0.6348549127578735, + "learning_rate": 2.814070351758794e-05, + "loss": 1.0016, + "step": 144 + }, + { + "epoch": 0.5101143359718557, + "grad_norm": 0.6265092492103577, + "learning_rate": 2.763819095477387e-05, + "loss": 1.0603, + "step": 145 + }, + { + "epoch": 0.51363236587511, + "grad_norm": 0.6380993127822876, + "learning_rate": 2.7135678391959802e-05, + "loss": 1.0245, + "step": 146 + }, + { + "epoch": 0.5171503957783641, + "grad_norm": 0.6516243815422058, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.0505, + "step": 147 + }, + { + "epoch": 0.5206684256816183, + "grad_norm": 0.6753267049789429, + "learning_rate": 2.613065326633166e-05, + "loss": 1.1249, + "step": 148 + }, + { + "epoch": 0.5241864555848724, + "grad_norm": 0.6012075543403625, + "learning_rate": 2.562814070351759e-05, + "loss": 0.9827, + "step": 149 + }, + { + "epoch": 0.5277044854881267, + "grad_norm": 0.6272696256637573, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.0322, + "step": 150 + }, + { + "epoch": 0.5312225153913809, + "grad_norm": 0.6878204345703125, + "learning_rate": 2.462311557788945e-05, + "loss": 1.002, + "step": 151 + }, + { + "epoch": 0.534740545294635, + "grad_norm": 0.6404775381088257, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.0774, + "step": 152 + }, + { + "epoch": 0.5382585751978892, + "grad_norm": 0.7648995518684387, + "learning_rate": 2.361809045226131e-05, + "loss": 1.0134, + "step": 153 + }, + { + "epoch": 0.5417766051011433, + "grad_norm": 0.7643718719482422, + "learning_rate": 2.3115577889447238e-05, + "loss": 0.9977, + "step": 154 + }, + { + "epoch": 0.5452946350043976, + "grad_norm": 0.6613923907279968, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.0871, + "step": 155 + }, + { + "epoch": 0.5488126649076517, + "grad_norm": 0.6442428231239319, + "learning_rate": 2.21105527638191e-05, + "loss": 1.0188, + "step": 156 + }, + { + "epoch": 0.5523306948109059, + "grad_norm": 0.7460324764251709, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.0014, + "step": 157 + }, + { + "epoch": 0.55584872471416, + "grad_norm": 0.6691567301750183, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.0813, + "step": 158 + }, + { + "epoch": 0.5593667546174143, + "grad_norm": 0.6895260214805603, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.0574, + "step": 159 + }, + { + "epoch": 0.5628847845206685, + "grad_norm": 0.7147626280784607, + "learning_rate": 2.0100502512562815e-05, + "loss": 0.9972, + "step": 160 + }, + { + "epoch": 0.5664028144239226, + "grad_norm": 0.6976203918457031, + "learning_rate": 1.9597989949748744e-05, + "loss": 0.9591, + "step": 161 + }, + { + "epoch": 0.5699208443271768, + "grad_norm": 0.6942258477210999, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.0612, + "step": 162 + }, + { + "epoch": 0.5734388742304309, + "grad_norm": 0.6492617726325989, + "learning_rate": 1.8592964824120602e-05, + "loss": 1.0369, + "step": 163 + }, + { + "epoch": 0.5769569041336852, + "grad_norm": 0.6161103844642639, + "learning_rate": 1.8090452261306535e-05, + "loss": 1.008, + "step": 164 + }, + { + "epoch": 0.5804749340369393, + "grad_norm": 0.7450383305549622, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.0301, + "step": 165 + }, + { + "epoch": 0.5839929639401935, + "grad_norm": 0.6780828237533569, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.0437, + "step": 166 + }, + { + "epoch": 0.5875109938434476, + "grad_norm": 0.6515783667564392, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.0518, + "step": 167 + }, + { + "epoch": 0.5910290237467019, + "grad_norm": 0.679307222366333, + "learning_rate": 1.608040201005025e-05, + "loss": 1.111, + "step": 168 + }, + { + "epoch": 0.594547053649956, + "grad_norm": 0.6973927021026611, + "learning_rate": 1.5577889447236183e-05, + "loss": 1.0352, + "step": 169 + }, + { + "epoch": 0.5980650835532102, + "grad_norm": 0.6596704125404358, + "learning_rate": 1.507537688442211e-05, + "loss": 1.0896, + "step": 170 + }, + { + "epoch": 0.6015831134564644, + "grad_norm": 0.6504830121994019, + "learning_rate": 1.457286432160804e-05, + "loss": 0.9842, + "step": 171 + }, + { + "epoch": 0.6051011433597185, + "grad_norm": 0.5862520337104797, + "learning_rate": 1.407035175879397e-05, + "loss": 1.034, + "step": 172 + }, + { + "epoch": 0.6086191732629728, + "grad_norm": 0.6879701018333435, + "learning_rate": 1.3567839195979901e-05, + "loss": 1.0594, + "step": 173 + }, + { + "epoch": 0.6121372031662269, + "grad_norm": 0.635339081287384, + "learning_rate": 1.306532663316583e-05, + "loss": 0.9909, + "step": 174 + }, + { + "epoch": 0.6156552330694811, + "grad_norm": 0.7221688628196716, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.0642, + "step": 175 + }, + { + "epoch": 0.6191732629727352, + "grad_norm": 0.6766290068626404, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.0459, + "step": 176 + }, + { + "epoch": 0.6226912928759895, + "grad_norm": 0.7117000818252563, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.1526, + "step": 177 + }, + { + "epoch": 0.6262093227792436, + "grad_norm": 0.6454238295555115, + "learning_rate": 1.105527638190955e-05, + "loss": 1.0554, + "step": 178 + }, + { + "epoch": 0.6297273526824978, + "grad_norm": 0.6204365491867065, + "learning_rate": 1.0552763819095479e-05, + "loss": 1.0129, + "step": 179 + }, + { + "epoch": 0.633245382585752, + "grad_norm": 0.6786676645278931, + "learning_rate": 1.0050251256281408e-05, + "loss": 1.0237, + "step": 180 + }, + { + "epoch": 0.6367634124890061, + "grad_norm": 0.6604948043823242, + "learning_rate": 9.547738693467337e-06, + "loss": 1.1062, + "step": 181 + }, + { + "epoch": 0.6402814423922604, + "grad_norm": 0.6684358716011047, + "learning_rate": 9.045226130653267e-06, + "loss": 1.0005, + "step": 182 + }, + { + "epoch": 0.6437994722955145, + "grad_norm": 0.6991133093833923, + "learning_rate": 8.542713567839196e-06, + "loss": 1.0499, + "step": 183 + }, + { + "epoch": 0.6473175021987687, + "grad_norm": 0.6463094353675842, + "learning_rate": 8.040201005025125e-06, + "loss": 0.993, + "step": 184 + }, + { + "epoch": 0.6508355321020228, + "grad_norm": 0.6408785581588745, + "learning_rate": 7.537688442211055e-06, + "loss": 1.0373, + "step": 185 + }, + { + "epoch": 0.6543535620052771, + "grad_norm": 0.6173821687698364, + "learning_rate": 7.035175879396985e-06, + "loss": 1.0423, + "step": 186 + }, + { + "epoch": 0.6578715919085312, + "grad_norm": 0.6197496652603149, + "learning_rate": 6.532663316582915e-06, + "loss": 1.0692, + "step": 187 + }, + { + "epoch": 0.6613896218117854, + "grad_norm": 0.6757541298866272, + "learning_rate": 6.030150753768844e-06, + "loss": 1.1329, + "step": 188 + }, + { + "epoch": 0.6649076517150396, + "grad_norm": 0.7068001627922058, + "learning_rate": 5.527638190954775e-06, + "loss": 1.0555, + "step": 189 + }, + { + "epoch": 0.6684256816182937, + "grad_norm": 0.7299413681030273, + "learning_rate": 5.025125628140704e-06, + "loss": 1.0452, + "step": 190 + }, + { + "epoch": 0.671943711521548, + "grad_norm": 0.6925146579742432, + "learning_rate": 4.522613065326634e-06, + "loss": 1.0594, + "step": 191 + }, + { + "epoch": 0.6754617414248021, + "grad_norm": 0.7352191805839539, + "learning_rate": 4.020100502512563e-06, + "loss": 1.0917, + "step": 192 + }, + { + "epoch": 0.6789797713280563, + "grad_norm": 0.7139914035797119, + "learning_rate": 3.5175879396984926e-06, + "loss": 1.0938, + "step": 193 + }, + { + "epoch": 0.6824978012313104, + "grad_norm": 0.6927515864372253, + "learning_rate": 3.015075376884422e-06, + "loss": 1.01, + "step": 194 + }, + { + "epoch": 0.6860158311345647, + "grad_norm": 0.6721327900886536, + "learning_rate": 2.512562814070352e-06, + "loss": 1.1126, + "step": 195 + }, + { + "epoch": 0.6895338610378188, + "grad_norm": 0.6539496183395386, + "learning_rate": 2.0100502512562813e-06, + "loss": 0.9925, + "step": 196 + }, + { + "epoch": 0.693051890941073, + "grad_norm": 0.6722233295440674, + "learning_rate": 1.507537688442211e-06, + "loss": 1.0793, + "step": 197 + }, + { + "epoch": 0.6965699208443272, + "grad_norm": 0.6497196555137634, + "learning_rate": 1.0050251256281407e-06, + "loss": 1.0375, + "step": 198 + }, + { + "epoch": 0.7000879507475813, + "grad_norm": 0.7125354409217834, + "learning_rate": 5.025125628140703e-07, + "loss": 1.0533, + "step": 199 + }, + { + "epoch": 0.7036059806508356, + "grad_norm": 0.6215020418167114, + "learning_rate": 0.0, + "loss": 1.0122, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.7282387435421696e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_Vietnamese_French/checkpoint-200/training_args.bin b/llama_Vietnamese_French/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e8762e4ac42c4a6b78e75d11055e06e5d1c6634b --- /dev/null +++ b/llama_Vietnamese_French/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6004a7bf4167be6ad7655b3854b9575474e8fdd5a4010763c3cb2ea95309f09 +size 5624 diff --git a/llama_Vietnamese_German/checkpoint-200/README.md b/llama_Vietnamese_German/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec91fdd050e2a5613ef04bb6c8fc77a63b8b10a1 --- /dev/null +++ b/llama_Vietnamese_German/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/llama_Vietnamese_German/checkpoint-200/adapter_config.json b/llama_Vietnamese_German/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d80c34b3e5225e2d114bdd1c95822afed5d012c2 --- /dev/null +++ b/llama_Vietnamese_German/checkpoint-200/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "q_proj", + "v_proj", + "k_proj", + "gate_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/llama_Vietnamese_German/checkpoint-200/adapter_model.safetensors b/llama_Vietnamese_German/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f50727e81998be5b6cde687ee4f7b25b9557742 --- /dev/null +++ b/llama_Vietnamese_German/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43c01213d7cc8e9f901ee10e6757196bc1d455cd792c3459d8a4bc6a0243d78f +size 167832240 diff --git a/llama_Vietnamese_German/checkpoint-200/optimizer.pt b/llama_Vietnamese_German/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6404cef50d509e1b46f38602ddcf3f2293d4a44a --- /dev/null +++ b/llama_Vietnamese_German/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce75e38a3c3c6c03cbffaa9287d1976ecfaf327d393385983adf21437517fe8e +size 85723284 diff --git a/llama_Vietnamese_German/checkpoint-200/rng_state.pth b/llama_Vietnamese_German/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/llama_Vietnamese_German/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/llama_Vietnamese_German/checkpoint-200/scheduler.pt b/llama_Vietnamese_German/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad11d6d5288d2841ba96a6d7d3838c80c80097a --- /dev/null +++ b/llama_Vietnamese_German/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96 +size 1064 diff --git a/llama_Vietnamese_German/checkpoint-200/special_tokens_map.json b/llama_Vietnamese_German/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/llama_Vietnamese_German/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/llama_Vietnamese_German/checkpoint-200/tokenizer.json b/llama_Vietnamese_German/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/llama_Vietnamese_German/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/llama_Vietnamese_German/checkpoint-200/tokenizer_config.json b/llama_Vietnamese_German/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..447bd30f21e8c20e9e6497d6cfc650a3881e26a6 --- /dev/null +++ b/llama_Vietnamese_German/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/llama_Vietnamese_German/checkpoint-200/trainer_state.json b/llama_Vietnamese_German/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6a8b38cac6e4513273811495d3477ca1a6dd5663 --- /dev/null +++ b/llama_Vietnamese_German/checkpoint-200/trainer_state.json @@ -0,0 +1,1433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7036059806508356, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.003518029903254178, + "grad_norm": 1.3360792398452759, + "learning_rate": 0.0001, + "loss": 2.8417, + "step": 1 + }, + { + "epoch": 0.007036059806508356, + "grad_norm": 1.3685423135757446, + "learning_rate": 9.949748743718594e-05, + "loss": 2.92, + "step": 2 + }, + { + "epoch": 0.010554089709762533, + "grad_norm": 1.2149922847747803, + "learning_rate": 9.899497487437186e-05, + "loss": 2.6424, + "step": 3 + }, + { + "epoch": 0.014072119613016711, + "grad_norm": 1.3716614246368408, + "learning_rate": 9.84924623115578e-05, + "loss": 2.5368, + "step": 4 + }, + { + "epoch": 0.01759014951627089, + "grad_norm": 1.3972867727279663, + "learning_rate": 9.798994974874372e-05, + "loss": 2.418, + "step": 5 + }, + { + "epoch": 0.021108179419525065, + "grad_norm": 1.4744296073913574, + "learning_rate": 9.748743718592965e-05, + "loss": 2.2083, + "step": 6 + }, + { + "epoch": 0.024626209322779244, + "grad_norm": 1.244842767715454, + "learning_rate": 9.698492462311559e-05, + "loss": 1.8786, + "step": 7 + }, + { + "epoch": 0.028144239226033423, + "grad_norm": 1.3736884593963623, + "learning_rate": 9.64824120603015e-05, + "loss": 1.9143, + "step": 8 + }, + { + "epoch": 0.0316622691292876, + "grad_norm": 1.237295389175415, + "learning_rate": 9.597989949748745e-05, + "loss": 1.7465, + "step": 9 + }, + { + "epoch": 0.03518029903254178, + "grad_norm": 1.4365873336791992, + "learning_rate": 9.547738693467337e-05, + "loss": 1.6363, + "step": 10 + }, + { + "epoch": 0.03869832893579595, + "grad_norm": 1.0808076858520508, + "learning_rate": 9.49748743718593e-05, + "loss": 1.4111, + "step": 11 + }, + { + "epoch": 0.04221635883905013, + "grad_norm": 0.7661070823669434, + "learning_rate": 9.447236180904523e-05, + "loss": 1.3568, + "step": 12 + }, + { + "epoch": 0.04573438874230431, + "grad_norm": 0.5925489068031311, + "learning_rate": 9.396984924623115e-05, + "loss": 1.3769, + "step": 13 + }, + { + "epoch": 0.04925241864555849, + "grad_norm": 0.5987533926963806, + "learning_rate": 9.34673366834171e-05, + "loss": 1.4287, + "step": 14 + }, + { + "epoch": 0.052770448548812667, + "grad_norm": 0.6528617739677429, + "learning_rate": 9.296482412060302e-05, + "loss": 1.2877, + "step": 15 + }, + { + "epoch": 0.056288478452066845, + "grad_norm": 0.6112993955612183, + "learning_rate": 9.246231155778895e-05, + "loss": 1.3118, + "step": 16 + }, + { + "epoch": 0.05980650835532102, + "grad_norm": 0.6623355746269226, + "learning_rate": 9.195979899497488e-05, + "loss": 1.2819, + "step": 17 + }, + { + "epoch": 0.0633245382585752, + "grad_norm": 0.7247167229652405, + "learning_rate": 9.14572864321608e-05, + "loss": 1.2531, + "step": 18 + }, + { + "epoch": 0.06684256816182937, + "grad_norm": 0.7712353467941284, + "learning_rate": 9.095477386934675e-05, + "loss": 1.364, + "step": 19 + }, + { + "epoch": 0.07036059806508356, + "grad_norm": 0.7478406429290771, + "learning_rate": 9.045226130653267e-05, + "loss": 1.2567, + "step": 20 + }, + { + "epoch": 0.07387862796833773, + "grad_norm": 0.7744011282920837, + "learning_rate": 8.99497487437186e-05, + "loss": 1.3122, + "step": 21 + }, + { + "epoch": 0.0773966578715919, + "grad_norm": 0.7793160676956177, + "learning_rate": 8.944723618090453e-05, + "loss": 1.1926, + "step": 22 + }, + { + "epoch": 0.08091468777484609, + "grad_norm": 0.7187597155570984, + "learning_rate": 8.894472361809045e-05, + "loss": 1.2711, + "step": 23 + }, + { + "epoch": 0.08443271767810026, + "grad_norm": 0.8387140035629272, + "learning_rate": 8.84422110552764e-05, + "loss": 1.3181, + "step": 24 + }, + { + "epoch": 0.08795074758135445, + "grad_norm": 0.7830463647842407, + "learning_rate": 8.793969849246232e-05, + "loss": 1.2816, + "step": 25 + }, + { + "epoch": 0.09146877748460862, + "grad_norm": 0.7619427442550659, + "learning_rate": 8.743718592964825e-05, + "loss": 1.1811, + "step": 26 + }, + { + "epoch": 0.09498680738786279, + "grad_norm": 0.8177812099456787, + "learning_rate": 8.693467336683418e-05, + "loss": 1.2285, + "step": 27 + }, + { + "epoch": 0.09850483729111698, + "grad_norm": 0.8426425457000732, + "learning_rate": 8.64321608040201e-05, + "loss": 1.2357, + "step": 28 + }, + { + "epoch": 0.10202286719437115, + "grad_norm": 0.9162719249725342, + "learning_rate": 8.592964824120603e-05, + "loss": 1.2044, + "step": 29 + }, + { + "epoch": 0.10554089709762533, + "grad_norm": 1.0077379941940308, + "learning_rate": 8.542713567839196e-05, + "loss": 1.2544, + "step": 30 + }, + { + "epoch": 0.1090589270008795, + "grad_norm": 1.0242115259170532, + "learning_rate": 8.49246231155779e-05, + "loss": 1.1817, + "step": 31 + }, + { + "epoch": 0.11257695690413369, + "grad_norm": 0.998924732208252, + "learning_rate": 8.442211055276383e-05, + "loss": 1.2813, + "step": 32 + }, + { + "epoch": 0.11609498680738786, + "grad_norm": 1.0408722162246704, + "learning_rate": 8.391959798994975e-05, + "loss": 1.2037, + "step": 33 + }, + { + "epoch": 0.11961301671064203, + "grad_norm": 1.0635907649993896, + "learning_rate": 8.341708542713568e-05, + "loss": 1.2522, + "step": 34 + }, + { + "epoch": 0.12313104661389622, + "grad_norm": 1.1223053932189941, + "learning_rate": 8.291457286432161e-05, + "loss": 1.2586, + "step": 35 + }, + { + "epoch": 0.1266490765171504, + "grad_norm": 1.0165467262268066, + "learning_rate": 8.241206030150754e-05, + "loss": 1.1002, + "step": 36 + }, + { + "epoch": 0.13016710642040458, + "grad_norm": 0.940078616142273, + "learning_rate": 8.190954773869348e-05, + "loss": 1.2345, + "step": 37 + }, + { + "epoch": 0.13368513632365875, + "grad_norm": 0.8242586255073547, + "learning_rate": 8.14070351758794e-05, + "loss": 1.047, + "step": 38 + }, + { + "epoch": 0.13720316622691292, + "grad_norm": 0.7331597805023193, + "learning_rate": 8.090452261306533e-05, + "loss": 1.1825, + "step": 39 + }, + { + "epoch": 0.14072119613016712, + "grad_norm": 0.7018619179725647, + "learning_rate": 8.040201005025126e-05, + "loss": 1.2153, + "step": 40 + }, + { + "epoch": 0.1442392260334213, + "grad_norm": 0.6312869787216187, + "learning_rate": 7.989949748743719e-05, + "loss": 1.1311, + "step": 41 + }, + { + "epoch": 0.14775725593667546, + "grad_norm": 0.5583871006965637, + "learning_rate": 7.939698492462313e-05, + "loss": 1.1342, + "step": 42 + }, + { + "epoch": 0.15127528583992964, + "grad_norm": 0.650381326675415, + "learning_rate": 7.889447236180904e-05, + "loss": 1.2097, + "step": 43 + }, + { + "epoch": 0.1547933157431838, + "grad_norm": 0.5556297898292542, + "learning_rate": 7.839195979899498e-05, + "loss": 1.1223, + "step": 44 + }, + { + "epoch": 0.158311345646438, + "grad_norm": 0.5799769163131714, + "learning_rate": 7.788944723618091e-05, + "loss": 1.1326, + "step": 45 + }, + { + "epoch": 0.16182937554969218, + "grad_norm": 0.6063705682754517, + "learning_rate": 7.738693467336684e-05, + "loss": 1.0996, + "step": 46 + }, + { + "epoch": 0.16534740545294635, + "grad_norm": 0.6463246941566467, + "learning_rate": 7.688442211055277e-05, + "loss": 1.0475, + "step": 47 + }, + { + "epoch": 0.16886543535620052, + "grad_norm": 0.6706294417381287, + "learning_rate": 7.638190954773869e-05, + "loss": 1.1118, + "step": 48 + }, + { + "epoch": 0.1723834652594547, + "grad_norm": 0.5560976266860962, + "learning_rate": 7.587939698492463e-05, + "loss": 1.0719, + "step": 49 + }, + { + "epoch": 0.1759014951627089, + "grad_norm": 0.6131833791732788, + "learning_rate": 7.537688442211056e-05, + "loss": 1.1433, + "step": 50 + }, + { + "epoch": 0.17941952506596306, + "grad_norm": 0.7590206861495972, + "learning_rate": 7.487437185929649e-05, + "loss": 1.1409, + "step": 51 + }, + { + "epoch": 0.18293755496921724, + "grad_norm": 0.5790630578994751, + "learning_rate": 7.437185929648241e-05, + "loss": 1.0945, + "step": 52 + }, + { + "epoch": 0.1864555848724714, + "grad_norm": 0.604043185710907, + "learning_rate": 7.386934673366834e-05, + "loss": 1.1306, + "step": 53 + }, + { + "epoch": 0.18997361477572558, + "grad_norm": 0.7015763521194458, + "learning_rate": 7.336683417085427e-05, + "loss": 1.1653, + "step": 54 + }, + { + "epoch": 0.19349164467897978, + "grad_norm": 0.6483339667320251, + "learning_rate": 7.28643216080402e-05, + "loss": 1.1857, + "step": 55 + }, + { + "epoch": 0.19700967458223395, + "grad_norm": 0.6077854037284851, + "learning_rate": 7.236180904522614e-05, + "loss": 1.0471, + "step": 56 + }, + { + "epoch": 0.20052770448548812, + "grad_norm": 0.5716294646263123, + "learning_rate": 7.185929648241206e-05, + "loss": 0.9573, + "step": 57 + }, + { + "epoch": 0.2040457343887423, + "grad_norm": 0.6317845582962036, + "learning_rate": 7.135678391959799e-05, + "loss": 1.0947, + "step": 58 + }, + { + "epoch": 0.2075637642919965, + "grad_norm": 0.6517119407653809, + "learning_rate": 7.085427135678392e-05, + "loss": 1.1891, + "step": 59 + }, + { + "epoch": 0.21108179419525067, + "grad_norm": 0.5757074952125549, + "learning_rate": 7.035175879396985e-05, + "loss": 1.1053, + "step": 60 + }, + { + "epoch": 0.21459982409850484, + "grad_norm": 0.7416216731071472, + "learning_rate": 6.984924623115579e-05, + "loss": 1.0992, + "step": 61 + }, + { + "epoch": 0.218117854001759, + "grad_norm": 0.6461533904075623, + "learning_rate": 6.93467336683417e-05, + "loss": 1.1575, + "step": 62 + }, + { + "epoch": 0.22163588390501318, + "grad_norm": 0.6048409342765808, + "learning_rate": 6.884422110552764e-05, + "loss": 1.1324, + "step": 63 + }, + { + "epoch": 0.22515391380826738, + "grad_norm": 0.5819121599197388, + "learning_rate": 6.834170854271357e-05, + "loss": 1.0737, + "step": 64 + }, + { + "epoch": 0.22867194371152155, + "grad_norm": 0.5394372344017029, + "learning_rate": 6.78391959798995e-05, + "loss": 1.0811, + "step": 65 + }, + { + "epoch": 0.23218997361477572, + "grad_norm": 0.5430871844291687, + "learning_rate": 6.733668341708544e-05, + "loss": 1.067, + "step": 66 + }, + { + "epoch": 0.2357080035180299, + "grad_norm": 0.6227731704711914, + "learning_rate": 6.683417085427135e-05, + "loss": 1.1439, + "step": 67 + }, + { + "epoch": 0.23922603342128407, + "grad_norm": 0.6189538836479187, + "learning_rate": 6.633165829145729e-05, + "loss": 1.1436, + "step": 68 + }, + { + "epoch": 0.24274406332453827, + "grad_norm": 0.5866702198982239, + "learning_rate": 6.582914572864322e-05, + "loss": 1.0853, + "step": 69 + }, + { + "epoch": 0.24626209322779244, + "grad_norm": 0.641211748123169, + "learning_rate": 6.532663316582915e-05, + "loss": 1.1915, + "step": 70 + }, + { + "epoch": 0.2497801231310466, + "grad_norm": 0.643389880657196, + "learning_rate": 6.482412060301508e-05, + "loss": 1.1001, + "step": 71 + }, + { + "epoch": 0.2532981530343008, + "grad_norm": 0.68277907371521, + "learning_rate": 6.4321608040201e-05, + "loss": 1.1372, + "step": 72 + }, + { + "epoch": 0.256816182937555, + "grad_norm": 0.5645820498466492, + "learning_rate": 6.381909547738694e-05, + "loss": 1.0957, + "step": 73 + }, + { + "epoch": 0.26033421284080915, + "grad_norm": 0.6058771014213562, + "learning_rate": 6.331658291457287e-05, + "loss": 1.0999, + "step": 74 + }, + { + "epoch": 0.2638522427440633, + "grad_norm": 0.5692002773284912, + "learning_rate": 6.28140703517588e-05, + "loss": 1.1026, + "step": 75 + }, + { + "epoch": 0.2673702726473175, + "grad_norm": 0.6211134791374207, + "learning_rate": 6.231155778894473e-05, + "loss": 1.0817, + "step": 76 + }, + { + "epoch": 0.27088830255057167, + "grad_norm": 0.7277857661247253, + "learning_rate": 6.180904522613065e-05, + "loss": 1.1175, + "step": 77 + }, + { + "epoch": 0.27440633245382584, + "grad_norm": 0.5896360278129578, + "learning_rate": 6.130653266331658e-05, + "loss": 1.0779, + "step": 78 + }, + { + "epoch": 0.27792436235708, + "grad_norm": 0.6623082756996155, + "learning_rate": 6.080402010050251e-05, + "loss": 1.1133, + "step": 79 + }, + { + "epoch": 0.28144239226033424, + "grad_norm": 0.7041612267494202, + "learning_rate": 6.030150753768844e-05, + "loss": 1.0468, + "step": 80 + }, + { + "epoch": 0.2849604221635884, + "grad_norm": 0.6113973259925842, + "learning_rate": 5.979899497487438e-05, + "loss": 1.1638, + "step": 81 + }, + { + "epoch": 0.2884784520668426, + "grad_norm": 0.5631269216537476, + "learning_rate": 5.929648241206031e-05, + "loss": 1.1362, + "step": 82 + }, + { + "epoch": 0.29199648197009676, + "grad_norm": 0.6552721261978149, + "learning_rate": 5.879396984924623e-05, + "loss": 1.0311, + "step": 83 + }, + { + "epoch": 0.2955145118733509, + "grad_norm": 0.6466890573501587, + "learning_rate": 5.829145728643216e-05, + "loss": 1.121, + "step": 84 + }, + { + "epoch": 0.2990325417766051, + "grad_norm": 0.5726430416107178, + "learning_rate": 5.778894472361809e-05, + "loss": 1.0496, + "step": 85 + }, + { + "epoch": 0.30255057167985927, + "grad_norm": 0.6875731348991394, + "learning_rate": 5.728643216080403e-05, + "loss": 1.0434, + "step": 86 + }, + { + "epoch": 0.30606860158311344, + "grad_norm": 0.6598615646362305, + "learning_rate": 5.6783919597989955e-05, + "loss": 1.0345, + "step": 87 + }, + { + "epoch": 0.3095866314863676, + "grad_norm": 0.5496817231178284, + "learning_rate": 5.628140703517588e-05, + "loss": 1.101, + "step": 88 + }, + { + "epoch": 0.3131046613896218, + "grad_norm": 0.5997037887573242, + "learning_rate": 5.577889447236181e-05, + "loss": 1.0054, + "step": 89 + }, + { + "epoch": 0.316622691292876, + "grad_norm": 0.6353289484977722, + "learning_rate": 5.527638190954774e-05, + "loss": 1.0978, + "step": 90 + }, + { + "epoch": 0.3201407211961302, + "grad_norm": 0.64150071144104, + "learning_rate": 5.477386934673368e-05, + "loss": 1.0793, + "step": 91 + }, + { + "epoch": 0.32365875109938436, + "grad_norm": 0.6099105477333069, + "learning_rate": 5.4271356783919604e-05, + "loss": 1.104, + "step": 92 + }, + { + "epoch": 0.32717678100263853, + "grad_norm": 0.6526631116867065, + "learning_rate": 5.376884422110553e-05, + "loss": 1.1231, + "step": 93 + }, + { + "epoch": 0.3306948109058927, + "grad_norm": 0.6510050892829895, + "learning_rate": 5.3266331658291455e-05, + "loss": 1.0941, + "step": 94 + }, + { + "epoch": 0.33421284080914687, + "grad_norm": 0.7586386203765869, + "learning_rate": 5.276381909547739e-05, + "loss": 1.1206, + "step": 95 + }, + { + "epoch": 0.33773087071240104, + "grad_norm": 0.7145034074783325, + "learning_rate": 5.226130653266332e-05, + "loss": 1.1884, + "step": 96 + }, + { + "epoch": 0.3412489006156552, + "grad_norm": 0.6233460307121277, + "learning_rate": 5.175879396984925e-05, + "loss": 1.0947, + "step": 97 + }, + { + "epoch": 0.3447669305189094, + "grad_norm": 0.6299147009849548, + "learning_rate": 5.125628140703518e-05, + "loss": 1.1269, + "step": 98 + }, + { + "epoch": 0.3482849604221636, + "grad_norm": 0.67592453956604, + "learning_rate": 5.0753768844221104e-05, + "loss": 1.1006, + "step": 99 + }, + { + "epoch": 0.3518029903254178, + "grad_norm": 0.670915961265564, + "learning_rate": 5.0251256281407036e-05, + "loss": 1.0494, + "step": 100 + }, + { + "epoch": 0.35532102022867196, + "grad_norm": 0.6665855050086975, + "learning_rate": 4.974874371859297e-05, + "loss": 1.0161, + "step": 101 + }, + { + "epoch": 0.35883905013192613, + "grad_norm": 0.6221528649330139, + "learning_rate": 4.92462311557789e-05, + "loss": 1.0839, + "step": 102 + }, + { + "epoch": 0.3623570800351803, + "grad_norm": 0.6152099370956421, + "learning_rate": 4.874371859296483e-05, + "loss": 1.0866, + "step": 103 + }, + { + "epoch": 0.3658751099384345, + "grad_norm": 0.629520833492279, + "learning_rate": 4.824120603015075e-05, + "loss": 1.1221, + "step": 104 + }, + { + "epoch": 0.36939313984168864, + "grad_norm": 0.6432055234909058, + "learning_rate": 4.7738693467336685e-05, + "loss": 1.0278, + "step": 105 + }, + { + "epoch": 0.3729111697449428, + "grad_norm": 0.6751112937927246, + "learning_rate": 4.723618090452262e-05, + "loss": 1.1385, + "step": 106 + }, + { + "epoch": 0.376429199648197, + "grad_norm": 0.6311681270599365, + "learning_rate": 4.673366834170855e-05, + "loss": 1.1093, + "step": 107 + }, + { + "epoch": 0.37994722955145116, + "grad_norm": 0.6406072378158569, + "learning_rate": 4.6231155778894475e-05, + "loss": 1.1946, + "step": 108 + }, + { + "epoch": 0.3834652594547054, + "grad_norm": 0.7560979723930359, + "learning_rate": 4.57286432160804e-05, + "loss": 1.1549, + "step": 109 + }, + { + "epoch": 0.38698328935795956, + "grad_norm": 0.631024956703186, + "learning_rate": 4.522613065326633e-05, + "loss": 1.0503, + "step": 110 + }, + { + "epoch": 0.39050131926121373, + "grad_norm": 0.7526475787162781, + "learning_rate": 4.4723618090452266e-05, + "loss": 1.2114, + "step": 111 + }, + { + "epoch": 0.3940193491644679, + "grad_norm": 0.5904809832572937, + "learning_rate": 4.42211055276382e-05, + "loss": 1.043, + "step": 112 + }, + { + "epoch": 0.3975373790677221, + "grad_norm": 0.6790784597396851, + "learning_rate": 4.3718592964824124e-05, + "loss": 1.0366, + "step": 113 + }, + { + "epoch": 0.40105540897097625, + "grad_norm": 0.7012593746185303, + "learning_rate": 4.321608040201005e-05, + "loss": 1.0719, + "step": 114 + }, + { + "epoch": 0.4045734388742304, + "grad_norm": 0.589539110660553, + "learning_rate": 4.271356783919598e-05, + "loss": 1.0543, + "step": 115 + }, + { + "epoch": 0.4080914687774846, + "grad_norm": 0.6790391802787781, + "learning_rate": 4.2211055276381914e-05, + "loss": 1.1211, + "step": 116 + }, + { + "epoch": 0.41160949868073876, + "grad_norm": 0.6352446675300598, + "learning_rate": 4.170854271356784e-05, + "loss": 1.0284, + "step": 117 + }, + { + "epoch": 0.415127528583993, + "grad_norm": 0.5925474166870117, + "learning_rate": 4.120603015075377e-05, + "loss": 1.0015, + "step": 118 + }, + { + "epoch": 0.41864555848724716, + "grad_norm": 0.6509227752685547, + "learning_rate": 4.07035175879397e-05, + "loss": 1.0755, + "step": 119 + }, + { + "epoch": 0.42216358839050133, + "grad_norm": 0.6765084266662598, + "learning_rate": 4.020100502512563e-05, + "loss": 1.0958, + "step": 120 + }, + { + "epoch": 0.4256816182937555, + "grad_norm": 0.6184081435203552, + "learning_rate": 3.969849246231156e-05, + "loss": 1.0848, + "step": 121 + }, + { + "epoch": 0.4291996481970097, + "grad_norm": 0.622106671333313, + "learning_rate": 3.919597989949749e-05, + "loss": 1.1392, + "step": 122 + }, + { + "epoch": 0.43271767810026385, + "grad_norm": 0.6595653891563416, + "learning_rate": 3.869346733668342e-05, + "loss": 1.02, + "step": 123 + }, + { + "epoch": 0.436235708003518, + "grad_norm": 0.648871660232544, + "learning_rate": 3.8190954773869346e-05, + "loss": 1.0971, + "step": 124 + }, + { + "epoch": 0.4397537379067722, + "grad_norm": 0.6348839402198792, + "learning_rate": 3.768844221105528e-05, + "loss": 1.0369, + "step": 125 + }, + { + "epoch": 0.44327176781002636, + "grad_norm": 0.6669360399246216, + "learning_rate": 3.7185929648241204e-05, + "loss": 1.0772, + "step": 126 + }, + { + "epoch": 0.4467897977132806, + "grad_norm": 0.7374732494354248, + "learning_rate": 3.668341708542714e-05, + "loss": 1.0589, + "step": 127 + }, + { + "epoch": 0.45030782761653476, + "grad_norm": 0.7171958088874817, + "learning_rate": 3.618090452261307e-05, + "loss": 1.0726, + "step": 128 + }, + { + "epoch": 0.45382585751978893, + "grad_norm": 0.6351063847541809, + "learning_rate": 3.5678391959798995e-05, + "loss": 1.0492, + "step": 129 + }, + { + "epoch": 0.4573438874230431, + "grad_norm": 0.6534695625305176, + "learning_rate": 3.517587939698493e-05, + "loss": 1.0588, + "step": 130 + }, + { + "epoch": 0.4608619173262973, + "grad_norm": 0.6171225309371948, + "learning_rate": 3.467336683417085e-05, + "loss": 1.0203, + "step": 131 + }, + { + "epoch": 0.46437994722955145, + "grad_norm": 0.6640408635139465, + "learning_rate": 3.4170854271356785e-05, + "loss": 1.0269, + "step": 132 + }, + { + "epoch": 0.4678979771328056, + "grad_norm": 0.6307463645935059, + "learning_rate": 3.366834170854272e-05, + "loss": 1.0888, + "step": 133 + }, + { + "epoch": 0.4714160070360598, + "grad_norm": 0.8293411135673523, + "learning_rate": 3.3165829145728643e-05, + "loss": 1.0647, + "step": 134 + }, + { + "epoch": 0.47493403693931396, + "grad_norm": 0.6019444465637207, + "learning_rate": 3.2663316582914576e-05, + "loss": 0.9662, + "step": 135 + }, + { + "epoch": 0.47845206684256814, + "grad_norm": 0.6690985560417175, + "learning_rate": 3.21608040201005e-05, + "loss": 1.1377, + "step": 136 + }, + { + "epoch": 0.48197009674582236, + "grad_norm": 0.7211702466011047, + "learning_rate": 3.1658291457286434e-05, + "loss": 1.1561, + "step": 137 + }, + { + "epoch": 0.48548812664907653, + "grad_norm": 0.677986204624176, + "learning_rate": 3.1155778894472366e-05, + "loss": 1.0744, + "step": 138 + }, + { + "epoch": 0.4890061565523307, + "grad_norm": 0.6566460728645325, + "learning_rate": 3.065326633165829e-05, + "loss": 1.1041, + "step": 139 + }, + { + "epoch": 0.4925241864555849, + "grad_norm": 0.6608115434646606, + "learning_rate": 3.015075376884422e-05, + "loss": 1.0962, + "step": 140 + }, + { + "epoch": 0.49604221635883905, + "grad_norm": 0.7585815787315369, + "learning_rate": 2.9648241206030153e-05, + "loss": 1.1615, + "step": 141 + }, + { + "epoch": 0.4995602462620932, + "grad_norm": 0.6302761435508728, + "learning_rate": 2.914572864321608e-05, + "loss": 1.0501, + "step": 142 + }, + { + "epoch": 0.5030782761653474, + "grad_norm": 0.7121193408966064, + "learning_rate": 2.8643216080402015e-05, + "loss": 1.092, + "step": 143 + }, + { + "epoch": 0.5065963060686016, + "grad_norm": 0.6791670918464661, + "learning_rate": 2.814070351758794e-05, + "loss": 1.0259, + "step": 144 + }, + { + "epoch": 0.5101143359718557, + "grad_norm": 0.6103968024253845, + "learning_rate": 2.763819095477387e-05, + "loss": 1.0982, + "step": 145 + }, + { + "epoch": 0.51363236587511, + "grad_norm": 0.6536839604377747, + "learning_rate": 2.7135678391959802e-05, + "loss": 1.0601, + "step": 146 + }, + { + "epoch": 0.5171503957783641, + "grad_norm": 0.7174214124679565, + "learning_rate": 2.6633165829145728e-05, + "loss": 1.1181, + "step": 147 + }, + { + "epoch": 0.5206684256816183, + "grad_norm": 0.6724268198013306, + "learning_rate": 2.613065326633166e-05, + "loss": 1.1056, + "step": 148 + }, + { + "epoch": 0.5241864555848724, + "grad_norm": 0.5918770432472229, + "learning_rate": 2.562814070351759e-05, + "loss": 1.0136, + "step": 149 + }, + { + "epoch": 0.5277044854881267, + "grad_norm": 0.6460606455802917, + "learning_rate": 2.5125628140703518e-05, + "loss": 1.0797, + "step": 150 + }, + { + "epoch": 0.5312225153913809, + "grad_norm": 0.7316471934318542, + "learning_rate": 2.462311557788945e-05, + "loss": 1.0201, + "step": 151 + }, + { + "epoch": 0.534740545294635, + "grad_norm": 0.6556833982467651, + "learning_rate": 2.4120603015075376e-05, + "loss": 1.1061, + "step": 152 + }, + { + "epoch": 0.5382585751978892, + "grad_norm": 0.6981020569801331, + "learning_rate": 2.361809045226131e-05, + "loss": 1.0391, + "step": 153 + }, + { + "epoch": 0.5417766051011433, + "grad_norm": 0.7724502682685852, + "learning_rate": 2.3115577889447238e-05, + "loss": 1.0317, + "step": 154 + }, + { + "epoch": 0.5452946350043976, + "grad_norm": 0.6132215857505798, + "learning_rate": 2.2613065326633167e-05, + "loss": 1.0934, + "step": 155 + }, + { + "epoch": 0.5488126649076517, + "grad_norm": 0.64773029088974, + "learning_rate": 2.21105527638191e-05, + "loss": 1.0384, + "step": 156 + }, + { + "epoch": 0.5523306948109059, + "grad_norm": 0.6717387437820435, + "learning_rate": 2.1608040201005025e-05, + "loss": 1.0051, + "step": 157 + }, + { + "epoch": 0.55584872471416, + "grad_norm": 0.6664286851882935, + "learning_rate": 2.1105527638190957e-05, + "loss": 1.0796, + "step": 158 + }, + { + "epoch": 0.5593667546174143, + "grad_norm": 0.6668258905410767, + "learning_rate": 2.0603015075376886e-05, + "loss": 1.0438, + "step": 159 + }, + { + "epoch": 0.5628847845206685, + "grad_norm": 0.7474943399429321, + "learning_rate": 2.0100502512562815e-05, + "loss": 1.038, + "step": 160 + }, + { + "epoch": 0.5664028144239226, + "grad_norm": 0.6635732054710388, + "learning_rate": 1.9597989949748744e-05, + "loss": 0.9833, + "step": 161 + }, + { + "epoch": 0.5699208443271768, + "grad_norm": 0.7757295966148376, + "learning_rate": 1.9095477386934673e-05, + "loss": 1.137, + "step": 162 + }, + { + "epoch": 0.5734388742304309, + "grad_norm": 0.6844589710235596, + "learning_rate": 1.8592964824120602e-05, + "loss": 1.0902, + "step": 163 + }, + { + "epoch": 0.5769569041336852, + "grad_norm": 0.6839131116867065, + "learning_rate": 1.8090452261306535e-05, + "loss": 1.0804, + "step": 164 + }, + { + "epoch": 0.5804749340369393, + "grad_norm": 0.6727124452590942, + "learning_rate": 1.7587939698492464e-05, + "loss": 1.0573, + "step": 165 + }, + { + "epoch": 0.5839929639401935, + "grad_norm": 0.7024590373039246, + "learning_rate": 1.7085427135678393e-05, + "loss": 1.0856, + "step": 166 + }, + { + "epoch": 0.5875109938434476, + "grad_norm": 0.7294171452522278, + "learning_rate": 1.6582914572864322e-05, + "loss": 1.0777, + "step": 167 + }, + { + "epoch": 0.5910290237467019, + "grad_norm": 0.8080787062644958, + "learning_rate": 1.608040201005025e-05, + "loss": 1.1296, + "step": 168 + }, + { + "epoch": 0.594547053649956, + "grad_norm": 0.7388249039649963, + "learning_rate": 1.5577889447236183e-05, + "loss": 1.0979, + "step": 169 + }, + { + "epoch": 0.5980650835532102, + "grad_norm": 0.7029967904090881, + "learning_rate": 1.507537688442211e-05, + "loss": 1.1244, + "step": 170 + }, + { + "epoch": 0.6015831134564644, + "grad_norm": 0.6801552772521973, + "learning_rate": 1.457286432160804e-05, + "loss": 1.0342, + "step": 171 + }, + { + "epoch": 0.6051011433597185, + "grad_norm": 0.6317315101623535, + "learning_rate": 1.407035175879397e-05, + "loss": 1.0702, + "step": 172 + }, + { + "epoch": 0.6086191732629728, + "grad_norm": 0.8335021734237671, + "learning_rate": 1.3567839195979901e-05, + "loss": 1.1075, + "step": 173 + }, + { + "epoch": 0.6121372031662269, + "grad_norm": 0.619908332824707, + "learning_rate": 1.306532663316583e-05, + "loss": 1.0069, + "step": 174 + }, + { + "epoch": 0.6156552330694811, + "grad_norm": 0.7482386827468872, + "learning_rate": 1.2562814070351759e-05, + "loss": 1.0786, + "step": 175 + }, + { + "epoch": 0.6191732629727352, + "grad_norm": 0.7039029598236084, + "learning_rate": 1.2060301507537688e-05, + "loss": 1.1005, + "step": 176 + }, + { + "epoch": 0.6226912928759895, + "grad_norm": 0.6952759623527527, + "learning_rate": 1.1557788944723619e-05, + "loss": 1.1621, + "step": 177 + }, + { + "epoch": 0.6262093227792436, + "grad_norm": 0.7525346875190735, + "learning_rate": 1.105527638190955e-05, + "loss": 1.1016, + "step": 178 + }, + { + "epoch": 0.6297273526824978, + "grad_norm": 0.6753787398338318, + "learning_rate": 1.0552763819095479e-05, + "loss": 1.0479, + "step": 179 + }, + { + "epoch": 0.633245382585752, + "grad_norm": 0.7498345375061035, + "learning_rate": 1.0050251256281408e-05, + "loss": 1.0728, + "step": 180 + }, + { + "epoch": 0.6367634124890061, + "grad_norm": 0.6624960899353027, + "learning_rate": 9.547738693467337e-06, + "loss": 1.1561, + "step": 181 + }, + { + "epoch": 0.6402814423922604, + "grad_norm": 0.6787440180778503, + "learning_rate": 9.045226130653267e-06, + "loss": 1.0025, + "step": 182 + }, + { + "epoch": 0.6437994722955145, + "grad_norm": 0.7153716683387756, + "learning_rate": 8.542713567839196e-06, + "loss": 1.0914, + "step": 183 + }, + { + "epoch": 0.6473175021987687, + "grad_norm": 0.6593692302703857, + "learning_rate": 8.040201005025125e-06, + "loss": 1.0307, + "step": 184 + }, + { + "epoch": 0.6508355321020228, + "grad_norm": 0.6757813692092896, + "learning_rate": 7.537688442211055e-06, + "loss": 1.088, + "step": 185 + }, + { + "epoch": 0.6543535620052771, + "grad_norm": 0.6268891096115112, + "learning_rate": 7.035175879396985e-06, + "loss": 1.0596, + "step": 186 + }, + { + "epoch": 0.6578715919085312, + "grad_norm": 0.6860636472702026, + "learning_rate": 6.532663316582915e-06, + "loss": 1.0891, + "step": 187 + }, + { + "epoch": 0.6613896218117854, + "grad_norm": 0.6836390495300293, + "learning_rate": 6.030150753768844e-06, + "loss": 1.1322, + "step": 188 + }, + { + "epoch": 0.6649076517150396, + "grad_norm": 0.6405102014541626, + "learning_rate": 5.527638190954775e-06, + "loss": 1.068, + "step": 189 + }, + { + "epoch": 0.6684256816182937, + "grad_norm": 0.7387605309486389, + "learning_rate": 5.025125628140704e-06, + "loss": 1.0569, + "step": 190 + }, + { + "epoch": 0.671943711521548, + "grad_norm": 0.7345817685127258, + "learning_rate": 4.522613065326634e-06, + "loss": 1.1315, + "step": 191 + }, + { + "epoch": 0.6754617414248021, + "grad_norm": 0.7385038137435913, + "learning_rate": 4.020100502512563e-06, + "loss": 1.1027, + "step": 192 + }, + { + "epoch": 0.6789797713280563, + "grad_norm": 0.699575662612915, + "learning_rate": 3.5175879396984926e-06, + "loss": 1.0889, + "step": 193 + }, + { + "epoch": 0.6824978012313104, + "grad_norm": 0.7223588824272156, + "learning_rate": 3.015075376884422e-06, + "loss": 1.0535, + "step": 194 + }, + { + "epoch": 0.6860158311345647, + "grad_norm": 0.6895872950553894, + "learning_rate": 2.512562814070352e-06, + "loss": 1.1323, + "step": 195 + }, + { + "epoch": 0.6895338610378188, + "grad_norm": 0.6990038156509399, + "learning_rate": 2.0100502512562813e-06, + "loss": 1.0109, + "step": 196 + }, + { + "epoch": 0.693051890941073, + "grad_norm": 0.6835456490516663, + "learning_rate": 1.507537688442211e-06, + "loss": 1.0858, + "step": 197 + }, + { + "epoch": 0.6965699208443272, + "grad_norm": 0.7230079174041748, + "learning_rate": 1.0050251256281407e-06, + "loss": 1.0965, + "step": 198 + }, + { + "epoch": 0.7000879507475813, + "grad_norm": 0.6774978637695312, + "learning_rate": 5.025125628140703e-07, + "loss": 1.0649, + "step": 199 + }, + { + "epoch": 0.7036059806508356, + "grad_norm": 0.6156166195869446, + "learning_rate": 0.0, + "loss": 1.0368, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.7240004226351104e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/llama_Vietnamese_German/checkpoint-200/training_args.bin b/llama_Vietnamese_German/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5783987217157dc436eee1051a00ac385e83043f --- /dev/null +++ b/llama_Vietnamese_German/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf5cf9bf9d0f34f67e7446154bc647af291346c242c9eeb21ce56b8e08849b48 +size 5624