diff --git a/checkpoint-1095/added_tokens.json b/checkpoint-1095/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5132007c4fcf42b75b65c8b6aa49c7098bcdf4 --- /dev/null +++ b/checkpoint-1095/added_tokens.json @@ -0,0 +1,102 @@ +{ + "": 32099, + "": 32089, + "": 32088, + "": 32087, + "": 32086, + "": 32085, + "": 32084, + "": 32083, + "": 32082, + "": 32081, + "": 32080, + "": 32098, + "": 32079, + "": 32078, + "": 32077, + "": 32076, + "": 32075, + "": 32074, + "": 32073, + "": 32072, + "": 32071, + "": 32070, + "": 32097, + "": 32069, + "": 32068, + "": 32067, + "": 32066, + "": 32065, + "": 32064, + "": 32063, + "": 32062, + "": 32061, + "": 32060, + "": 32096, + "": 32059, + "": 32058, + "": 32057, + "": 32056, + "": 32055, + "": 32054, + "": 32053, + "": 32052, + "": 32051, + "": 32050, + "": 32095, + "": 32049, + "": 32048, + "": 32047, + "": 32046, + "": 32045, + "": 32044, + "": 32043, + "": 32042, + "": 32041, + "": 32040, + "": 32094, + "": 32039, + "": 32038, + "": 32037, + "": 32036, + "": 32035, + "": 32034, + "": 32033, + "": 32032, + "": 32031, + "": 32030, + "": 32093, + "": 32029, + "": 32028, + "": 32027, + "": 32026, + "": 32025, + "": 32024, + "": 32023, + "": 32022, + "": 32021, + "": 32020, + "": 32092, + "": 32019, + "": 32018, + "": 32017, + "": 32016, + "": 32015, + "": 32014, + "": 32013, + "": 32012, + "": 32011, + "": 32010, + "": 32091, + "": 32009, + "": 32008, + "": 32007, + "": 32006, + "": 32005, + "": 32004, + "": 32003, + "": 32002, + "": 32001, + "": 32000, + "": 32090 +} diff --git a/checkpoint-1095/config.json b/checkpoint-1095/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7adeb5014f60a4a25c0c42eb288f6284e773e6dd --- /dev/null +++ b/checkpoint-1095/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "teapotai/teapotllm", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 2816, + "d_kv": 64, + "d_model": 1024, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_decoder_layers": 24, + "num_heads": 16, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.48.3", + "use_cache": true, + "vocab_size": 32128 +} diff --git a/checkpoint-1095/generation_config.json b/checkpoint-1095/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..058f73f97f63923f298d59a1d6b4f78f510c5146 --- /dev/null +++ b/checkpoint-1095/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.48.3" +} diff --git a/checkpoint-1095/model.safetensors b/checkpoint-1095/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97d309cf8a5606e2dc6d3da452cd3b23e9e3c802 --- /dev/null +++ b/checkpoint-1095/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d3684e7f24b7bc3774ff4dc275b3d9e0b33f20294b8dd89cc7a435e7b4ed0b +size 3132668808 diff --git a/checkpoint-1095/optimizer.pt b/checkpoint-1095/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3986bad5b6538e14db22bdb6dd933efd7c510e29 --- /dev/null +++ b/checkpoint-1095/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1454e183fc256d2c8b7a5bb4c504f3b1b2b5653369905cf1fe062dc03b79f67d +size 6265677800 diff --git a/checkpoint-1095/rng_state.pth b/checkpoint-1095/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bb402f317c8d66242d5f7d5022a7c06e6e4036ed --- /dev/null +++ b/checkpoint-1095/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5910ff7863539dd4442d70828b8313ad83f1d382daae117c3526f151cda859 +size 14244 diff --git a/checkpoint-1095/scheduler.pt b/checkpoint-1095/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a44ebb5717d566596adf852a53374382bebaa9ed --- /dev/null +++ b/checkpoint-1095/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8e4f11ac5ade6c9447d498652862b151331a28a44fd4b9fe70a4ac281deb197 +size 1064 diff --git a/checkpoint-1095/special_tokens_map.json b/checkpoint-1095/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..17ade346a1042cbe0c1436f5bedcbd85c099d582 --- /dev/null +++ b/checkpoint-1095/special_tokens_map.json @@ -0,0 +1,125 @@ +{ + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1095/spiece.model b/checkpoint-1095/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..317a5ccbde45300f5d1d970d4d449af2108b147e --- /dev/null +++ b/checkpoint-1095/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 +size 791656 diff --git a/checkpoint-1095/tokenizer_config.json b/checkpoint-1095/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d9f418ab1e49d1eaa3832e9970c9c503f565484 --- /dev/null +++ b/checkpoint-1095/tokenizer_config.json @@ -0,0 +1,941 @@ +{ + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32002": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32003": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32004": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32005": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32006": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32011": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32012": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32015": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32017": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32018": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32019": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32020": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32021": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32022": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32023": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32024": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32025": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32026": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32027": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32028": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32029": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32030": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32031": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32032": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32033": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32034": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32035": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32036": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32037": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32038": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32039": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32040": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32041": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32042": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32043": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32044": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32045": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32046": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32047": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32048": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32049": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32050": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32051": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32052": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32053": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32054": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32055": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32056": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32057": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32058": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32059": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32060": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32061": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32062": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32063": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32064": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32065": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32066": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32067": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32068": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32069": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32070": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32071": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32072": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32073": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32074": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32075": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32076": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32077": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32078": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32079": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32080": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32081": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32082": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32083": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32084": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32085": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32086": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32087": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32088": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32089": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32090": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32091": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32092": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32093": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32094": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32095": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32096": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32097": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32098": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32099": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_ids": 100, + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 512, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-1095/trainer_state.json b/checkpoint-1095/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5147f65be5e8b21b596f29ff08ae54f75d02f497 --- /dev/null +++ b/checkpoint-1095/trainer_state.json @@ -0,0 +1,228 @@ +{ + "best_metric": 0.042461033910512924, + "best_model_checkpoint": "./teapotllm/checkpoint-876", + "epoch": 5.0, + "eval_steps": 500, + "global_step": 1095, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "em_boolean": 0.5294117647058824, + "em_chat": 0.03783783783783784, + "em_extraction": 0.5333333333333333, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 1.0, + "mean_em": 0.12075471698113208, + "mean_similarity": 0.6624939311014594, + "mean_word_count_diff": 11.958490566037735, + "similarity_boolean": 0.649950959993636, + "similarity_chat": 0.6068178282194846, + "similarity_extraction": 0.8274670541286469, + "similarity_qa": 0.8895131280024846, + "similarity_summarization": 0.7897715005609725, + "similarity_unanswerable": 0.8186558306217193, + "word_count_diff_boolean": 2.764705882352941, + "word_count_diff_chat": 11.275675675675675, + "word_count_diff_extraction": 1.8666666666666667, + "word_count_diff_qa": 2.7333333333333334, + "word_count_diff_summarization": 50.05555555555556, + "word_count_diff_unanswerable": 4.4 + }, + { + "epoch": 1.0, + "grad_norm": 0.29226627945899963, + "learning_rate": 1.314e-05, + "loss": 0.0548, + "step": 219 + }, + { + "epoch": 1.0, + "eval_loss": 0.04732182249426842, + "eval_runtime": 17.2425, + "eval_samples_per_second": 15.369, + "eval_steps_per_second": 1.972, + "step": 219 + }, + { + "em_boolean": 0.7058823529411765, + "em_chat": 0.05405405405405406, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 2.0, + "mean_em": 0.12830188679245283, + "mean_similarity": 0.6936857629426808, + "mean_word_count_diff": 11.475471698113207, + "similarity_boolean": 0.7671171505661571, + "similarity_chat": 0.6481531772158435, + "similarity_extraction": 0.7221033732096355, + "similarity_qa": 0.8645438591639201, + "similarity_summarization": 0.824706514676412, + "similarity_unanswerable": 0.8155314723650614, + "word_count_diff_boolean": 2.7058823529411766, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 2.933333333333333, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.166666666666664, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 2.0, + "grad_norm": 0.30153563618659973, + "learning_rate": 2.628e-05, + "loss": 0.0445, + "step": 438 + }, + { + "epoch": 2.0, + "eval_loss": 0.04412226751446724, + "eval_runtime": 17.2255, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.974, + "step": 438 + }, + { + "em_boolean": 0.5882352941176471, + "em_chat": 0.05945945945945946, + "em_extraction": 0.4666666666666667, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 3.0, + "mean_em": 0.13584905660377358, + "mean_similarity": 0.7036299928109039, + "mean_word_count_diff": 11.230188679245282, + "similarity_boolean": 0.702288385699777, + "similarity_chat": 0.6661249467549292, + "similarity_extraction": 0.7363929619391759, + "similarity_qa": 0.9095388889312744, + "similarity_summarization": 0.7749250100718604, + "similarity_unanswerable": 0.8434868295987447, + "word_count_diff_boolean": 1.8823529411764706, + "word_count_diff_chat": 10.41081081081081, + "word_count_diff_extraction": 1.4, + "word_count_diff_qa": 2.8666666666666667, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 3.466666666666667 + }, + { + "epoch": 3.0, + "grad_norm": 0.4190770983695984, + "learning_rate": 2.9365689308796065e-05, + "loss": 0.0366, + "step": 657 + }, + { + "epoch": 3.0, + "eval_loss": 0.04255302622914314, + "eval_runtime": 17.2484, + "eval_samples_per_second": 15.364, + "eval_steps_per_second": 1.971, + "step": 657 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.05405405405405406, + "em_extraction": 0.6666666666666666, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 4.0, + "mean_em": 0.1509433962264151, + "mean_similarity": 0.7214625703522338, + "mean_word_count_diff": 11.275471698113208, + "similarity_boolean": 0.9250823608325685, + "similarity_chat": 0.6629158062407294, + "similarity_extraction": 0.9287973960240682, + "similarity_qa": 0.8280642042557399, + "similarity_summarization": 0.7930781609482236, + "similarity_unanswerable": 0.8128950635592143, + "word_count_diff_boolean": 0.5882352941176471, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 0.4666666666666667, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.27777777777778, + "word_count_diff_unanswerable": 4.333333333333333 + }, + { + "epoch": 4.0, + "grad_norm": 0.34906384348869324, + "learning_rate": 2.6482696742411827e-05, + "loss": 0.0301, + "step": 876 + }, + { + "epoch": 4.0, + "eval_loss": 0.042461033910512924, + "eval_runtime": 17.2294, + "eval_samples_per_second": 15.381, + "eval_steps_per_second": 1.973, + "step": 876 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.07027027027027027, + "em_extraction": 0.4, + "em_qa": 0.6, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 5.0, + "mean_em": 0.16226415094339622, + "mean_similarity": 0.7417481038367973, + "mean_word_count_diff": 10.89811320754717, + "similarity_boolean": 0.9421353953726151, + "similarity_chat": 0.6902955391922513, + "similarity_extraction": 0.7581887672344844, + "similarity_qa": 0.9694860418637593, + "similarity_summarization": 0.795685844288932, + "similarity_unanswerable": 0.840320247411728, + "word_count_diff_boolean": 0.0, + "word_count_diff_chat": 10.04864864864865, + "word_count_diff_extraction": 2.066666666666667, + "word_count_diff_qa": 2.3333333333333335, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 4.066666666666666 + }, + { + "epoch": 5.0, + "grad_norm": 0.2575681209564209, + "learning_rate": 2.17227572135781e-05, + "loss": 0.0252, + "step": 1095 + }, + { + "epoch": 5.0, + "eval_loss": 0.043407145887613297, + "eval_runtime": 17.2466, + "eval_samples_per_second": 15.365, + "eval_steps_per_second": 1.971, + "step": 1095 + } + ], + "logging_steps": 500, + "max_steps": 2190, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.006031210545152e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1095/training_args.bin b/checkpoint-1095/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d977d8d4afc5bd92bf7fc4298b8866c8a6c8438c --- /dev/null +++ b/checkpoint-1095/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f96c10a0a2b43c05318a394de2f8a40b06b79bafe7c52911b0bd4f1a90733fe +size 5304 diff --git a/checkpoint-1314/added_tokens.json b/checkpoint-1314/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5132007c4fcf42b75b65c8b6aa49c7098bcdf4 --- /dev/null +++ b/checkpoint-1314/added_tokens.json @@ -0,0 +1,102 @@ +{ + "": 32099, + "": 32089, + "": 32088, + "": 32087, + "": 32086, + "": 32085, + "": 32084, + "": 32083, + "": 32082, + "": 32081, + "": 32080, + "": 32098, + "": 32079, + "": 32078, + "": 32077, + "": 32076, + "": 32075, + "": 32074, + "": 32073, + "": 32072, + "": 32071, + "": 32070, + "": 32097, + "": 32069, + "": 32068, + "": 32067, + "": 32066, + "": 32065, + "": 32064, + "": 32063, + "": 32062, + "": 32061, + "": 32060, + "": 32096, + "": 32059, + "": 32058, + "": 32057, + "": 32056, + "": 32055, + "": 32054, + "": 32053, + "": 32052, + "": 32051, + "": 32050, + "": 32095, + "": 32049, + "": 32048, + "": 32047, + "": 32046, + "": 32045, + "": 32044, + "": 32043, + "": 32042, + "": 32041, + "": 32040, + "": 32094, + "": 32039, + "": 32038, + "": 32037, + "": 32036, + "": 32035, + "": 32034, + "": 32033, + "": 32032, + "": 32031, + "": 32030, + "": 32093, + "": 32029, + "": 32028, + "": 32027, + "": 32026, + "": 32025, + "": 32024, + "": 32023, + "": 32022, + "": 32021, + "": 32020, + "": 32092, + "": 32019, + "": 32018, + "": 32017, + "": 32016, + "": 32015, + "": 32014, + "": 32013, + "": 32012, + "": 32011, + "": 32010, + "": 32091, + "": 32009, + "": 32008, + "": 32007, + "": 32006, + "": 32005, + "": 32004, + "": 32003, + "": 32002, + "": 32001, + "": 32000, + "": 32090 +} diff --git a/checkpoint-1314/config.json b/checkpoint-1314/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7adeb5014f60a4a25c0c42eb288f6284e773e6dd --- /dev/null +++ b/checkpoint-1314/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "teapotai/teapotllm", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 2816, + "d_kv": 64, + "d_model": 1024, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_decoder_layers": 24, + "num_heads": 16, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.48.3", + "use_cache": true, + "vocab_size": 32128 +} diff --git a/checkpoint-1314/generation_config.json b/checkpoint-1314/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..058f73f97f63923f298d59a1d6b4f78f510c5146 --- /dev/null +++ b/checkpoint-1314/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.48.3" +} diff --git a/checkpoint-1314/model.safetensors b/checkpoint-1314/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe1a9fae98e6363a5eadc53a1ae34b0c51fa8b68 --- /dev/null +++ b/checkpoint-1314/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38009b92dacd3f6229b17739db5e839556e4e67e3bca862362c4e18639318d9b +size 3132668808 diff --git a/checkpoint-1314/optimizer.pt b/checkpoint-1314/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1619e65e9ae560f92bdac566eaabb471e963e3fb --- /dev/null +++ b/checkpoint-1314/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8517873d1535c17a78e69e43e03ddf2cae4f39faf3b97bd449c0c75accc6c0c +size 6265677800 diff --git a/checkpoint-1314/rng_state.pth b/checkpoint-1314/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..49114bc4db024c0da6912b90f9558ea2df0230c5 --- /dev/null +++ b/checkpoint-1314/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5715ebcb2484c8a15faba0f6de4db7fa7241ab05f67a67bda19ebd44a6ebe951 +size 14244 diff --git a/checkpoint-1314/scheduler.pt b/checkpoint-1314/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..031833a067784ce16a528266e9e1969094d4f83d --- /dev/null +++ b/checkpoint-1314/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3409639ad3e4fec86ac9cf2499235e2e587c5caa40365fe03bcaeab47cce403f +size 1064 diff --git a/checkpoint-1314/special_tokens_map.json b/checkpoint-1314/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..17ade346a1042cbe0c1436f5bedcbd85c099d582 --- /dev/null +++ b/checkpoint-1314/special_tokens_map.json @@ -0,0 +1,125 @@ +{ + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1314/spiece.model b/checkpoint-1314/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..317a5ccbde45300f5d1d970d4d449af2108b147e --- /dev/null +++ b/checkpoint-1314/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 +size 791656 diff --git a/checkpoint-1314/tokenizer_config.json b/checkpoint-1314/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d9f418ab1e49d1eaa3832e9970c9c503f565484 --- /dev/null +++ b/checkpoint-1314/tokenizer_config.json @@ -0,0 +1,941 @@ +{ + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32002": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32003": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32004": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32005": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32006": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32011": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32012": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32015": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32017": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32018": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32019": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32020": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32021": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32022": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32023": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32024": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32025": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32026": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32027": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32028": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32029": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32030": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32031": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32032": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32033": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32034": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32035": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32036": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32037": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32038": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32039": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32040": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32041": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32042": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32043": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32044": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32045": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32046": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32047": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32048": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32049": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32050": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32051": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32052": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32053": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32054": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32055": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32056": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32057": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32058": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32059": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32060": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32061": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32062": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32063": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32064": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32065": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32066": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32067": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32068": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32069": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32070": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32071": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32072": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32073": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32074": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32075": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32076": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32077": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32078": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32079": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32080": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32081": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32082": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32083": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32084": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32085": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32086": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32087": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32088": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32089": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32090": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32091": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32092": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32093": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32094": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32095": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32096": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32097": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32098": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32099": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_ids": 100, + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 512, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-1314/trainer_state.json b/checkpoint-1314/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a9ae5382a61c85532074c298e73fc2b05e58a7a2 --- /dev/null +++ b/checkpoint-1314/trainer_state.json @@ -0,0 +1,267 @@ +{ + "best_metric": 0.042461033910512924, + "best_model_checkpoint": "./teapotllm/checkpoint-876", + "epoch": 6.0, + "eval_steps": 500, + "global_step": 1314, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "em_boolean": 0.5294117647058824, + "em_chat": 0.03783783783783784, + "em_extraction": 0.5333333333333333, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 1.0, + "mean_em": 0.12075471698113208, + "mean_similarity": 0.6624939311014594, + "mean_word_count_diff": 11.958490566037735, + "similarity_boolean": 0.649950959993636, + "similarity_chat": 0.6068178282194846, + "similarity_extraction": 0.8274670541286469, + "similarity_qa": 0.8895131280024846, + "similarity_summarization": 0.7897715005609725, + "similarity_unanswerable": 0.8186558306217193, + "word_count_diff_boolean": 2.764705882352941, + "word_count_diff_chat": 11.275675675675675, + "word_count_diff_extraction": 1.8666666666666667, + "word_count_diff_qa": 2.7333333333333334, + "word_count_diff_summarization": 50.05555555555556, + "word_count_diff_unanswerable": 4.4 + }, + { + "epoch": 1.0, + "grad_norm": 0.29226627945899963, + "learning_rate": 1.314e-05, + "loss": 0.0548, + "step": 219 + }, + { + "epoch": 1.0, + "eval_loss": 0.04732182249426842, + "eval_runtime": 17.2425, + "eval_samples_per_second": 15.369, + "eval_steps_per_second": 1.972, + "step": 219 + }, + { + "em_boolean": 0.7058823529411765, + "em_chat": 0.05405405405405406, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 2.0, + "mean_em": 0.12830188679245283, + "mean_similarity": 0.6936857629426808, + "mean_word_count_diff": 11.475471698113207, + "similarity_boolean": 0.7671171505661571, + "similarity_chat": 0.6481531772158435, + "similarity_extraction": 0.7221033732096355, + "similarity_qa": 0.8645438591639201, + "similarity_summarization": 0.824706514676412, + "similarity_unanswerable": 0.8155314723650614, + "word_count_diff_boolean": 2.7058823529411766, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 2.933333333333333, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.166666666666664, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 2.0, + "grad_norm": 0.30153563618659973, + "learning_rate": 2.628e-05, + "loss": 0.0445, + "step": 438 + }, + { + "epoch": 2.0, + "eval_loss": 0.04412226751446724, + "eval_runtime": 17.2255, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.974, + "step": 438 + }, + { + "em_boolean": 0.5882352941176471, + "em_chat": 0.05945945945945946, + "em_extraction": 0.4666666666666667, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 3.0, + "mean_em": 0.13584905660377358, + "mean_similarity": 0.7036299928109039, + "mean_word_count_diff": 11.230188679245282, + "similarity_boolean": 0.702288385699777, + "similarity_chat": 0.6661249467549292, + "similarity_extraction": 0.7363929619391759, + "similarity_qa": 0.9095388889312744, + "similarity_summarization": 0.7749250100718604, + "similarity_unanswerable": 0.8434868295987447, + "word_count_diff_boolean": 1.8823529411764706, + "word_count_diff_chat": 10.41081081081081, + "word_count_diff_extraction": 1.4, + "word_count_diff_qa": 2.8666666666666667, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 3.466666666666667 + }, + { + "epoch": 3.0, + "grad_norm": 0.4190770983695984, + "learning_rate": 2.9365689308796065e-05, + "loss": 0.0366, + "step": 657 + }, + { + "epoch": 3.0, + "eval_loss": 0.04255302622914314, + "eval_runtime": 17.2484, + "eval_samples_per_second": 15.364, + "eval_steps_per_second": 1.971, + "step": 657 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.05405405405405406, + "em_extraction": 0.6666666666666666, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 4.0, + "mean_em": 0.1509433962264151, + "mean_similarity": 0.7214625703522338, + "mean_word_count_diff": 11.275471698113208, + "similarity_boolean": 0.9250823608325685, + "similarity_chat": 0.6629158062407294, + "similarity_extraction": 0.9287973960240682, + "similarity_qa": 0.8280642042557399, + "similarity_summarization": 0.7930781609482236, + "similarity_unanswerable": 0.8128950635592143, + "word_count_diff_boolean": 0.5882352941176471, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 0.4666666666666667, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.27777777777778, + "word_count_diff_unanswerable": 4.333333333333333 + }, + { + "epoch": 4.0, + "grad_norm": 0.34906384348869324, + "learning_rate": 2.6482696742411827e-05, + "loss": 0.0301, + "step": 876 + }, + { + "epoch": 4.0, + "eval_loss": 0.042461033910512924, + "eval_runtime": 17.2294, + "eval_samples_per_second": 15.381, + "eval_steps_per_second": 1.973, + "step": 876 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.07027027027027027, + "em_extraction": 0.4, + "em_qa": 0.6, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 5.0, + "mean_em": 0.16226415094339622, + "mean_similarity": 0.7417481038367973, + "mean_word_count_diff": 10.89811320754717, + "similarity_boolean": 0.9421353953726151, + "similarity_chat": 0.6902955391922513, + "similarity_extraction": 0.7581887672344844, + "similarity_qa": 0.9694860418637593, + "similarity_summarization": 0.795685844288932, + "similarity_unanswerable": 0.840320247411728, + "word_count_diff_boolean": 0.0, + "word_count_diff_chat": 10.04864864864865, + "word_count_diff_extraction": 2.066666666666667, + "word_count_diff_qa": 2.3333333333333335, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 4.066666666666666 + }, + { + "epoch": 5.0, + "grad_norm": 0.2575681209564209, + "learning_rate": 2.17227572135781e-05, + "loss": 0.0252, + "step": 1095 + }, + { + "epoch": 5.0, + "eval_loss": 0.043407145887613297, + "eval_runtime": 17.2466, + "eval_samples_per_second": 15.365, + "eval_steps_per_second": 1.971, + "step": 1095 + }, + { + "em_boolean": 0.8235294117647058, + "em_chat": 0.04864864864864865, + "em_extraction": 0.4666666666666667, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 6.0, + "mean_em": 0.1320754716981132, + "mean_similarity": 0.712902327184126, + "mean_word_count_diff": 11.09433962264151, + "similarity_boolean": 0.8407609323587488, + "similarity_chat": 0.6657742724023961, + "similarity_extraction": 0.8315838446219762, + "similarity_qa": 0.8299160649379095, + "similarity_summarization": 0.7715526024500529, + "similarity_unanswerable": 0.8431663314501444, + "word_count_diff_boolean": 1.2352941176470589, + "word_count_diff_chat": 10.183783783783785, + "word_count_diff_extraction": 1.5333333333333334, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.22222222222222, + "word_count_diff_unanswerable": 4.0 + }, + { + "epoch": 6.0, + "grad_norm": 0.08031666278839111, + "learning_rate": 1.586392436600814e-05, + "loss": 0.0221, + "step": 1314 + }, + { + "epoch": 6.0, + "eval_loss": 0.04415750876069069, + "eval_runtime": 17.2236, + "eval_samples_per_second": 15.386, + "eval_steps_per_second": 1.974, + "step": 1314 + } + ], + "logging_steps": 500, + "max_steps": 2190, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2072374526541824e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1314/training_args.bin b/checkpoint-1314/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d977d8d4afc5bd92bf7fc4298b8866c8a6c8438c --- /dev/null +++ b/checkpoint-1314/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f96c10a0a2b43c05318a394de2f8a40b06b79bafe7c52911b0bd4f1a90733fe +size 5304 diff --git a/checkpoint-1533/added_tokens.json b/checkpoint-1533/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5132007c4fcf42b75b65c8b6aa49c7098bcdf4 --- /dev/null +++ b/checkpoint-1533/added_tokens.json @@ -0,0 +1,102 @@ +{ + "": 32099, + "": 32089, + "": 32088, + "": 32087, + "": 32086, + "": 32085, + "": 32084, + "": 32083, + "": 32082, + "": 32081, + "": 32080, + "": 32098, + "": 32079, + "": 32078, + "": 32077, + "": 32076, + "": 32075, + "": 32074, + "": 32073, + "": 32072, + "": 32071, + "": 32070, + "": 32097, + "": 32069, + "": 32068, + "": 32067, + "": 32066, + "": 32065, + "": 32064, + "": 32063, + "": 32062, + "": 32061, + "": 32060, + "": 32096, + "": 32059, + "": 32058, + "": 32057, + "": 32056, + "": 32055, + "": 32054, + "": 32053, + "": 32052, + "": 32051, + "": 32050, + "": 32095, + "": 32049, + "": 32048, + "": 32047, + "": 32046, + "": 32045, + "": 32044, + "": 32043, + "": 32042, + "": 32041, + "": 32040, + "": 32094, + "": 32039, + "": 32038, + "": 32037, + "": 32036, + "": 32035, + "": 32034, + "": 32033, + "": 32032, + "": 32031, + "": 32030, + "": 32093, + "": 32029, + "": 32028, + "": 32027, + "": 32026, + "": 32025, + "": 32024, + "": 32023, + "": 32022, + "": 32021, + "": 32020, + "": 32092, + "": 32019, + "": 32018, + "": 32017, + "": 32016, + "": 32015, + "": 32014, + "": 32013, + "": 32012, + "": 32011, + "": 32010, + "": 32091, + "": 32009, + "": 32008, + "": 32007, + "": 32006, + "": 32005, + "": 32004, + "": 32003, + "": 32002, + "": 32001, + "": 32000, + "": 32090 +} diff --git a/checkpoint-1533/config.json b/checkpoint-1533/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7adeb5014f60a4a25c0c42eb288f6284e773e6dd --- /dev/null +++ b/checkpoint-1533/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "teapotai/teapotllm", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 2816, + "d_kv": 64, + "d_model": 1024, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_decoder_layers": 24, + "num_heads": 16, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.48.3", + "use_cache": true, + "vocab_size": 32128 +} diff --git a/checkpoint-1533/generation_config.json b/checkpoint-1533/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..058f73f97f63923f298d59a1d6b4f78f510c5146 --- /dev/null +++ b/checkpoint-1533/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.48.3" +} diff --git a/checkpoint-1533/model.safetensors b/checkpoint-1533/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b107a4a7cefbfea22d948b1fa02cad2ce59522f5 --- /dev/null +++ b/checkpoint-1533/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54b76f2d7d6e692d7bd5d95f05279724d09afd184a3aaca5643c135a1a3558f0 +size 3132668808 diff --git a/checkpoint-1533/optimizer.pt b/checkpoint-1533/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8c16068d8ba3e4c05a382b8ed0072a9e10379fd --- /dev/null +++ b/checkpoint-1533/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ab50f85477c761d11d67233cff7b5f53d562f830ca8e18e522560151628c271 +size 6265677800 diff --git a/checkpoint-1533/rng_state.pth b/checkpoint-1533/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..73c1cae4a1b836083f065f2333870d8bf959dfa2 --- /dev/null +++ b/checkpoint-1533/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d20dee884d8cc789ec02d664f2aa5fb6261c586f29853f5723c92cb24a76c919 +size 14244 diff --git a/checkpoint-1533/scheduler.pt b/checkpoint-1533/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bf7f179a2653416418f2632a890bb43be4c7020 --- /dev/null +++ b/checkpoint-1533/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dd0ac44d9ae2fcce2d00aa64502bf84f6a32cb40a7889b70f289c39706dbe1e +size 1064 diff --git a/checkpoint-1533/special_tokens_map.json b/checkpoint-1533/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..17ade346a1042cbe0c1436f5bedcbd85c099d582 --- /dev/null +++ b/checkpoint-1533/special_tokens_map.json @@ -0,0 +1,125 @@ +{ + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1533/spiece.model b/checkpoint-1533/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..317a5ccbde45300f5d1d970d4d449af2108b147e --- /dev/null +++ b/checkpoint-1533/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 +size 791656 diff --git a/checkpoint-1533/tokenizer_config.json b/checkpoint-1533/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d9f418ab1e49d1eaa3832e9970c9c503f565484 --- /dev/null +++ b/checkpoint-1533/tokenizer_config.json @@ -0,0 +1,941 @@ +{ + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32002": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32003": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32004": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32005": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32006": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32011": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32012": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32015": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32017": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32018": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32019": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32020": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32021": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32022": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32023": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32024": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32025": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32026": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32027": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32028": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32029": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32030": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32031": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32032": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32033": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32034": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32035": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32036": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32037": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32038": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32039": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32040": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32041": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32042": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32043": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32044": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32045": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32046": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32047": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32048": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32049": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32050": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32051": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32052": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32053": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32054": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32055": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32056": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32057": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32058": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32059": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32060": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32061": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32062": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32063": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32064": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32065": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32066": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32067": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32068": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32069": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32070": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32071": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32072": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32073": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32074": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32075": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32076": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32077": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32078": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32079": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32080": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32081": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32082": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32083": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32084": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32085": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32086": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32087": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32088": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32089": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32090": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32091": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32092": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32093": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32094": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32095": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32096": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32097": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32098": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32099": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_ids": 100, + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 512, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-1533/trainer_state.json b/checkpoint-1533/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..07522b7f06233be47731c3646d6f45f0ce1c43da --- /dev/null +++ b/checkpoint-1533/trainer_state.json @@ -0,0 +1,306 @@ +{ + "best_metric": 0.042461033910512924, + "best_model_checkpoint": "./teapotllm/checkpoint-876", + "epoch": 7.0, + "eval_steps": 500, + "global_step": 1533, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "em_boolean": 0.5294117647058824, + "em_chat": 0.03783783783783784, + "em_extraction": 0.5333333333333333, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 1.0, + "mean_em": 0.12075471698113208, + "mean_similarity": 0.6624939311014594, + "mean_word_count_diff": 11.958490566037735, + "similarity_boolean": 0.649950959993636, + "similarity_chat": 0.6068178282194846, + "similarity_extraction": 0.8274670541286469, + "similarity_qa": 0.8895131280024846, + "similarity_summarization": 0.7897715005609725, + "similarity_unanswerable": 0.8186558306217193, + "word_count_diff_boolean": 2.764705882352941, + "word_count_diff_chat": 11.275675675675675, + "word_count_diff_extraction": 1.8666666666666667, + "word_count_diff_qa": 2.7333333333333334, + "word_count_diff_summarization": 50.05555555555556, + "word_count_diff_unanswerable": 4.4 + }, + { + "epoch": 1.0, + "grad_norm": 0.29226627945899963, + "learning_rate": 1.314e-05, + "loss": 0.0548, + "step": 219 + }, + { + "epoch": 1.0, + "eval_loss": 0.04732182249426842, + "eval_runtime": 17.2425, + "eval_samples_per_second": 15.369, + "eval_steps_per_second": 1.972, + "step": 219 + }, + { + "em_boolean": 0.7058823529411765, + "em_chat": 0.05405405405405406, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 2.0, + "mean_em": 0.12830188679245283, + "mean_similarity": 0.6936857629426808, + "mean_word_count_diff": 11.475471698113207, + "similarity_boolean": 0.7671171505661571, + "similarity_chat": 0.6481531772158435, + "similarity_extraction": 0.7221033732096355, + "similarity_qa": 0.8645438591639201, + "similarity_summarization": 0.824706514676412, + "similarity_unanswerable": 0.8155314723650614, + "word_count_diff_boolean": 2.7058823529411766, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 2.933333333333333, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.166666666666664, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 2.0, + "grad_norm": 0.30153563618659973, + "learning_rate": 2.628e-05, + "loss": 0.0445, + "step": 438 + }, + { + "epoch": 2.0, + "eval_loss": 0.04412226751446724, + "eval_runtime": 17.2255, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.974, + "step": 438 + }, + { + "em_boolean": 0.5882352941176471, + "em_chat": 0.05945945945945946, + "em_extraction": 0.4666666666666667, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 3.0, + "mean_em": 0.13584905660377358, + "mean_similarity": 0.7036299928109039, + "mean_word_count_diff": 11.230188679245282, + "similarity_boolean": 0.702288385699777, + "similarity_chat": 0.6661249467549292, + "similarity_extraction": 0.7363929619391759, + "similarity_qa": 0.9095388889312744, + "similarity_summarization": 0.7749250100718604, + "similarity_unanswerable": 0.8434868295987447, + "word_count_diff_boolean": 1.8823529411764706, + "word_count_diff_chat": 10.41081081081081, + "word_count_diff_extraction": 1.4, + "word_count_diff_qa": 2.8666666666666667, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 3.466666666666667 + }, + { + "epoch": 3.0, + "grad_norm": 0.4190770983695984, + "learning_rate": 2.9365689308796065e-05, + "loss": 0.0366, + "step": 657 + }, + { + "epoch": 3.0, + "eval_loss": 0.04255302622914314, + "eval_runtime": 17.2484, + "eval_samples_per_second": 15.364, + "eval_steps_per_second": 1.971, + "step": 657 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.05405405405405406, + "em_extraction": 0.6666666666666666, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 4.0, + "mean_em": 0.1509433962264151, + "mean_similarity": 0.7214625703522338, + "mean_word_count_diff": 11.275471698113208, + "similarity_boolean": 0.9250823608325685, + "similarity_chat": 0.6629158062407294, + "similarity_extraction": 0.9287973960240682, + "similarity_qa": 0.8280642042557399, + "similarity_summarization": 0.7930781609482236, + "similarity_unanswerable": 0.8128950635592143, + "word_count_diff_boolean": 0.5882352941176471, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 0.4666666666666667, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.27777777777778, + "word_count_diff_unanswerable": 4.333333333333333 + }, + { + "epoch": 4.0, + "grad_norm": 0.34906384348869324, + "learning_rate": 2.6482696742411827e-05, + "loss": 0.0301, + "step": 876 + }, + { + "epoch": 4.0, + "eval_loss": 0.042461033910512924, + "eval_runtime": 17.2294, + "eval_samples_per_second": 15.381, + "eval_steps_per_second": 1.973, + "step": 876 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.07027027027027027, + "em_extraction": 0.4, + "em_qa": 0.6, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 5.0, + "mean_em": 0.16226415094339622, + "mean_similarity": 0.7417481038367973, + "mean_word_count_diff": 10.89811320754717, + "similarity_boolean": 0.9421353953726151, + "similarity_chat": 0.6902955391922513, + "similarity_extraction": 0.7581887672344844, + "similarity_qa": 0.9694860418637593, + "similarity_summarization": 0.795685844288932, + "similarity_unanswerable": 0.840320247411728, + "word_count_diff_boolean": 0.0, + "word_count_diff_chat": 10.04864864864865, + "word_count_diff_extraction": 2.066666666666667, + "word_count_diff_qa": 2.3333333333333335, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 4.066666666666666 + }, + { + "epoch": 5.0, + "grad_norm": 0.2575681209564209, + "learning_rate": 2.17227572135781e-05, + "loss": 0.0252, + "step": 1095 + }, + { + "epoch": 5.0, + "eval_loss": 0.043407145887613297, + "eval_runtime": 17.2466, + "eval_samples_per_second": 15.365, + "eval_steps_per_second": 1.971, + "step": 1095 + }, + { + "em_boolean": 0.8235294117647058, + "em_chat": 0.04864864864864865, + "em_extraction": 0.4666666666666667, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 6.0, + "mean_em": 0.1320754716981132, + "mean_similarity": 0.712902327184126, + "mean_word_count_diff": 11.09433962264151, + "similarity_boolean": 0.8407609323587488, + "similarity_chat": 0.6657742724023961, + "similarity_extraction": 0.8315838446219762, + "similarity_qa": 0.8299160649379095, + "similarity_summarization": 0.7715526024500529, + "similarity_unanswerable": 0.8431663314501444, + "word_count_diff_boolean": 1.2352941176470589, + "word_count_diff_chat": 10.183783783783785, + "word_count_diff_extraction": 1.5333333333333334, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.22222222222222, + "word_count_diff_unanswerable": 4.0 + }, + { + "epoch": 6.0, + "grad_norm": 0.08031666278839111, + "learning_rate": 1.586392436600814e-05, + "loss": 0.0221, + "step": 1314 + }, + { + "epoch": 6.0, + "eval_loss": 0.04415750876069069, + "eval_runtime": 17.2236, + "eval_samples_per_second": 15.386, + "eval_steps_per_second": 1.974, + "step": 1314 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.07567567567567568, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 7.0, + "mean_em": 0.15471698113207547, + "mean_similarity": 0.7240072643349194, + "mean_word_count_diff": 11.026415094339622, + "similarity_boolean": 0.8786488135947901, + "similarity_chat": 0.6825028176545291, + "similarity_extraction": 0.7764622618754705, + "similarity_qa": 0.8644786556561788, + "similarity_summarization": 0.7805107865068648, + "similarity_unanswerable": 0.7999044020970663, + "word_count_diff_boolean": 1.1764705882352942, + "word_count_diff_chat": 10.151351351351352, + "word_count_diff_extraction": 1.5333333333333334, + "word_count_diff_qa": 3.1333333333333333, + "word_count_diff_summarization": 50.388888888888886, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 7.0, + "grad_norm": 0.21582281589508057, + "learning_rate": 9.863875543572635e-06, + "loss": 0.0193, + "step": 1533 + }, + { + "epoch": 7.0, + "eval_loss": 0.0449073351919651, + "eval_runtime": 17.225, + "eval_samples_per_second": 15.385, + "eval_steps_per_second": 1.974, + "step": 1533 + } + ], + "logging_steps": 500, + "max_steps": 2190, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4084436947632128e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1533/training_args.bin b/checkpoint-1533/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d977d8d4afc5bd92bf7fc4298b8866c8a6c8438c --- /dev/null +++ b/checkpoint-1533/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f96c10a0a2b43c05318a394de2f8a40b06b79bafe7c52911b0bd4f1a90733fe +size 5304 diff --git a/checkpoint-1752/added_tokens.json b/checkpoint-1752/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5132007c4fcf42b75b65c8b6aa49c7098bcdf4 --- /dev/null +++ b/checkpoint-1752/added_tokens.json @@ -0,0 +1,102 @@ +{ + "": 32099, + "": 32089, + "": 32088, + "": 32087, + "": 32086, + "": 32085, + "": 32084, + "": 32083, + "": 32082, + "": 32081, + "": 32080, + "": 32098, + "": 32079, + "": 32078, + "": 32077, + "": 32076, + "": 32075, + "": 32074, + "": 32073, + "": 32072, + "": 32071, + "": 32070, + "": 32097, + "": 32069, + "": 32068, + "": 32067, + "": 32066, + "": 32065, + "": 32064, + "": 32063, + "": 32062, + "": 32061, + "": 32060, + "": 32096, + "": 32059, + "": 32058, + "": 32057, + "": 32056, + "": 32055, + "": 32054, + "": 32053, + "": 32052, + "": 32051, + "": 32050, + "": 32095, + "": 32049, + "": 32048, + "": 32047, + "": 32046, + "": 32045, + "": 32044, + "": 32043, + "": 32042, + "": 32041, + "": 32040, + "": 32094, + "": 32039, + "": 32038, + "": 32037, + "": 32036, + "": 32035, + "": 32034, + "": 32033, + "": 32032, + "": 32031, + "": 32030, + "": 32093, + "": 32029, + "": 32028, + "": 32027, + "": 32026, + "": 32025, + "": 32024, + "": 32023, + "": 32022, + "": 32021, + "": 32020, + "": 32092, + "": 32019, + "": 32018, + "": 32017, + "": 32016, + "": 32015, + "": 32014, + "": 32013, + "": 32012, + "": 32011, + "": 32010, + "": 32091, + "": 32009, + "": 32008, + "": 32007, + "": 32006, + "": 32005, + "": 32004, + "": 32003, + "": 32002, + "": 32001, + "": 32000, + "": 32090 +} diff --git a/checkpoint-1752/config.json b/checkpoint-1752/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7adeb5014f60a4a25c0c42eb288f6284e773e6dd --- /dev/null +++ b/checkpoint-1752/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "teapotai/teapotllm", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 2816, + "d_kv": 64, + "d_model": 1024, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_decoder_layers": 24, + "num_heads": 16, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.48.3", + "use_cache": true, + "vocab_size": 32128 +} diff --git a/checkpoint-1752/generation_config.json b/checkpoint-1752/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..058f73f97f63923f298d59a1d6b4f78f510c5146 --- /dev/null +++ b/checkpoint-1752/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.48.3" +} diff --git a/checkpoint-1752/model.safetensors b/checkpoint-1752/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dec71f219a8e66b1548cbd5e65aa95832b3f0e46 --- /dev/null +++ b/checkpoint-1752/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1d0029c53537f161060832ec4a601d4d2b5916b94ad4aa77b09b693d5350057 +size 3132668808 diff --git a/checkpoint-1752/optimizer.pt b/checkpoint-1752/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a7059b5e3eed5fbd8230a03d288bfebb61680a0 --- /dev/null +++ b/checkpoint-1752/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:664e4e29e64668f853ca9c1b3b79c48d7ce777ece4ba4402bfb3552b4892c2b5 +size 6265677800 diff --git a/checkpoint-1752/rng_state.pth b/checkpoint-1752/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d774df3d7699d72f6e7785279608a8c2977b3800 --- /dev/null +++ b/checkpoint-1752/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b17ecbd4fc1573e10bfe61cb0b29f48a99c41d34f1f4527f02db5d0649bfee0d +size 14244 diff --git a/checkpoint-1752/scheduler.pt b/checkpoint-1752/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..576cceaed407fd9ef19b4e8100f3228d9862cfb5 --- /dev/null +++ b/checkpoint-1752/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1401c4db418f85e674afcc6b6a9b505506762425d27f4bcbdad9bd3f591603dc +size 1064 diff --git a/checkpoint-1752/special_tokens_map.json b/checkpoint-1752/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..17ade346a1042cbe0c1436f5bedcbd85c099d582 --- /dev/null +++ b/checkpoint-1752/special_tokens_map.json @@ -0,0 +1,125 @@ +{ + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1752/spiece.model b/checkpoint-1752/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..317a5ccbde45300f5d1d970d4d449af2108b147e --- /dev/null +++ b/checkpoint-1752/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 +size 791656 diff --git a/checkpoint-1752/tokenizer_config.json b/checkpoint-1752/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d9f418ab1e49d1eaa3832e9970c9c503f565484 --- /dev/null +++ b/checkpoint-1752/tokenizer_config.json @@ -0,0 +1,941 @@ +{ + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32002": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32003": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32004": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32005": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32006": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32011": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32012": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32015": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32017": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32018": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32019": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32020": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32021": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32022": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32023": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32024": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32025": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32026": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32027": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32028": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32029": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32030": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32031": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32032": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32033": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32034": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32035": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32036": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32037": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32038": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32039": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32040": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32041": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32042": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32043": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32044": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32045": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32046": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32047": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32048": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32049": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32050": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32051": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32052": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32053": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32054": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32055": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32056": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32057": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32058": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32059": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32060": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32061": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32062": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32063": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32064": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32065": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32066": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32067": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32068": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32069": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32070": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32071": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32072": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32073": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32074": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32075": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32076": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32077": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32078": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32079": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32080": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32081": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32082": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32083": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32084": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32085": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32086": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32087": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32088": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32089": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32090": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32091": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32092": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32093": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32094": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32095": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32096": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32097": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32098": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32099": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_ids": 100, + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 512, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-1752/trainer_state.json b/checkpoint-1752/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..db4120037dbe74801f00d9e716343cc03baf1619 --- /dev/null +++ b/checkpoint-1752/trainer_state.json @@ -0,0 +1,345 @@ +{ + "best_metric": 0.042461033910512924, + "best_model_checkpoint": "./teapotllm/checkpoint-876", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 1752, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "em_boolean": 0.5294117647058824, + "em_chat": 0.03783783783783784, + "em_extraction": 0.5333333333333333, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 1.0, + "mean_em": 0.12075471698113208, + "mean_similarity": 0.6624939311014594, + "mean_word_count_diff": 11.958490566037735, + "similarity_boolean": 0.649950959993636, + "similarity_chat": 0.6068178282194846, + "similarity_extraction": 0.8274670541286469, + "similarity_qa": 0.8895131280024846, + "similarity_summarization": 0.7897715005609725, + "similarity_unanswerable": 0.8186558306217193, + "word_count_diff_boolean": 2.764705882352941, + "word_count_diff_chat": 11.275675675675675, + "word_count_diff_extraction": 1.8666666666666667, + "word_count_diff_qa": 2.7333333333333334, + "word_count_diff_summarization": 50.05555555555556, + "word_count_diff_unanswerable": 4.4 + }, + { + "epoch": 1.0, + "grad_norm": 0.29226627945899963, + "learning_rate": 1.314e-05, + "loss": 0.0548, + "step": 219 + }, + { + "epoch": 1.0, + "eval_loss": 0.04732182249426842, + "eval_runtime": 17.2425, + "eval_samples_per_second": 15.369, + "eval_steps_per_second": 1.972, + "step": 219 + }, + { + "em_boolean": 0.7058823529411765, + "em_chat": 0.05405405405405406, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 2.0, + "mean_em": 0.12830188679245283, + "mean_similarity": 0.6936857629426808, + "mean_word_count_diff": 11.475471698113207, + "similarity_boolean": 0.7671171505661571, + "similarity_chat": 0.6481531772158435, + "similarity_extraction": 0.7221033732096355, + "similarity_qa": 0.8645438591639201, + "similarity_summarization": 0.824706514676412, + "similarity_unanswerable": 0.8155314723650614, + "word_count_diff_boolean": 2.7058823529411766, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 2.933333333333333, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.166666666666664, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 2.0, + "grad_norm": 0.30153563618659973, + "learning_rate": 2.628e-05, + "loss": 0.0445, + "step": 438 + }, + { + "epoch": 2.0, + "eval_loss": 0.04412226751446724, + "eval_runtime": 17.2255, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.974, + "step": 438 + }, + { + "em_boolean": 0.5882352941176471, + "em_chat": 0.05945945945945946, + "em_extraction": 0.4666666666666667, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 3.0, + "mean_em": 0.13584905660377358, + "mean_similarity": 0.7036299928109039, + "mean_word_count_diff": 11.230188679245282, + "similarity_boolean": 0.702288385699777, + "similarity_chat": 0.6661249467549292, + "similarity_extraction": 0.7363929619391759, + "similarity_qa": 0.9095388889312744, + "similarity_summarization": 0.7749250100718604, + "similarity_unanswerable": 0.8434868295987447, + "word_count_diff_boolean": 1.8823529411764706, + "word_count_diff_chat": 10.41081081081081, + "word_count_diff_extraction": 1.4, + "word_count_diff_qa": 2.8666666666666667, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 3.466666666666667 + }, + { + "epoch": 3.0, + "grad_norm": 0.4190770983695984, + "learning_rate": 2.9365689308796065e-05, + "loss": 0.0366, + "step": 657 + }, + { + "epoch": 3.0, + "eval_loss": 0.04255302622914314, + "eval_runtime": 17.2484, + "eval_samples_per_second": 15.364, + "eval_steps_per_second": 1.971, + "step": 657 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.05405405405405406, + "em_extraction": 0.6666666666666666, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 4.0, + "mean_em": 0.1509433962264151, + "mean_similarity": 0.7214625703522338, + "mean_word_count_diff": 11.275471698113208, + "similarity_boolean": 0.9250823608325685, + "similarity_chat": 0.6629158062407294, + "similarity_extraction": 0.9287973960240682, + "similarity_qa": 0.8280642042557399, + "similarity_summarization": 0.7930781609482236, + "similarity_unanswerable": 0.8128950635592143, + "word_count_diff_boolean": 0.5882352941176471, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 0.4666666666666667, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.27777777777778, + "word_count_diff_unanswerable": 4.333333333333333 + }, + { + "epoch": 4.0, + "grad_norm": 0.34906384348869324, + "learning_rate": 2.6482696742411827e-05, + "loss": 0.0301, + "step": 876 + }, + { + "epoch": 4.0, + "eval_loss": 0.042461033910512924, + "eval_runtime": 17.2294, + "eval_samples_per_second": 15.381, + "eval_steps_per_second": 1.973, + "step": 876 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.07027027027027027, + "em_extraction": 0.4, + "em_qa": 0.6, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 5.0, + "mean_em": 0.16226415094339622, + "mean_similarity": 0.7417481038367973, + "mean_word_count_diff": 10.89811320754717, + "similarity_boolean": 0.9421353953726151, + "similarity_chat": 0.6902955391922513, + "similarity_extraction": 0.7581887672344844, + "similarity_qa": 0.9694860418637593, + "similarity_summarization": 0.795685844288932, + "similarity_unanswerable": 0.840320247411728, + "word_count_diff_boolean": 0.0, + "word_count_diff_chat": 10.04864864864865, + "word_count_diff_extraction": 2.066666666666667, + "word_count_diff_qa": 2.3333333333333335, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 4.066666666666666 + }, + { + "epoch": 5.0, + "grad_norm": 0.2575681209564209, + "learning_rate": 2.17227572135781e-05, + "loss": 0.0252, + "step": 1095 + }, + { + "epoch": 5.0, + "eval_loss": 0.043407145887613297, + "eval_runtime": 17.2466, + "eval_samples_per_second": 15.365, + "eval_steps_per_second": 1.971, + "step": 1095 + }, + { + "em_boolean": 0.8235294117647058, + "em_chat": 0.04864864864864865, + "em_extraction": 0.4666666666666667, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 6.0, + "mean_em": 0.1320754716981132, + "mean_similarity": 0.712902327184126, + "mean_word_count_diff": 11.09433962264151, + "similarity_boolean": 0.8407609323587488, + "similarity_chat": 0.6657742724023961, + "similarity_extraction": 0.8315838446219762, + "similarity_qa": 0.8299160649379095, + "similarity_summarization": 0.7715526024500529, + "similarity_unanswerable": 0.8431663314501444, + "word_count_diff_boolean": 1.2352941176470589, + "word_count_diff_chat": 10.183783783783785, + "word_count_diff_extraction": 1.5333333333333334, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.22222222222222, + "word_count_diff_unanswerable": 4.0 + }, + { + "epoch": 6.0, + "grad_norm": 0.08031666278839111, + "learning_rate": 1.586392436600814e-05, + "loss": 0.0221, + "step": 1314 + }, + { + "epoch": 6.0, + "eval_loss": 0.04415750876069069, + "eval_runtime": 17.2236, + "eval_samples_per_second": 15.386, + "eval_steps_per_second": 1.974, + "step": 1314 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.07567567567567568, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 7.0, + "mean_em": 0.15471698113207547, + "mean_similarity": 0.7240072643349194, + "mean_word_count_diff": 11.026415094339622, + "similarity_boolean": 0.8786488135947901, + "similarity_chat": 0.6825028176545291, + "similarity_extraction": 0.7764622618754705, + "similarity_qa": 0.8644786556561788, + "similarity_summarization": 0.7805107865068648, + "similarity_unanswerable": 0.7999044020970663, + "word_count_diff_boolean": 1.1764705882352942, + "word_count_diff_chat": 10.151351351351352, + "word_count_diff_extraction": 1.5333333333333334, + "word_count_diff_qa": 3.1333333333333333, + "word_count_diff_summarization": 50.388888888888886, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 7.0, + "grad_norm": 0.21582281589508057, + "learning_rate": 9.863875543572635e-06, + "loss": 0.0193, + "step": 1533 + }, + { + "epoch": 7.0, + "eval_loss": 0.0449073351919651, + "eval_runtime": 17.225, + "eval_samples_per_second": 15.385, + "eval_steps_per_second": 1.974, + "step": 1533 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.05945945945945946, + "em_extraction": 0.6, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 8.0, + "mean_em": 0.1509433962264151, + "mean_similarity": 0.7287208422667013, + "mean_word_count_diff": 11.075471698113208, + "similarity_boolean": 0.9135467927245533, + "similarity_chat": 0.6845327336139776, + "similarity_extraction": 0.8388477871815364, + "similarity_qa": 0.8482021888097128, + "similarity_summarization": 0.7841283877690634, + "similarity_unanswerable": 0.7681407590707143, + "word_count_diff_boolean": 0.7647058823529411, + "word_count_diff_chat": 10.254054054054054, + "word_count_diff_extraction": 1.7333333333333334, + "word_count_diff_qa": 3.466666666666667, + "word_count_diff_summarization": 49.888888888888886, + "word_count_diff_unanswerable": 3.2666666666666666 + }, + { + "epoch": 8.0, + "grad_norm": 0.22318905591964722, + "learning_rate": 4.703371073261941e-06, + "loss": 0.0176, + "step": 1752 + }, + { + "epoch": 8.0, + "eval_loss": 0.04609951004385948, + "eval_runtime": 17.2261, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.974, + "step": 1752 + } + ], + "logging_steps": 500, + "max_steps": 2190, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.6096499368722432e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1752/training_args.bin b/checkpoint-1752/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d977d8d4afc5bd92bf7fc4298b8866c8a6c8438c --- /dev/null +++ b/checkpoint-1752/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f96c10a0a2b43c05318a394de2f8a40b06b79bafe7c52911b0bd4f1a90733fe +size 5304 diff --git a/checkpoint-1971/added_tokens.json b/checkpoint-1971/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5132007c4fcf42b75b65c8b6aa49c7098bcdf4 --- /dev/null +++ b/checkpoint-1971/added_tokens.json @@ -0,0 +1,102 @@ +{ + "": 32099, + "": 32089, + "": 32088, + "": 32087, + "": 32086, + "": 32085, + "": 32084, + "": 32083, + "": 32082, + "": 32081, + "": 32080, + "": 32098, + "": 32079, + "": 32078, + "": 32077, + "": 32076, + "": 32075, + "": 32074, + "": 32073, + "": 32072, + "": 32071, + "": 32070, + "": 32097, + "": 32069, + "": 32068, + "": 32067, + "": 32066, + "": 32065, + "": 32064, + "": 32063, + "": 32062, + "": 32061, + "": 32060, + "": 32096, + "": 32059, + "": 32058, + "": 32057, + "": 32056, + "": 32055, + "": 32054, + "": 32053, + "": 32052, + "": 32051, + "": 32050, + "": 32095, + "": 32049, + "": 32048, + "": 32047, + "": 32046, + "": 32045, + "": 32044, + "": 32043, + "": 32042, + "": 32041, + "": 32040, + "": 32094, + "": 32039, + "": 32038, + "": 32037, + "": 32036, + "": 32035, + "": 32034, + "": 32033, + "": 32032, + "": 32031, + "": 32030, + "": 32093, + "": 32029, + "": 32028, + "": 32027, + "": 32026, + "": 32025, + "": 32024, + "": 32023, + "": 32022, + "": 32021, + "": 32020, + "": 32092, + "": 32019, + "": 32018, + "": 32017, + "": 32016, + "": 32015, + "": 32014, + "": 32013, + "": 32012, + "": 32011, + "": 32010, + "": 32091, + "": 32009, + "": 32008, + "": 32007, + "": 32006, + "": 32005, + "": 32004, + "": 32003, + "": 32002, + "": 32001, + "": 32000, + "": 32090 +} diff --git a/checkpoint-1971/config.json b/checkpoint-1971/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7adeb5014f60a4a25c0c42eb288f6284e773e6dd --- /dev/null +++ b/checkpoint-1971/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "teapotai/teapotllm", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 2816, + "d_kv": 64, + "d_model": 1024, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_decoder_layers": 24, + "num_heads": 16, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.48.3", + "use_cache": true, + "vocab_size": 32128 +} diff --git a/checkpoint-1971/generation_config.json b/checkpoint-1971/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..058f73f97f63923f298d59a1d6b4f78f510c5146 --- /dev/null +++ b/checkpoint-1971/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.48.3" +} diff --git a/checkpoint-1971/model.safetensors b/checkpoint-1971/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e07b0377fd8efd3dd4c572ef584e101ce9e5fcb --- /dev/null +++ b/checkpoint-1971/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e53fc23a3a0c1708e73943af24bad268fea9d1d122143b787c4c3c88ebe1bad5 +size 3132668808 diff --git a/checkpoint-1971/optimizer.pt b/checkpoint-1971/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..819faa07524a2f86308766aa7616028e4508da08 --- /dev/null +++ b/checkpoint-1971/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f57ba196247730a1fdd1993ef10a2b3da5560bb6b9144a0e1ee3e80a5dc44435 +size 6265677800 diff --git a/checkpoint-1971/rng_state.pth b/checkpoint-1971/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..76ab2f440968bf80c3a5042b1cffa36ce1276952 --- /dev/null +++ b/checkpoint-1971/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d86d318b535f641c93407f5842e4dbe8697e92aef6e5f623bc1f40f0b0ce02d0 +size 14244 diff --git a/checkpoint-1971/scheduler.pt b/checkpoint-1971/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3aeb2f2c2a3958941555e0a9b8a24f6eb160ec4d --- /dev/null +++ b/checkpoint-1971/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b18213bb2016bd5fa8bc1cdf2bda6cc6046008c9ba2fb78954531172fa3dab +size 1064 diff --git a/checkpoint-1971/special_tokens_map.json b/checkpoint-1971/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..17ade346a1042cbe0c1436f5bedcbd85c099d582 --- /dev/null +++ b/checkpoint-1971/special_tokens_map.json @@ -0,0 +1,125 @@ +{ + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1971/spiece.model b/checkpoint-1971/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..317a5ccbde45300f5d1d970d4d449af2108b147e --- /dev/null +++ b/checkpoint-1971/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 +size 791656 diff --git a/checkpoint-1971/tokenizer_config.json b/checkpoint-1971/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d9f418ab1e49d1eaa3832e9970c9c503f565484 --- /dev/null +++ b/checkpoint-1971/tokenizer_config.json @@ -0,0 +1,941 @@ +{ + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32002": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32003": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32004": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32005": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32006": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32011": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32012": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32015": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32017": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32018": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32019": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32020": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32021": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32022": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32023": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32024": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32025": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32026": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32027": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32028": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32029": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32030": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32031": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32032": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32033": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32034": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32035": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32036": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32037": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32038": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32039": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32040": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32041": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32042": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32043": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32044": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32045": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32046": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32047": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32048": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32049": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32050": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32051": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32052": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32053": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32054": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32055": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32056": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32057": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32058": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32059": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32060": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32061": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32062": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32063": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32064": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32065": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32066": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32067": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32068": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32069": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32070": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32071": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32072": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32073": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32074": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32075": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32076": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32077": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32078": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32079": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32080": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32081": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32082": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32083": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32084": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32085": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32086": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32087": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32088": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32089": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32090": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32091": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32092": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32093": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32094": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32095": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32096": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32097": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32098": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32099": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_ids": 100, + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 512, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-1971/trainer_state.json b/checkpoint-1971/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bf1c007c50d4341820721a07e2ad054dff400bb6 --- /dev/null +++ b/checkpoint-1971/trainer_state.json @@ -0,0 +1,384 @@ +{ + "best_metric": 0.042461033910512924, + "best_model_checkpoint": "./teapotllm/checkpoint-876", + "epoch": 9.0, + "eval_steps": 500, + "global_step": 1971, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "em_boolean": 0.5294117647058824, + "em_chat": 0.03783783783783784, + "em_extraction": 0.5333333333333333, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 1.0, + "mean_em": 0.12075471698113208, + "mean_similarity": 0.6624939311014594, + "mean_word_count_diff": 11.958490566037735, + "similarity_boolean": 0.649950959993636, + "similarity_chat": 0.6068178282194846, + "similarity_extraction": 0.8274670541286469, + "similarity_qa": 0.8895131280024846, + "similarity_summarization": 0.7897715005609725, + "similarity_unanswerable": 0.8186558306217193, + "word_count_diff_boolean": 2.764705882352941, + "word_count_diff_chat": 11.275675675675675, + "word_count_diff_extraction": 1.8666666666666667, + "word_count_diff_qa": 2.7333333333333334, + "word_count_diff_summarization": 50.05555555555556, + "word_count_diff_unanswerable": 4.4 + }, + { + "epoch": 1.0, + "grad_norm": 0.29226627945899963, + "learning_rate": 1.314e-05, + "loss": 0.0548, + "step": 219 + }, + { + "epoch": 1.0, + "eval_loss": 0.04732182249426842, + "eval_runtime": 17.2425, + "eval_samples_per_second": 15.369, + "eval_steps_per_second": 1.972, + "step": 219 + }, + { + "em_boolean": 0.7058823529411765, + "em_chat": 0.05405405405405406, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 2.0, + "mean_em": 0.12830188679245283, + "mean_similarity": 0.6936857629426808, + "mean_word_count_diff": 11.475471698113207, + "similarity_boolean": 0.7671171505661571, + "similarity_chat": 0.6481531772158435, + "similarity_extraction": 0.7221033732096355, + "similarity_qa": 0.8645438591639201, + "similarity_summarization": 0.824706514676412, + "similarity_unanswerable": 0.8155314723650614, + "word_count_diff_boolean": 2.7058823529411766, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 2.933333333333333, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.166666666666664, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 2.0, + "grad_norm": 0.30153563618659973, + "learning_rate": 2.628e-05, + "loss": 0.0445, + "step": 438 + }, + { + "epoch": 2.0, + "eval_loss": 0.04412226751446724, + "eval_runtime": 17.2255, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.974, + "step": 438 + }, + { + "em_boolean": 0.5882352941176471, + "em_chat": 0.05945945945945946, + "em_extraction": 0.4666666666666667, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 3.0, + "mean_em": 0.13584905660377358, + "mean_similarity": 0.7036299928109039, + "mean_word_count_diff": 11.230188679245282, + "similarity_boolean": 0.702288385699777, + "similarity_chat": 0.6661249467549292, + "similarity_extraction": 0.7363929619391759, + "similarity_qa": 0.9095388889312744, + "similarity_summarization": 0.7749250100718604, + "similarity_unanswerable": 0.8434868295987447, + "word_count_diff_boolean": 1.8823529411764706, + "word_count_diff_chat": 10.41081081081081, + "word_count_diff_extraction": 1.4, + "word_count_diff_qa": 2.8666666666666667, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 3.466666666666667 + }, + { + "epoch": 3.0, + "grad_norm": 0.4190770983695984, + "learning_rate": 2.9365689308796065e-05, + "loss": 0.0366, + "step": 657 + }, + { + "epoch": 3.0, + "eval_loss": 0.04255302622914314, + "eval_runtime": 17.2484, + "eval_samples_per_second": 15.364, + "eval_steps_per_second": 1.971, + "step": 657 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.05405405405405406, + "em_extraction": 0.6666666666666666, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 4.0, + "mean_em": 0.1509433962264151, + "mean_similarity": 0.7214625703522338, + "mean_word_count_diff": 11.275471698113208, + "similarity_boolean": 0.9250823608325685, + "similarity_chat": 0.6629158062407294, + "similarity_extraction": 0.9287973960240682, + "similarity_qa": 0.8280642042557399, + "similarity_summarization": 0.7930781609482236, + "similarity_unanswerable": 0.8128950635592143, + "word_count_diff_boolean": 0.5882352941176471, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 0.4666666666666667, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.27777777777778, + "word_count_diff_unanswerable": 4.333333333333333 + }, + { + "epoch": 4.0, + "grad_norm": 0.34906384348869324, + "learning_rate": 2.6482696742411827e-05, + "loss": 0.0301, + "step": 876 + }, + { + "epoch": 4.0, + "eval_loss": 0.042461033910512924, + "eval_runtime": 17.2294, + "eval_samples_per_second": 15.381, + "eval_steps_per_second": 1.973, + "step": 876 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.07027027027027027, + "em_extraction": 0.4, + "em_qa": 0.6, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 5.0, + "mean_em": 0.16226415094339622, + "mean_similarity": 0.7417481038367973, + "mean_word_count_diff": 10.89811320754717, + "similarity_boolean": 0.9421353953726151, + "similarity_chat": 0.6902955391922513, + "similarity_extraction": 0.7581887672344844, + "similarity_qa": 0.9694860418637593, + "similarity_summarization": 0.795685844288932, + "similarity_unanswerable": 0.840320247411728, + "word_count_diff_boolean": 0.0, + "word_count_diff_chat": 10.04864864864865, + "word_count_diff_extraction": 2.066666666666667, + "word_count_diff_qa": 2.3333333333333335, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 4.066666666666666 + }, + { + "epoch": 5.0, + "grad_norm": 0.2575681209564209, + "learning_rate": 2.17227572135781e-05, + "loss": 0.0252, + "step": 1095 + }, + { + "epoch": 5.0, + "eval_loss": 0.043407145887613297, + "eval_runtime": 17.2466, + "eval_samples_per_second": 15.365, + "eval_steps_per_second": 1.971, + "step": 1095 + }, + { + "em_boolean": 0.8235294117647058, + "em_chat": 0.04864864864864865, + "em_extraction": 0.4666666666666667, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 6.0, + "mean_em": 0.1320754716981132, + "mean_similarity": 0.712902327184126, + "mean_word_count_diff": 11.09433962264151, + "similarity_boolean": 0.8407609323587488, + "similarity_chat": 0.6657742724023961, + "similarity_extraction": 0.8315838446219762, + "similarity_qa": 0.8299160649379095, + "similarity_summarization": 0.7715526024500529, + "similarity_unanswerable": 0.8431663314501444, + "word_count_diff_boolean": 1.2352941176470589, + "word_count_diff_chat": 10.183783783783785, + "word_count_diff_extraction": 1.5333333333333334, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.22222222222222, + "word_count_diff_unanswerable": 4.0 + }, + { + "epoch": 6.0, + "grad_norm": 0.08031666278839111, + "learning_rate": 1.586392436600814e-05, + "loss": 0.0221, + "step": 1314 + }, + { + "epoch": 6.0, + "eval_loss": 0.04415750876069069, + "eval_runtime": 17.2236, + "eval_samples_per_second": 15.386, + "eval_steps_per_second": 1.974, + "step": 1314 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.07567567567567568, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 7.0, + "mean_em": 0.15471698113207547, + "mean_similarity": 0.7240072643349194, + "mean_word_count_diff": 11.026415094339622, + "similarity_boolean": 0.8786488135947901, + "similarity_chat": 0.6825028176545291, + "similarity_extraction": 0.7764622618754705, + "similarity_qa": 0.8644786556561788, + "similarity_summarization": 0.7805107865068648, + "similarity_unanswerable": 0.7999044020970663, + "word_count_diff_boolean": 1.1764705882352942, + "word_count_diff_chat": 10.151351351351352, + "word_count_diff_extraction": 1.5333333333333334, + "word_count_diff_qa": 3.1333333333333333, + "word_count_diff_summarization": 50.388888888888886, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 7.0, + "grad_norm": 0.21582281589508057, + "learning_rate": 9.863875543572635e-06, + "loss": 0.0193, + "step": 1533 + }, + { + "epoch": 7.0, + "eval_loss": 0.0449073351919651, + "eval_runtime": 17.225, + "eval_samples_per_second": 15.385, + "eval_steps_per_second": 1.974, + "step": 1533 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.05945945945945946, + "em_extraction": 0.6, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 8.0, + "mean_em": 0.1509433962264151, + "mean_similarity": 0.7287208422667013, + "mean_word_count_diff": 11.075471698113208, + "similarity_boolean": 0.9135467927245533, + "similarity_chat": 0.6845327336139776, + "similarity_extraction": 0.8388477871815364, + "similarity_qa": 0.8482021888097128, + "similarity_summarization": 0.7841283877690634, + "similarity_unanswerable": 0.7681407590707143, + "word_count_diff_boolean": 0.7647058823529411, + "word_count_diff_chat": 10.254054054054054, + "word_count_diff_extraction": 1.7333333333333334, + "word_count_diff_qa": 3.466666666666667, + "word_count_diff_summarization": 49.888888888888886, + "word_count_diff_unanswerable": 3.2666666666666666 + }, + { + "epoch": 8.0, + "grad_norm": 0.22318905591964722, + "learning_rate": 4.703371073261941e-06, + "loss": 0.0176, + "step": 1752 + }, + { + "epoch": 8.0, + "eval_loss": 0.04609951004385948, + "eval_runtime": 17.2261, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.974, + "step": 1752 + }, + { + "em_boolean": 0.8235294117647058, + "em_chat": 0.0972972972972973, + "em_extraction": 0.5333333333333333, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 9.0, + "mean_em": 0.16981132075471697, + "mean_similarity": 0.7446535826572832, + "mean_word_count_diff": 10.928301886792452, + "similarity_boolean": 0.8877067903385443, + "similarity_chat": 0.6942617533174721, + "similarity_extraction": 0.8046925206979115, + "similarity_qa": 0.9396826108296712, + "similarity_summarization": 0.8152211805184683, + "similarity_unanswerable": 0.8642767588297526, + "word_count_diff_boolean": 0.8823529411764706, + "word_count_diff_chat": 9.962162162162162, + "word_count_diff_extraction": 2.2666666666666666, + "word_count_diff_qa": 3.3333333333333335, + "word_count_diff_summarization": 49.888888888888886, + "word_count_diff_unanswerable": 3.7333333333333334 + }, + { + "epoch": 9.0, + "grad_norm": 0.6055392026901245, + "learning_rate": 1.2259404331716567e-06, + "loss": 0.017, + "step": 1971 + }, + { + "epoch": 9.0, + "eval_loss": 0.046519145369529724, + "eval_runtime": 17.2316, + "eval_samples_per_second": 15.379, + "eval_steps_per_second": 1.973, + "step": 1971 + } + ], + "logging_steps": 500, + "max_steps": 2190, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.8108561789812736e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1971/training_args.bin b/checkpoint-1971/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d977d8d4afc5bd92bf7fc4298b8866c8a6c8438c --- /dev/null +++ b/checkpoint-1971/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f96c10a0a2b43c05318a394de2f8a40b06b79bafe7c52911b0bd4f1a90733fe +size 5304 diff --git a/checkpoint-219/added_tokens.json b/checkpoint-219/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5132007c4fcf42b75b65c8b6aa49c7098bcdf4 --- /dev/null +++ b/checkpoint-219/added_tokens.json @@ -0,0 +1,102 @@ +{ + "": 32099, + "": 32089, + "": 32088, + "": 32087, + "": 32086, + "": 32085, + "": 32084, + "": 32083, + "": 32082, + "": 32081, + "": 32080, + "": 32098, + "": 32079, + "": 32078, + "": 32077, + "": 32076, + "": 32075, + "": 32074, + "": 32073, + "": 32072, + "": 32071, + "": 32070, + "": 32097, + "": 32069, + "": 32068, + "": 32067, + "": 32066, + "": 32065, + "": 32064, + "": 32063, + "": 32062, + "": 32061, + "": 32060, + "": 32096, + "": 32059, + "": 32058, + "": 32057, + "": 32056, + "": 32055, + "": 32054, + "": 32053, + "": 32052, + "": 32051, + "": 32050, + "": 32095, + "": 32049, + "": 32048, + "": 32047, + "": 32046, + "": 32045, + "": 32044, + "": 32043, + "": 32042, + "": 32041, + "": 32040, + "": 32094, + "": 32039, + "": 32038, + "": 32037, + "": 32036, + "": 32035, + "": 32034, + "": 32033, + "": 32032, + "": 32031, + "": 32030, + "": 32093, + "": 32029, + "": 32028, + "": 32027, + "": 32026, + "": 32025, + "": 32024, + "": 32023, + "": 32022, + "": 32021, + "": 32020, + "": 32092, + "": 32019, + "": 32018, + "": 32017, + "": 32016, + "": 32015, + "": 32014, + "": 32013, + "": 32012, + "": 32011, + "": 32010, + "": 32091, + "": 32009, + "": 32008, + "": 32007, + "": 32006, + "": 32005, + "": 32004, + "": 32003, + "": 32002, + "": 32001, + "": 32000, + "": 32090 +} diff --git a/checkpoint-219/config.json b/checkpoint-219/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7adeb5014f60a4a25c0c42eb288f6284e773e6dd --- /dev/null +++ b/checkpoint-219/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "teapotai/teapotllm", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 2816, + "d_kv": 64, + "d_model": 1024, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_decoder_layers": 24, + "num_heads": 16, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.48.3", + "use_cache": true, + "vocab_size": 32128 +} diff --git a/checkpoint-219/generation_config.json b/checkpoint-219/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..058f73f97f63923f298d59a1d6b4f78f510c5146 --- /dev/null +++ b/checkpoint-219/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.48.3" +} diff --git a/checkpoint-219/model.safetensors b/checkpoint-219/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09312afa343d5300a2b7fd529e3c86f8b9a481b6 --- /dev/null +++ b/checkpoint-219/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00f35e22b80a2386aad318d1a6c5c589354e1037b3f83600ffc1277a371e3650 +size 3132668808 diff --git a/checkpoint-219/optimizer.pt b/checkpoint-219/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..121bc7f93064df6c7034c538c9d13e4c2ba60e36 --- /dev/null +++ b/checkpoint-219/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fadaf1dc768a62f6a6509a767952c79c152c7b25f7c5a1df59fed73744fc4fc +size 6265677800 diff --git a/checkpoint-219/rng_state.pth b/checkpoint-219/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b32474f0f3026f4f061131d126d2283833f284f --- /dev/null +++ b/checkpoint-219/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f01d90a65c227f201a2b2ac2945e0efc408b57ba651a6916a32fd0267cc54bc +size 14244 diff --git a/checkpoint-219/scheduler.pt b/checkpoint-219/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5532e40e1229910e723e04829219eca13dffa5a --- /dev/null +++ b/checkpoint-219/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a37dd755bdcc9f693fea64ebc0ad1428620c5d059554e86f7289044096fa47c +size 1064 diff --git a/checkpoint-219/special_tokens_map.json b/checkpoint-219/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..17ade346a1042cbe0c1436f5bedcbd85c099d582 --- /dev/null +++ b/checkpoint-219/special_tokens_map.json @@ -0,0 +1,125 @@ +{ + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-219/spiece.model b/checkpoint-219/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..317a5ccbde45300f5d1d970d4d449af2108b147e --- /dev/null +++ b/checkpoint-219/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 +size 791656 diff --git a/checkpoint-219/tokenizer_config.json b/checkpoint-219/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d9f418ab1e49d1eaa3832e9970c9c503f565484 --- /dev/null +++ b/checkpoint-219/tokenizer_config.json @@ -0,0 +1,941 @@ +{ + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32002": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32003": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32004": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32005": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32006": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32011": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32012": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32015": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32017": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32018": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32019": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32020": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32021": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32022": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32023": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32024": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32025": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32026": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32027": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32028": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32029": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32030": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32031": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32032": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32033": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32034": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32035": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32036": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32037": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32038": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32039": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32040": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32041": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32042": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32043": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32044": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32045": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32046": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32047": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32048": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32049": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32050": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32051": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32052": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32053": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32054": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32055": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32056": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32057": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32058": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32059": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32060": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32061": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32062": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32063": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32064": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32065": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32066": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32067": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32068": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32069": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32070": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32071": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32072": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32073": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32074": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32075": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32076": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32077": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32078": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32079": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32080": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32081": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32082": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32083": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32084": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32085": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32086": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32087": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32088": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32089": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32090": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32091": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32092": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32093": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32094": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32095": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32096": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32097": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32098": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32099": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_ids": 100, + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 512, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-219/trainer_state.json b/checkpoint-219/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..108047f47cfb7ca98b78db9391340ec7c4e62d0d --- /dev/null +++ b/checkpoint-219/trainer_state.json @@ -0,0 +1,72 @@ +{ + "best_metric": 0.04732182249426842, + "best_model_checkpoint": "./teapotllm/checkpoint-219", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 219, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "em_boolean": 0.5294117647058824, + "em_chat": 0.03783783783783784, + "em_extraction": 0.5333333333333333, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 1.0, + "mean_em": 0.12075471698113208, + "mean_similarity": 0.6624939311014594, + "mean_word_count_diff": 11.958490566037735, + "similarity_boolean": 0.649950959993636, + "similarity_chat": 0.6068178282194846, + "similarity_extraction": 0.8274670541286469, + "similarity_qa": 0.8895131280024846, + "similarity_summarization": 0.7897715005609725, + "similarity_unanswerable": 0.8186558306217193, + "word_count_diff_boolean": 2.764705882352941, + "word_count_diff_chat": 11.275675675675675, + "word_count_diff_extraction": 1.8666666666666667, + "word_count_diff_qa": 2.7333333333333334, + "word_count_diff_summarization": 50.05555555555556, + "word_count_diff_unanswerable": 4.4 + }, + { + "epoch": 1.0, + "grad_norm": 0.29226627945899963, + "learning_rate": 1.314e-05, + "loss": 0.0548, + "step": 219 + }, + { + "epoch": 1.0, + "eval_loss": 0.04732182249426842, + "eval_runtime": 17.2425, + "eval_samples_per_second": 15.369, + "eval_steps_per_second": 1.972, + "step": 219 + } + ], + "logging_steps": 500, + "max_steps": 2190, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2012062421090304.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-219/training_args.bin b/checkpoint-219/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d977d8d4afc5bd92bf7fc4298b8866c8a6c8438c --- /dev/null +++ b/checkpoint-219/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f96c10a0a2b43c05318a394de2f8a40b06b79bafe7c52911b0bd4f1a90733fe +size 5304 diff --git a/checkpoint-2190/added_tokens.json b/checkpoint-2190/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5132007c4fcf42b75b65c8b6aa49c7098bcdf4 --- /dev/null +++ b/checkpoint-2190/added_tokens.json @@ -0,0 +1,102 @@ +{ + "": 32099, + "": 32089, + "": 32088, + "": 32087, + "": 32086, + "": 32085, + "": 32084, + "": 32083, + "": 32082, + "": 32081, + "": 32080, + "": 32098, + "": 32079, + "": 32078, + "": 32077, + "": 32076, + "": 32075, + "": 32074, + "": 32073, + "": 32072, + "": 32071, + "": 32070, + "": 32097, + "": 32069, + "": 32068, + "": 32067, + "": 32066, + "": 32065, + "": 32064, + "": 32063, + "": 32062, + "": 32061, + "": 32060, + "": 32096, + "": 32059, + "": 32058, + "": 32057, + "": 32056, + "": 32055, + "": 32054, + "": 32053, + "": 32052, + "": 32051, + "": 32050, + "": 32095, + "": 32049, + "": 32048, + "": 32047, + "": 32046, + "": 32045, + "": 32044, + "": 32043, + "": 32042, + "": 32041, + "": 32040, + "": 32094, + "": 32039, + "": 32038, + "": 32037, + "": 32036, + "": 32035, + "": 32034, + "": 32033, + "": 32032, + "": 32031, + "": 32030, + "": 32093, + "": 32029, + "": 32028, + "": 32027, + "": 32026, + "": 32025, + "": 32024, + "": 32023, + "": 32022, + "": 32021, + "": 32020, + "": 32092, + "": 32019, + "": 32018, + "": 32017, + "": 32016, + "": 32015, + "": 32014, + "": 32013, + "": 32012, + "": 32011, + "": 32010, + "": 32091, + "": 32009, + "": 32008, + "": 32007, + "": 32006, + "": 32005, + "": 32004, + "": 32003, + "": 32002, + "": 32001, + "": 32000, + "": 32090 +} diff --git a/checkpoint-2190/config.json b/checkpoint-2190/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7adeb5014f60a4a25c0c42eb288f6284e773e6dd --- /dev/null +++ b/checkpoint-2190/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "teapotai/teapotllm", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 2816, + "d_kv": 64, + "d_model": 1024, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_decoder_layers": 24, + "num_heads": 16, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.48.3", + "use_cache": true, + "vocab_size": 32128 +} diff --git a/checkpoint-2190/generation_config.json b/checkpoint-2190/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..058f73f97f63923f298d59a1d6b4f78f510c5146 --- /dev/null +++ b/checkpoint-2190/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.48.3" +} diff --git a/checkpoint-2190/model.safetensors b/checkpoint-2190/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a75f06d76d4a34fd2059947a070b6385a09f999a --- /dev/null +++ b/checkpoint-2190/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0846740970fbe8eacfd8c29f649af264086197ca7520cf15dfc3f80b88f67221 +size 3132668808 diff --git a/checkpoint-2190/optimizer.pt b/checkpoint-2190/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..872a57c0d80951f7ea7f66e0201ec0f79ccfc0e9 --- /dev/null +++ b/checkpoint-2190/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42570d7f446f808cac06235f6bcb7e72e3dd131791d1b1f0bc3c52c27b4e3b65 +size 6265677800 diff --git a/checkpoint-2190/rng_state.pth b/checkpoint-2190/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..04100068e861242c1ac86e8ab7f0dd41dc3b74cb --- /dev/null +++ b/checkpoint-2190/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffeea2525ede8f66c564697cc013843061a6445a5a7865fca9605a36ead9bc28 +size 14244 diff --git a/checkpoint-2190/scheduler.pt b/checkpoint-2190/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f28ce57ea8560e743fdf3e57b8c94f9248dca8f3 --- /dev/null +++ b/checkpoint-2190/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461097d8065e21e0cba5ed510963b1506a59f513e9b3f6cc5a26b313a9f41e91 +size 1064 diff --git a/checkpoint-2190/special_tokens_map.json b/checkpoint-2190/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..17ade346a1042cbe0c1436f5bedcbd85c099d582 --- /dev/null +++ b/checkpoint-2190/special_tokens_map.json @@ -0,0 +1,125 @@ +{ + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-2190/spiece.model b/checkpoint-2190/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..317a5ccbde45300f5d1d970d4d449af2108b147e --- /dev/null +++ b/checkpoint-2190/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 +size 791656 diff --git a/checkpoint-2190/tokenizer_config.json b/checkpoint-2190/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d9f418ab1e49d1eaa3832e9970c9c503f565484 --- /dev/null +++ b/checkpoint-2190/tokenizer_config.json @@ -0,0 +1,941 @@ +{ + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32002": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32003": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32004": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32005": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32006": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32011": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32012": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32015": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32017": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32018": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32019": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32020": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32021": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32022": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32023": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32024": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32025": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32026": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32027": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32028": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32029": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32030": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32031": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32032": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32033": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32034": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32035": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32036": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32037": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32038": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32039": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32040": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32041": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32042": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32043": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32044": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32045": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32046": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32047": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32048": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32049": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32050": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32051": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32052": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32053": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32054": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32055": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32056": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32057": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32058": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32059": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32060": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32061": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32062": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32063": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32064": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32065": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32066": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32067": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32068": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32069": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32070": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32071": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32072": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32073": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32074": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32075": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32076": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32077": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32078": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32079": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32080": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32081": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32082": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32083": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32084": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32085": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32086": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32087": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32088": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32089": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32090": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32091": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32092": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32093": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32094": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32095": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32096": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32097": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32098": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32099": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_ids": 100, + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 512, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-2190/trainer_state.json b/checkpoint-2190/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..61dae8f3cd3b2abef73d12caa3d6ebb5a306cf2a --- /dev/null +++ b/checkpoint-2190/trainer_state.json @@ -0,0 +1,423 @@ +{ + "best_metric": 0.042461033910512924, + "best_model_checkpoint": "./teapotllm/checkpoint-876", + "epoch": 10.0, + "eval_steps": 500, + "global_step": 2190, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "em_boolean": 0.5294117647058824, + "em_chat": 0.03783783783783784, + "em_extraction": 0.5333333333333333, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 1.0, + "mean_em": 0.12075471698113208, + "mean_similarity": 0.6624939311014594, + "mean_word_count_diff": 11.958490566037735, + "similarity_boolean": 0.649950959993636, + "similarity_chat": 0.6068178282194846, + "similarity_extraction": 0.8274670541286469, + "similarity_qa": 0.8895131280024846, + "similarity_summarization": 0.7897715005609725, + "similarity_unanswerable": 0.8186558306217193, + "word_count_diff_boolean": 2.764705882352941, + "word_count_diff_chat": 11.275675675675675, + "word_count_diff_extraction": 1.8666666666666667, + "word_count_diff_qa": 2.7333333333333334, + "word_count_diff_summarization": 50.05555555555556, + "word_count_diff_unanswerable": 4.4 + }, + { + "epoch": 1.0, + "grad_norm": 0.29226627945899963, + "learning_rate": 1.314e-05, + "loss": 0.0548, + "step": 219 + }, + { + "epoch": 1.0, + "eval_loss": 0.04732182249426842, + "eval_runtime": 17.2425, + "eval_samples_per_second": 15.369, + "eval_steps_per_second": 1.972, + "step": 219 + }, + { + "em_boolean": 0.7058823529411765, + "em_chat": 0.05405405405405406, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 2.0, + "mean_em": 0.12830188679245283, + "mean_similarity": 0.6936857629426808, + "mean_word_count_diff": 11.475471698113207, + "similarity_boolean": 0.7671171505661571, + "similarity_chat": 0.6481531772158435, + "similarity_extraction": 0.7221033732096355, + "similarity_qa": 0.8645438591639201, + "similarity_summarization": 0.824706514676412, + "similarity_unanswerable": 0.8155314723650614, + "word_count_diff_boolean": 2.7058823529411766, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 2.933333333333333, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.166666666666664, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 2.0, + "grad_norm": 0.30153563618659973, + "learning_rate": 2.628e-05, + "loss": 0.0445, + "step": 438 + }, + { + "epoch": 2.0, + "eval_loss": 0.04412226751446724, + "eval_runtime": 17.2255, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.974, + "step": 438 + }, + { + "em_boolean": 0.5882352941176471, + "em_chat": 0.05945945945945946, + "em_extraction": 0.4666666666666667, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 3.0, + "mean_em": 0.13584905660377358, + "mean_similarity": 0.7036299928109039, + "mean_word_count_diff": 11.230188679245282, + "similarity_boolean": 0.702288385699777, + "similarity_chat": 0.6661249467549292, + "similarity_extraction": 0.7363929619391759, + "similarity_qa": 0.9095388889312744, + "similarity_summarization": 0.7749250100718604, + "similarity_unanswerable": 0.8434868295987447, + "word_count_diff_boolean": 1.8823529411764706, + "word_count_diff_chat": 10.41081081081081, + "word_count_diff_extraction": 1.4, + "word_count_diff_qa": 2.8666666666666667, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 3.466666666666667 + }, + { + "epoch": 3.0, + "grad_norm": 0.4190770983695984, + "learning_rate": 2.9365689308796065e-05, + "loss": 0.0366, + "step": 657 + }, + { + "epoch": 3.0, + "eval_loss": 0.04255302622914314, + "eval_runtime": 17.2484, + "eval_samples_per_second": 15.364, + "eval_steps_per_second": 1.971, + "step": 657 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.05405405405405406, + "em_extraction": 0.6666666666666666, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 4.0, + "mean_em": 0.1509433962264151, + "mean_similarity": 0.7214625703522338, + "mean_word_count_diff": 11.275471698113208, + "similarity_boolean": 0.9250823608325685, + "similarity_chat": 0.6629158062407294, + "similarity_extraction": 0.9287973960240682, + "similarity_qa": 0.8280642042557399, + "similarity_summarization": 0.7930781609482236, + "similarity_unanswerable": 0.8128950635592143, + "word_count_diff_boolean": 0.5882352941176471, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 0.4666666666666667, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.27777777777778, + "word_count_diff_unanswerable": 4.333333333333333 + }, + { + "epoch": 4.0, + "grad_norm": 0.34906384348869324, + "learning_rate": 2.6482696742411827e-05, + "loss": 0.0301, + "step": 876 + }, + { + "epoch": 4.0, + "eval_loss": 0.042461033910512924, + "eval_runtime": 17.2294, + "eval_samples_per_second": 15.381, + "eval_steps_per_second": 1.973, + "step": 876 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.07027027027027027, + "em_extraction": 0.4, + "em_qa": 0.6, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 5.0, + "mean_em": 0.16226415094339622, + "mean_similarity": 0.7417481038367973, + "mean_word_count_diff": 10.89811320754717, + "similarity_boolean": 0.9421353953726151, + "similarity_chat": 0.6902955391922513, + "similarity_extraction": 0.7581887672344844, + "similarity_qa": 0.9694860418637593, + "similarity_summarization": 0.795685844288932, + "similarity_unanswerable": 0.840320247411728, + "word_count_diff_boolean": 0.0, + "word_count_diff_chat": 10.04864864864865, + "word_count_diff_extraction": 2.066666666666667, + "word_count_diff_qa": 2.3333333333333335, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 4.066666666666666 + }, + { + "epoch": 5.0, + "grad_norm": 0.2575681209564209, + "learning_rate": 2.17227572135781e-05, + "loss": 0.0252, + "step": 1095 + }, + { + "epoch": 5.0, + "eval_loss": 0.043407145887613297, + "eval_runtime": 17.2466, + "eval_samples_per_second": 15.365, + "eval_steps_per_second": 1.971, + "step": 1095 + }, + { + "em_boolean": 0.8235294117647058, + "em_chat": 0.04864864864864865, + "em_extraction": 0.4666666666666667, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 6.0, + "mean_em": 0.1320754716981132, + "mean_similarity": 0.712902327184126, + "mean_word_count_diff": 11.09433962264151, + "similarity_boolean": 0.8407609323587488, + "similarity_chat": 0.6657742724023961, + "similarity_extraction": 0.8315838446219762, + "similarity_qa": 0.8299160649379095, + "similarity_summarization": 0.7715526024500529, + "similarity_unanswerable": 0.8431663314501444, + "word_count_diff_boolean": 1.2352941176470589, + "word_count_diff_chat": 10.183783783783785, + "word_count_diff_extraction": 1.5333333333333334, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.22222222222222, + "word_count_diff_unanswerable": 4.0 + }, + { + "epoch": 6.0, + "grad_norm": 0.08031666278839111, + "learning_rate": 1.586392436600814e-05, + "loss": 0.0221, + "step": 1314 + }, + { + "epoch": 6.0, + "eval_loss": 0.04415750876069069, + "eval_runtime": 17.2236, + "eval_samples_per_second": 15.386, + "eval_steps_per_second": 1.974, + "step": 1314 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.07567567567567568, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 7.0, + "mean_em": 0.15471698113207547, + "mean_similarity": 0.7240072643349194, + "mean_word_count_diff": 11.026415094339622, + "similarity_boolean": 0.8786488135947901, + "similarity_chat": 0.6825028176545291, + "similarity_extraction": 0.7764622618754705, + "similarity_qa": 0.8644786556561788, + "similarity_summarization": 0.7805107865068648, + "similarity_unanswerable": 0.7999044020970663, + "word_count_diff_boolean": 1.1764705882352942, + "word_count_diff_chat": 10.151351351351352, + "word_count_diff_extraction": 1.5333333333333334, + "word_count_diff_qa": 3.1333333333333333, + "word_count_diff_summarization": 50.388888888888886, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 7.0, + "grad_norm": 0.21582281589508057, + "learning_rate": 9.863875543572635e-06, + "loss": 0.0193, + "step": 1533 + }, + { + "epoch": 7.0, + "eval_loss": 0.0449073351919651, + "eval_runtime": 17.225, + "eval_samples_per_second": 15.385, + "eval_steps_per_second": 1.974, + "step": 1533 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.05945945945945946, + "em_extraction": 0.6, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 8.0, + "mean_em": 0.1509433962264151, + "mean_similarity": 0.7287208422667013, + "mean_word_count_diff": 11.075471698113208, + "similarity_boolean": 0.9135467927245533, + "similarity_chat": 0.6845327336139776, + "similarity_extraction": 0.8388477871815364, + "similarity_qa": 0.8482021888097128, + "similarity_summarization": 0.7841283877690634, + "similarity_unanswerable": 0.7681407590707143, + "word_count_diff_boolean": 0.7647058823529411, + "word_count_diff_chat": 10.254054054054054, + "word_count_diff_extraction": 1.7333333333333334, + "word_count_diff_qa": 3.466666666666667, + "word_count_diff_summarization": 49.888888888888886, + "word_count_diff_unanswerable": 3.2666666666666666 + }, + { + "epoch": 8.0, + "grad_norm": 0.22318905591964722, + "learning_rate": 4.703371073261941e-06, + "loss": 0.0176, + "step": 1752 + }, + { + "epoch": 8.0, + "eval_loss": 0.04609951004385948, + "eval_runtime": 17.2261, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.974, + "step": 1752 + }, + { + "em_boolean": 0.8235294117647058, + "em_chat": 0.0972972972972973, + "em_extraction": 0.5333333333333333, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 9.0, + "mean_em": 0.16981132075471697, + "mean_similarity": 0.7446535826572832, + "mean_word_count_diff": 10.928301886792452, + "similarity_boolean": 0.8877067903385443, + "similarity_chat": 0.6942617533174721, + "similarity_extraction": 0.8046925206979115, + "similarity_qa": 0.9396826108296712, + "similarity_summarization": 0.8152211805184683, + "similarity_unanswerable": 0.8642767588297526, + "word_count_diff_boolean": 0.8823529411764706, + "word_count_diff_chat": 9.962162162162162, + "word_count_diff_extraction": 2.2666666666666666, + "word_count_diff_qa": 3.3333333333333335, + "word_count_diff_summarization": 49.888888888888886, + "word_count_diff_unanswerable": 3.7333333333333334 + }, + { + "epoch": 9.0, + "grad_norm": 0.6055392026901245, + "learning_rate": 1.2259404331716567e-06, + "loss": 0.017, + "step": 1971 + }, + { + "epoch": 9.0, + "eval_loss": 0.046519145369529724, + "eval_runtime": 17.2316, + "eval_samples_per_second": 15.379, + "eval_steps_per_second": 1.973, + "step": 1971 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.07567567567567568, + "em_extraction": 0.4666666666666667, + "em_qa": 0.4666666666666667, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 10.0, + "mean_em": 0.16226415094339622, + "mean_similarity": 0.740944116372826, + "mean_word_count_diff": 10.992452830188679, + "similarity_boolean": 0.8978819262017222, + "similarity_chat": 0.6907422283815371, + "similarity_extraction": 0.8148630116134882, + "similarity_qa": 0.9200653195381164, + "similarity_summarization": 0.8028387659125857, + "similarity_unanswerable": 0.854924205938975, + "word_count_diff_boolean": 0.8235294117647058, + "word_count_diff_chat": 10.162162162162161, + "word_count_diff_extraction": 1.4, + "word_count_diff_qa": 3.1333333333333333, + "word_count_diff_summarization": 50.05555555555556, + "word_count_diff_unanswerable": 3.3333333333333335 + }, + { + "epoch": 10.0, + "grad_norm": 0.11785141378641129, + "learning_rate": 0.0, + "loss": 0.0162, + "step": 2190 + }, + { + "epoch": 10.0, + "eval_loss": 0.0466282032430172, + "eval_runtime": 17.2316, + "eval_samples_per_second": 15.379, + "eval_steps_per_second": 1.973, + "step": 2190 + } + ], + "logging_steps": 500, + "max_steps": 2190, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.012062421090304e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2190/training_args.bin b/checkpoint-2190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d977d8d4afc5bd92bf7fc4298b8866c8a6c8438c --- /dev/null +++ b/checkpoint-2190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f96c10a0a2b43c05318a394de2f8a40b06b79bafe7c52911b0bd4f1a90733fe +size 5304 diff --git a/checkpoint-438/added_tokens.json b/checkpoint-438/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5132007c4fcf42b75b65c8b6aa49c7098bcdf4 --- /dev/null +++ b/checkpoint-438/added_tokens.json @@ -0,0 +1,102 @@ +{ + "": 32099, + "": 32089, + "": 32088, + "": 32087, + "": 32086, + "": 32085, + "": 32084, + "": 32083, + "": 32082, + "": 32081, + "": 32080, + "": 32098, + "": 32079, + "": 32078, + "": 32077, + "": 32076, + "": 32075, + "": 32074, + "": 32073, + "": 32072, + "": 32071, + "": 32070, + "": 32097, + "": 32069, + "": 32068, + "": 32067, + "": 32066, + "": 32065, + "": 32064, + "": 32063, + "": 32062, + "": 32061, + "": 32060, + "": 32096, + "": 32059, + "": 32058, + "": 32057, + "": 32056, + "": 32055, + "": 32054, + "": 32053, + "": 32052, + "": 32051, + "": 32050, + "": 32095, + "": 32049, + "": 32048, + "": 32047, + "": 32046, + "": 32045, + "": 32044, + "": 32043, + "": 32042, + "": 32041, + "": 32040, + "": 32094, + "": 32039, + "": 32038, + "": 32037, + "": 32036, + "": 32035, + "": 32034, + "": 32033, + "": 32032, + "": 32031, + "": 32030, + "": 32093, + "": 32029, + "": 32028, + "": 32027, + "": 32026, + "": 32025, + "": 32024, + "": 32023, + "": 32022, + "": 32021, + "": 32020, + "": 32092, + "": 32019, + "": 32018, + "": 32017, + "": 32016, + "": 32015, + "": 32014, + "": 32013, + "": 32012, + "": 32011, + "": 32010, + "": 32091, + "": 32009, + "": 32008, + "": 32007, + "": 32006, + "": 32005, + "": 32004, + "": 32003, + "": 32002, + "": 32001, + "": 32000, + "": 32090 +} diff --git a/checkpoint-438/config.json b/checkpoint-438/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7adeb5014f60a4a25c0c42eb288f6284e773e6dd --- /dev/null +++ b/checkpoint-438/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "teapotai/teapotllm", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 2816, + "d_kv": 64, + "d_model": 1024, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_decoder_layers": 24, + "num_heads": 16, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.48.3", + "use_cache": true, + "vocab_size": 32128 +} diff --git a/checkpoint-438/generation_config.json b/checkpoint-438/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..058f73f97f63923f298d59a1d6b4f78f510c5146 --- /dev/null +++ b/checkpoint-438/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.48.3" +} diff --git a/checkpoint-438/model.safetensors b/checkpoint-438/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4aaa7856f229cc8c18736a3c5d4c4c5a817e7732 --- /dev/null +++ b/checkpoint-438/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c86865275a5d13b254d032fed420351e646b1246666cd57f4146428e49ffe5 +size 3132668808 diff --git a/checkpoint-438/optimizer.pt b/checkpoint-438/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9da209c740318185c29239de5cf956f994faa2e2 --- /dev/null +++ b/checkpoint-438/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11df471ddd62ed1bdd32c48839c873add6d0081cc91aa463441402f6eb6e2a7b +size 6265677800 diff --git a/checkpoint-438/rng_state.pth b/checkpoint-438/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa768fd35856718b4aa14ff68c9d91ec3458449a --- /dev/null +++ b/checkpoint-438/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a11c84d8bfc297ab0923d0785a548e1163d804a5ded503156b5cde0f267cb41c +size 14244 diff --git a/checkpoint-438/scheduler.pt b/checkpoint-438/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1762b3490b7ecae86274d51ae7361e3c8879b37 --- /dev/null +++ b/checkpoint-438/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6be4a87c4461613f43c5980fd3a146739e64e14b11901800a5b24b5368ab021 +size 1064 diff --git a/checkpoint-438/special_tokens_map.json b/checkpoint-438/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..17ade346a1042cbe0c1436f5bedcbd85c099d582 --- /dev/null +++ b/checkpoint-438/special_tokens_map.json @@ -0,0 +1,125 @@ +{ + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-438/spiece.model b/checkpoint-438/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..317a5ccbde45300f5d1d970d4d449af2108b147e --- /dev/null +++ b/checkpoint-438/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 +size 791656 diff --git a/checkpoint-438/tokenizer_config.json b/checkpoint-438/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d9f418ab1e49d1eaa3832e9970c9c503f565484 --- /dev/null +++ b/checkpoint-438/tokenizer_config.json @@ -0,0 +1,941 @@ +{ + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32002": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32003": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32004": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32005": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32006": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32011": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32012": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32015": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32017": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32018": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32019": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32020": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32021": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32022": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32023": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32024": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32025": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32026": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32027": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32028": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32029": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32030": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32031": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32032": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32033": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32034": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32035": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32036": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32037": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32038": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32039": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32040": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32041": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32042": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32043": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32044": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32045": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32046": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32047": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32048": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32049": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32050": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32051": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32052": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32053": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32054": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32055": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32056": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32057": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32058": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32059": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32060": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32061": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32062": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32063": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32064": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32065": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32066": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32067": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32068": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32069": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32070": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32071": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32072": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32073": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32074": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32075": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32076": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32077": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32078": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32079": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32080": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32081": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32082": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32083": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32084": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32085": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32086": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32087": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32088": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32089": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32090": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32091": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32092": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32093": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32094": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32095": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32096": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32097": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32098": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32099": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_ids": 100, + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 512, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-438/trainer_state.json b/checkpoint-438/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4b990c12c7ee9744b59ff503be7712f371f030f8 --- /dev/null +++ b/checkpoint-438/trainer_state.json @@ -0,0 +1,111 @@ +{ + "best_metric": 0.04412226751446724, + "best_model_checkpoint": "./teapotllm/checkpoint-438", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 438, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "em_boolean": 0.5294117647058824, + "em_chat": 0.03783783783783784, + "em_extraction": 0.5333333333333333, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 1.0, + "mean_em": 0.12075471698113208, + "mean_similarity": 0.6624939311014594, + "mean_word_count_diff": 11.958490566037735, + "similarity_boolean": 0.649950959993636, + "similarity_chat": 0.6068178282194846, + "similarity_extraction": 0.8274670541286469, + "similarity_qa": 0.8895131280024846, + "similarity_summarization": 0.7897715005609725, + "similarity_unanswerable": 0.8186558306217193, + "word_count_diff_boolean": 2.764705882352941, + "word_count_diff_chat": 11.275675675675675, + "word_count_diff_extraction": 1.8666666666666667, + "word_count_diff_qa": 2.7333333333333334, + "word_count_diff_summarization": 50.05555555555556, + "word_count_diff_unanswerable": 4.4 + }, + { + "epoch": 1.0, + "grad_norm": 0.29226627945899963, + "learning_rate": 1.314e-05, + "loss": 0.0548, + "step": 219 + }, + { + "epoch": 1.0, + "eval_loss": 0.04732182249426842, + "eval_runtime": 17.2425, + "eval_samples_per_second": 15.369, + "eval_steps_per_second": 1.972, + "step": 219 + }, + { + "em_boolean": 0.7058823529411765, + "em_chat": 0.05405405405405406, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 2.0, + "mean_em": 0.12830188679245283, + "mean_similarity": 0.6936857629426808, + "mean_word_count_diff": 11.475471698113207, + "similarity_boolean": 0.7671171505661571, + "similarity_chat": 0.6481531772158435, + "similarity_extraction": 0.7221033732096355, + "similarity_qa": 0.8645438591639201, + "similarity_summarization": 0.824706514676412, + "similarity_unanswerable": 0.8155314723650614, + "word_count_diff_boolean": 2.7058823529411766, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 2.933333333333333, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.166666666666664, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 2.0, + "grad_norm": 0.30153563618659973, + "learning_rate": 2.628e-05, + "loss": 0.0445, + "step": 438 + }, + { + "epoch": 2.0, + "eval_loss": 0.04412226751446724, + "eval_runtime": 17.2255, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.974, + "step": 438 + } + ], + "logging_steps": 500, + "max_steps": 2190, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4024124842180608.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-438/training_args.bin b/checkpoint-438/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d977d8d4afc5bd92bf7fc4298b8866c8a6c8438c --- /dev/null +++ b/checkpoint-438/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f96c10a0a2b43c05318a394de2f8a40b06b79bafe7c52911b0bd4f1a90733fe +size 5304 diff --git a/checkpoint-657/added_tokens.json b/checkpoint-657/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5132007c4fcf42b75b65c8b6aa49c7098bcdf4 --- /dev/null +++ b/checkpoint-657/added_tokens.json @@ -0,0 +1,102 @@ +{ + "": 32099, + "": 32089, + "": 32088, + "": 32087, + "": 32086, + "": 32085, + "": 32084, + "": 32083, + "": 32082, + "": 32081, + "": 32080, + "": 32098, + "": 32079, + "": 32078, + "": 32077, + "": 32076, + "": 32075, + "": 32074, + "": 32073, + "": 32072, + "": 32071, + "": 32070, + "": 32097, + "": 32069, + "": 32068, + "": 32067, + "": 32066, + "": 32065, + "": 32064, + "": 32063, + "": 32062, + "": 32061, + "": 32060, + "": 32096, + "": 32059, + "": 32058, + "": 32057, + "": 32056, + "": 32055, + "": 32054, + "": 32053, + "": 32052, + "": 32051, + "": 32050, + "": 32095, + "": 32049, + "": 32048, + "": 32047, + "": 32046, + "": 32045, + "": 32044, + "": 32043, + "": 32042, + "": 32041, + "": 32040, + "": 32094, + "": 32039, + "": 32038, + "": 32037, + "": 32036, + "": 32035, + "": 32034, + "": 32033, + "": 32032, + "": 32031, + "": 32030, + "": 32093, + "": 32029, + "": 32028, + "": 32027, + "": 32026, + "": 32025, + "": 32024, + "": 32023, + "": 32022, + "": 32021, + "": 32020, + "": 32092, + "": 32019, + "": 32018, + "": 32017, + "": 32016, + "": 32015, + "": 32014, + "": 32013, + "": 32012, + "": 32011, + "": 32010, + "": 32091, + "": 32009, + "": 32008, + "": 32007, + "": 32006, + "": 32005, + "": 32004, + "": 32003, + "": 32002, + "": 32001, + "": 32000, + "": 32090 +} diff --git a/checkpoint-657/config.json b/checkpoint-657/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7adeb5014f60a4a25c0c42eb288f6284e773e6dd --- /dev/null +++ b/checkpoint-657/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "teapotai/teapotllm", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 2816, + "d_kv": 64, + "d_model": 1024, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_decoder_layers": 24, + "num_heads": 16, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.48.3", + "use_cache": true, + "vocab_size": 32128 +} diff --git a/checkpoint-657/generation_config.json b/checkpoint-657/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..058f73f97f63923f298d59a1d6b4f78f510c5146 --- /dev/null +++ b/checkpoint-657/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.48.3" +} diff --git a/checkpoint-657/model.safetensors b/checkpoint-657/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6987ea70542dbf23d76cf3ad45be42472098a1d8 --- /dev/null +++ b/checkpoint-657/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c099f429c04daa96b4b657b837f7a593873bc706675c715e00b680c936c773a +size 3132668808 diff --git a/checkpoint-657/optimizer.pt b/checkpoint-657/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..af9cbae8b7abaf533182f0a7e9834173514b2af0 --- /dev/null +++ b/checkpoint-657/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46a51d5089c4c095dcb9881feee1dbf83fd8dee0c0a75172e500a21dc04a693 +size 6265677800 diff --git a/checkpoint-657/rng_state.pth b/checkpoint-657/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e4dae4de9e7f9f5bed28d79193be052c2275ea0a --- /dev/null +++ b/checkpoint-657/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe4b6eb90f42318515e947da2e961f76d1f951f9ece9ad709a7c9896bd3d0124 +size 14244 diff --git a/checkpoint-657/scheduler.pt b/checkpoint-657/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b5a8a54f44e01a36758487a8597b24f50c271ac --- /dev/null +++ b/checkpoint-657/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323388c6eb0e274e184a912acedc8c2c12826d28572b32c45f62db62a5e0c2fc +size 1064 diff --git a/checkpoint-657/special_tokens_map.json b/checkpoint-657/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..17ade346a1042cbe0c1436f5bedcbd85c099d582 --- /dev/null +++ b/checkpoint-657/special_tokens_map.json @@ -0,0 +1,125 @@ +{ + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-657/spiece.model b/checkpoint-657/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..317a5ccbde45300f5d1d970d4d449af2108b147e --- /dev/null +++ b/checkpoint-657/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 +size 791656 diff --git a/checkpoint-657/tokenizer_config.json b/checkpoint-657/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d9f418ab1e49d1eaa3832e9970c9c503f565484 --- /dev/null +++ b/checkpoint-657/tokenizer_config.json @@ -0,0 +1,941 @@ +{ + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32002": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32003": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32004": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32005": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32006": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32011": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32012": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32015": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32017": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32018": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32019": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32020": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32021": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32022": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32023": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32024": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32025": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32026": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32027": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32028": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32029": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32030": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32031": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32032": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32033": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32034": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32035": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32036": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32037": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32038": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32039": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32040": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32041": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32042": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32043": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32044": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32045": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32046": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32047": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32048": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32049": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32050": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32051": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32052": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32053": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32054": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32055": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32056": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32057": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32058": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32059": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32060": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32061": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32062": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32063": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32064": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32065": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32066": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32067": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32068": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32069": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32070": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32071": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32072": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32073": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32074": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32075": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32076": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32077": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32078": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32079": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32080": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32081": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32082": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32083": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32084": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32085": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32086": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32087": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32088": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32089": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32090": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32091": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32092": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32093": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32094": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32095": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32096": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32097": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32098": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32099": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_ids": 100, + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 512, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-657/trainer_state.json b/checkpoint-657/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..523c4c87b6c8b6f213677c9552a6b0b89493fffe --- /dev/null +++ b/checkpoint-657/trainer_state.json @@ -0,0 +1,150 @@ +{ + "best_metric": 0.04255302622914314, + "best_model_checkpoint": "./teapotllm/checkpoint-657", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 657, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "em_boolean": 0.5294117647058824, + "em_chat": 0.03783783783783784, + "em_extraction": 0.5333333333333333, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 1.0, + "mean_em": 0.12075471698113208, + "mean_similarity": 0.6624939311014594, + "mean_word_count_diff": 11.958490566037735, + "similarity_boolean": 0.649950959993636, + "similarity_chat": 0.6068178282194846, + "similarity_extraction": 0.8274670541286469, + "similarity_qa": 0.8895131280024846, + "similarity_summarization": 0.7897715005609725, + "similarity_unanswerable": 0.8186558306217193, + "word_count_diff_boolean": 2.764705882352941, + "word_count_diff_chat": 11.275675675675675, + "word_count_diff_extraction": 1.8666666666666667, + "word_count_diff_qa": 2.7333333333333334, + "word_count_diff_summarization": 50.05555555555556, + "word_count_diff_unanswerable": 4.4 + }, + { + "epoch": 1.0, + "grad_norm": 0.29226627945899963, + "learning_rate": 1.314e-05, + "loss": 0.0548, + "step": 219 + }, + { + "epoch": 1.0, + "eval_loss": 0.04732182249426842, + "eval_runtime": 17.2425, + "eval_samples_per_second": 15.369, + "eval_steps_per_second": 1.972, + "step": 219 + }, + { + "em_boolean": 0.7058823529411765, + "em_chat": 0.05405405405405406, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 2.0, + "mean_em": 0.12830188679245283, + "mean_similarity": 0.6936857629426808, + "mean_word_count_diff": 11.475471698113207, + "similarity_boolean": 0.7671171505661571, + "similarity_chat": 0.6481531772158435, + "similarity_extraction": 0.7221033732096355, + "similarity_qa": 0.8645438591639201, + "similarity_summarization": 0.824706514676412, + "similarity_unanswerable": 0.8155314723650614, + "word_count_diff_boolean": 2.7058823529411766, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 2.933333333333333, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.166666666666664, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 2.0, + "grad_norm": 0.30153563618659973, + "learning_rate": 2.628e-05, + "loss": 0.0445, + "step": 438 + }, + { + "epoch": 2.0, + "eval_loss": 0.04412226751446724, + "eval_runtime": 17.2255, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.974, + "step": 438 + }, + { + "em_boolean": 0.5882352941176471, + "em_chat": 0.05945945945945946, + "em_extraction": 0.4666666666666667, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 3.0, + "mean_em": 0.13584905660377358, + "mean_similarity": 0.7036299928109039, + "mean_word_count_diff": 11.230188679245282, + "similarity_boolean": 0.702288385699777, + "similarity_chat": 0.6661249467549292, + "similarity_extraction": 0.7363929619391759, + "similarity_qa": 0.9095388889312744, + "similarity_summarization": 0.7749250100718604, + "similarity_unanswerable": 0.8434868295987447, + "word_count_diff_boolean": 1.8823529411764706, + "word_count_diff_chat": 10.41081081081081, + "word_count_diff_extraction": 1.4, + "word_count_diff_qa": 2.8666666666666667, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 3.466666666666667 + }, + { + "epoch": 3.0, + "grad_norm": 0.4190770983695984, + "learning_rate": 2.9365689308796065e-05, + "loss": 0.0366, + "step": 657 + }, + { + "epoch": 3.0, + "eval_loss": 0.04255302622914314, + "eval_runtime": 17.2484, + "eval_samples_per_second": 15.364, + "eval_steps_per_second": 1.971, + "step": 657 + } + ], + "logging_steps": 500, + "max_steps": 2190, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6036187263270912.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-657/training_args.bin b/checkpoint-657/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d977d8d4afc5bd92bf7fc4298b8866c8a6c8438c --- /dev/null +++ b/checkpoint-657/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f96c10a0a2b43c05318a394de2f8a40b06b79bafe7c52911b0bd4f1a90733fe +size 5304 diff --git a/checkpoint-876/added_tokens.json b/checkpoint-876/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5132007c4fcf42b75b65c8b6aa49c7098bcdf4 --- /dev/null +++ b/checkpoint-876/added_tokens.json @@ -0,0 +1,102 @@ +{ + "": 32099, + "": 32089, + "": 32088, + "": 32087, + "": 32086, + "": 32085, + "": 32084, + "": 32083, + "": 32082, + "": 32081, + "": 32080, + "": 32098, + "": 32079, + "": 32078, + "": 32077, + "": 32076, + "": 32075, + "": 32074, + "": 32073, + "": 32072, + "": 32071, + "": 32070, + "": 32097, + "": 32069, + "": 32068, + "": 32067, + "": 32066, + "": 32065, + "": 32064, + "": 32063, + "": 32062, + "": 32061, + "": 32060, + "": 32096, + "": 32059, + "": 32058, + "": 32057, + "": 32056, + "": 32055, + "": 32054, + "": 32053, + "": 32052, + "": 32051, + "": 32050, + "": 32095, + "": 32049, + "": 32048, + "": 32047, + "": 32046, + "": 32045, + "": 32044, + "": 32043, + "": 32042, + "": 32041, + "": 32040, + "": 32094, + "": 32039, + "": 32038, + "": 32037, + "": 32036, + "": 32035, + "": 32034, + "": 32033, + "": 32032, + "": 32031, + "": 32030, + "": 32093, + "": 32029, + "": 32028, + "": 32027, + "": 32026, + "": 32025, + "": 32024, + "": 32023, + "": 32022, + "": 32021, + "": 32020, + "": 32092, + "": 32019, + "": 32018, + "": 32017, + "": 32016, + "": 32015, + "": 32014, + "": 32013, + "": 32012, + "": 32011, + "": 32010, + "": 32091, + "": 32009, + "": 32008, + "": 32007, + "": 32006, + "": 32005, + "": 32004, + "": 32003, + "": 32002, + "": 32001, + "": 32000, + "": 32090 +} diff --git a/checkpoint-876/config.json b/checkpoint-876/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7adeb5014f60a4a25c0c42eb288f6284e773e6dd --- /dev/null +++ b/checkpoint-876/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "teapotai/teapotllm", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 2816, + "d_kv": 64, + "d_model": 1024, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_decoder_layers": 24, + "num_heads": 16, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.48.3", + "use_cache": true, + "vocab_size": 32128 +} diff --git a/checkpoint-876/generation_config.json b/checkpoint-876/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..058f73f97f63923f298d59a1d6b4f78f510c5146 --- /dev/null +++ b/checkpoint-876/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.48.3" +} diff --git a/checkpoint-876/model.safetensors b/checkpoint-876/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1220ae7add45befd803e9a079c6373e0377af6ba --- /dev/null +++ b/checkpoint-876/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c0c3490aba7e36306280238bfb54f81853ce06f25f6e3fd573b28e8a1fd9e2 +size 3132668808 diff --git a/checkpoint-876/optimizer.pt b/checkpoint-876/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ebc68fcdd4a81c152befd73058ff3b49e81f8d6 --- /dev/null +++ b/checkpoint-876/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d773584b2c4a92848ef4221267839931dade64ff8414cee94e30ee9c65af3e5d +size 6265677800 diff --git a/checkpoint-876/rng_state.pth b/checkpoint-876/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d78ebf7722e7638819225f2dcd12d5d71572706e --- /dev/null +++ b/checkpoint-876/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad1d83e9858994aaeb48253050e024ea066e094ab2e2e44e90fa58f1bfef25d +size 14244 diff --git a/checkpoint-876/scheduler.pt b/checkpoint-876/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..efd847ca84bfb03dc88f8529a827fb951c79c86c --- /dev/null +++ b/checkpoint-876/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36f69d18de8b02278b8e2b59c942ac89ee0d6f838ae8bfd51fc272bf642e35ed +size 1064 diff --git a/checkpoint-876/special_tokens_map.json b/checkpoint-876/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..17ade346a1042cbe0c1436f5bedcbd85c099d582 --- /dev/null +++ b/checkpoint-876/special_tokens_map.json @@ -0,0 +1,125 @@ +{ + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-876/spiece.model b/checkpoint-876/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..317a5ccbde45300f5d1d970d4d449af2108b147e --- /dev/null +++ b/checkpoint-876/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86 +size 791656 diff --git a/checkpoint-876/tokenizer_config.json b/checkpoint-876/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d9f418ab1e49d1eaa3832e9970c9c503f565484 --- /dev/null +++ b/checkpoint-876/tokenizer_config.json @@ -0,0 +1,941 @@ +{ + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32002": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32003": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32004": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32005": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32006": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32011": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32012": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32015": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32017": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32018": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32019": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32020": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32021": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32022": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32023": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32024": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32025": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32026": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32027": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32028": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32029": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32030": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32031": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32032": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32033": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32034": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32035": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32036": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32037": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32038": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32039": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32040": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32041": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32042": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32043": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32044": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32045": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32046": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32047": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32048": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32049": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32050": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32051": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32052": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32053": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32054": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32055": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32056": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32057": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32058": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32059": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32060": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32061": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32062": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32063": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32064": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32065": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32066": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32067": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32068": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32069": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32070": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32071": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32072": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32073": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32074": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32075": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32076": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32077": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32078": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32079": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32080": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32081": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32082": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32083": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32084": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32085": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32086": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32087": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32088": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32089": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32090": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32091": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32092": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32093": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32094": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32095": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32096": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32097": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32098": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32099": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_ids": 100, + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 512, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-876/trainer_state.json b/checkpoint-876/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d3c7dba1dec328f4fdabc5ae16ebb194477ed652 --- /dev/null +++ b/checkpoint-876/trainer_state.json @@ -0,0 +1,189 @@ +{ + "best_metric": 0.042461033910512924, + "best_model_checkpoint": "./teapotllm/checkpoint-876", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 876, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "em_boolean": 0.5294117647058824, + "em_chat": 0.03783783783783784, + "em_extraction": 0.5333333333333333, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 1.0, + "mean_em": 0.12075471698113208, + "mean_similarity": 0.6624939311014594, + "mean_word_count_diff": 11.958490566037735, + "similarity_boolean": 0.649950959993636, + "similarity_chat": 0.6068178282194846, + "similarity_extraction": 0.8274670541286469, + "similarity_qa": 0.8895131280024846, + "similarity_summarization": 0.7897715005609725, + "similarity_unanswerable": 0.8186558306217193, + "word_count_diff_boolean": 2.764705882352941, + "word_count_diff_chat": 11.275675675675675, + "word_count_diff_extraction": 1.8666666666666667, + "word_count_diff_qa": 2.7333333333333334, + "word_count_diff_summarization": 50.05555555555556, + "word_count_diff_unanswerable": 4.4 + }, + { + "epoch": 1.0, + "grad_norm": 0.29226627945899963, + "learning_rate": 1.314e-05, + "loss": 0.0548, + "step": 219 + }, + { + "epoch": 1.0, + "eval_loss": 0.04732182249426842, + "eval_runtime": 17.2425, + "eval_samples_per_second": 15.369, + "eval_steps_per_second": 1.972, + "step": 219 + }, + { + "em_boolean": 0.7058823529411765, + "em_chat": 0.05405405405405406, + "em_extraction": 0.4, + "em_qa": 0.4, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 2.0, + "mean_em": 0.12830188679245283, + "mean_similarity": 0.6936857629426808, + "mean_word_count_diff": 11.475471698113207, + "similarity_boolean": 0.7671171505661571, + "similarity_chat": 0.6481531772158435, + "similarity_extraction": 0.7221033732096355, + "similarity_qa": 0.8645438591639201, + "similarity_summarization": 0.824706514676412, + "similarity_unanswerable": 0.8155314723650614, + "word_count_diff_boolean": 2.7058823529411766, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 2.933333333333333, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.166666666666664, + "word_count_diff_unanswerable": 3.1333333333333333 + }, + { + "epoch": 2.0, + "grad_norm": 0.30153563618659973, + "learning_rate": 2.628e-05, + "loss": 0.0445, + "step": 438 + }, + { + "epoch": 2.0, + "eval_loss": 0.04412226751446724, + "eval_runtime": 17.2255, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.974, + "step": 438 + }, + { + "em_boolean": 0.5882352941176471, + "em_chat": 0.05945945945945946, + "em_extraction": 0.4666666666666667, + "em_qa": 0.5333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 3.0, + "mean_em": 0.13584905660377358, + "mean_similarity": 0.7036299928109039, + "mean_word_count_diff": 11.230188679245282, + "similarity_boolean": 0.702288385699777, + "similarity_chat": 0.6661249467549292, + "similarity_extraction": 0.7363929619391759, + "similarity_qa": 0.9095388889312744, + "similarity_summarization": 0.7749250100718604, + "similarity_unanswerable": 0.8434868295987447, + "word_count_diff_boolean": 1.8823529411764706, + "word_count_diff_chat": 10.41081081081081, + "word_count_diff_extraction": 1.4, + "word_count_diff_qa": 2.8666666666666667, + "word_count_diff_summarization": 50.111111111111114, + "word_count_diff_unanswerable": 3.466666666666667 + }, + { + "epoch": 3.0, + "grad_norm": 0.4190770983695984, + "learning_rate": 2.9365689308796065e-05, + "loss": 0.0366, + "step": 657 + }, + { + "epoch": 3.0, + "eval_loss": 0.04255302622914314, + "eval_runtime": 17.2484, + "eval_samples_per_second": 15.364, + "eval_steps_per_second": 1.971, + "step": 657 + }, + { + "em_boolean": 0.8823529411764706, + "em_chat": 0.05405405405405406, + "em_extraction": 0.6666666666666666, + "em_qa": 0.3333333333333333, + "em_summarization": 0.0, + "em_unanswerable": 0.0, + "epoch": 4.0, + "mean_em": 0.1509433962264151, + "mean_similarity": 0.7214625703522338, + "mean_word_count_diff": 11.275471698113208, + "similarity_boolean": 0.9250823608325685, + "similarity_chat": 0.6629158062407294, + "similarity_extraction": 0.9287973960240682, + "similarity_qa": 0.8280642042557399, + "similarity_summarization": 0.7930781609482236, + "similarity_unanswerable": 0.8128950635592143, + "word_count_diff_boolean": 0.5882352941176471, + "word_count_diff_chat": 10.556756756756757, + "word_count_diff_extraction": 0.4666666666666667, + "word_count_diff_qa": 3.2, + "word_count_diff_summarization": 50.27777777777778, + "word_count_diff_unanswerable": 4.333333333333333 + }, + { + "epoch": 4.0, + "grad_norm": 0.34906384348869324, + "learning_rate": 2.6482696742411827e-05, + "loss": 0.0301, + "step": 876 + }, + { + "epoch": 4.0, + "eval_loss": 0.042461033910512924, + "eval_runtime": 17.2294, + "eval_samples_per_second": 15.381, + "eval_steps_per_second": 1.973, + "step": 876 + } + ], + "logging_steps": 500, + "max_steps": 2190, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8048249684361216.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-876/training_args.bin b/checkpoint-876/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d977d8d4afc5bd92bf7fc4298b8866c8a6c8438c --- /dev/null +++ b/checkpoint-876/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f96c10a0a2b43c05318a394de2f8a40b06b79bafe7c52911b0bd4f1a90733fe +size 5304 diff --git a/config.json b/config.json index 690d98d85d1f4324a159558495e71d726b144ed1..7adeb5014f60a4a25c0c42eb288f6284e773e6dd 100644 --- a/config.json +++ b/config.json @@ -1,5 +1,5 @@ { - "_name_or_path": "./teapotllmbase", + "_name_or_path": "teapotai/teapotllm", "architectures": [ "T5ForConditionalGeneration" ], @@ -30,4 +30,4 @@ "transformers_version": "4.48.3", "use_cache": true, "vocab_size": 32128 -} \ No newline at end of file +} diff --git a/model.safetensors b/model.safetensors index 77f3fae69a2ee224daa5ba2dd163dc3258807937..1220ae7add45befd803e9a079c6373e0377af6ba 100644 --- a/model.safetensors +++ b/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e8904274c493ef5fb3e915279d1af7acb4f5b5057741047082c13c8f3f91fc48 +oid sha256:e8c0c3490aba7e36306280238bfb54f81853ce06f25f6e3fd573b28e8a1fd9e2 size 3132668808 diff --git a/training_args.bin b/training_args.bin index 4d2c5c2629f28b68226f06c02d16bc80f6e2e184..d977d8d4afc5bd92bf7fc4298b8866c8a6c8438c 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cefc726fb5e7550afc92974b096c16ac389aea67b613748fe71126ede98eda9b +oid sha256:1f96c10a0a2b43c05318a394de2f8a40b06b79bafe7c52911b0bd4f1a90733fe size 5304