Training in progress, step 500, checkpoint

Browse files

Files changed (12) hide show

last-checkpoint/config.json +29 -0
last-checkpoint/generation_config.json +8 -0
last-checkpoint/model.safetensors +3 -0
last-checkpoint/optimizer.pt +3 -0
last-checkpoint/rng_state.pth +3 -0
last-checkpoint/scaler.pt +3 -0
last-checkpoint/scheduler.pt +3 -0
last-checkpoint/special_tokens_map.json +37 -0
last-checkpoint/tokenizer.json +0 -0
last-checkpoint/tokenizer_config.json +53 -0
last-checkpoint/trainer_state.json +392 -0
last-checkpoint/training_args.bin +3 -0

last-checkpoint/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 3,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 4,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "num_decoder_layers": 12,
+  "num_heads": 8,
+  "num_layers": 12,
+  "pad_token_id": 1,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "torch_dtype": "float32",
+  "transformers_version": "4.54.1",
+  "use_cache": false,
+  "vocab_size": 4796
+}

last-checkpoint/generation_config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 3,
+  "eos_token_id": 4,
+  "pad_token_id": 1,
+  "transformers_version": "4.54.1",
+  "use_cache": false
+}

last-checkpoint/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7629bac28a01650e0c47aae68065dd14a3871a16dc19535a5f47d97ef7f593e3
+size 362303176

last-checkpoint/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:56c5762af4816dd09402e536fddd14c4f886af7c31765384de8fda2510100c78
+size 724761914

last-checkpoint/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f29f1a356a90bc512795986655867fd11582b804d45eacb9816a4ff5d2939220
+size 14244

last-checkpoint/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18b984273ea2d45b7ffb1d047bb359d93111e41fcad70d16a1b453fd38f72636
+size 988

last-checkpoint/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ee86abee0989df8e1c5461d5ecfc6d42b43c8879a30063d7c1ee114f0c589f6
+size 1064

last-checkpoint/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "bos_token": {
+    "content": "[START]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[END]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

last-checkpoint/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

last-checkpoint/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[START]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[END]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[START]",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "[END]",
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "unk_token": "[UNK]"
+}

last-checkpoint/trainer_state.json ADDED Viewed

	@@ -0,0 +1,392 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.4764267990074442,
+  "eval_steps": 500,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.04962779156327544,
+      "grad_norm": 0.9519186615943909,
+      "learning_rate": 4.5e-06,
+      "loss": 1.6628,
+      "step": 10
+    },
+    {
+      "epoch": 0.09925558312655088,
+      "grad_norm": 1.4794460535049438,
+      "learning_rate": 9.5e-06,
+      "loss": 1.6566,
+      "step": 20
+    },
+    {
+      "epoch": 0.1488833746898263,
+      "grad_norm": 1.0335605144500732,
+      "learning_rate": 1.45e-05,
+      "loss": 1.6733,
+      "step": 30
+    },
+    {
+      "epoch": 0.19851116625310175,
+      "grad_norm": 1.3918877840042114,
+      "learning_rate": 1.9500000000000003e-05,
+      "loss": 1.6386,
+      "step": 40
+    },
+    {
+      "epoch": 0.24813895781637718,
+      "grad_norm": 1.2918508052825928,
+      "learning_rate": 2.45e-05,
+      "loss": 1.6226,
+      "step": 50
+    },
+    {
+      "epoch": 0.2977667493796526,
+      "grad_norm": 0.9966709613800049,
+      "learning_rate": 2.95e-05,
+      "loss": 1.6584,
+      "step": 60
+    },
+    {
+      "epoch": 0.34739454094292804,
+      "grad_norm": 1.1411339044570923,
+      "learning_rate": 3.45e-05,
+      "loss": 1.679,
+      "step": 70
+    },
+    {
+      "epoch": 0.3970223325062035,
+      "grad_norm": 1.2775113582611084,
+      "learning_rate": 3.9500000000000005e-05,
+      "loss": 1.6762,
+      "step": 80
+    },
+    {
+      "epoch": 0.4466501240694789,
+      "grad_norm": 1.37651789188385,
+      "learning_rate": 4.4500000000000004e-05,
+      "loss": 1.6636,
+      "step": 90
+    },
+    {
+      "epoch": 0.49627791563275436,
+      "grad_norm": 1.3402600288391113,
+      "learning_rate": 4.9500000000000004e-05,
+      "loss": 1.6492,
+      "step": 100
+    },
+    {
+      "epoch": 0.5459057071960298,
+      "grad_norm": 1.337302565574646,
+      "learning_rate": 5.45e-05,
+      "loss": 1.6608,
+      "step": 110
+    },
+    {
+      "epoch": 0.5955334987593052,
+      "grad_norm": 2.611341714859009,
+      "learning_rate": 5.95e-05,
+      "loss": 1.6915,
+      "step": 120
+    },
+    {
+      "epoch": 0.6451612903225806,
+      "grad_norm": 2.1621108055114746,
+      "learning_rate": 6.450000000000001e-05,
+      "loss": 1.6788,
+      "step": 130
+    },
+    {
+      "epoch": 0.6947890818858561,
+      "grad_norm": 1.839064359664917,
+      "learning_rate": 6.95e-05,
+      "loss": 1.6679,
+      "step": 140
+    },
+    {
+      "epoch": 0.7444168734491315,
+      "grad_norm": 2.099543571472168,
+      "learning_rate": 7.450000000000001e-05,
+      "loss": 1.6668,
+      "step": 150
+    },
+    {
+      "epoch": 0.794044665012407,
+      "grad_norm": 1.5114145278930664,
+      "learning_rate": 7.950000000000001e-05,
+      "loss": 1.6917,
+      "step": 160
+    },
+    {
+      "epoch": 0.8436724565756824,
+      "grad_norm": 1.3731415271759033,
+      "learning_rate": 8.450000000000001e-05,
+      "loss": 1.6731,
+      "step": 170
+    },
+    {
+      "epoch": 0.8933002481389578,
+      "grad_norm": 1.322886347770691,
+      "learning_rate": 8.950000000000001e-05,
+      "loss": 1.6808,
+      "step": 180
+    },
+    {
+      "epoch": 0.9429280397022333,
+      "grad_norm": 2.232438325881958,
+      "learning_rate": 9.449999999999999e-05,
+      "loss": 1.6659,
+      "step": 190
+    },
+    {
+      "epoch": 0.9925558312655087,
+      "grad_norm": 1.6377075910568237,
+      "learning_rate": 9.95e-05,
+      "loss": 1.683,
+      "step": 200
+    },
+    {
+      "epoch": 1.0397022332506203,
+      "grad_norm": 1.362827181816101,
+      "learning_rate": 0.0001,
+      "loss": 1.5687,
+      "step": 210
+    },
+    {
+      "epoch": 1.0893300248138957,
+      "grad_norm": 1.4714646339416504,
+      "learning_rate": 0.0001,
+      "loss": 1.6823,
+      "step": 220
+    },
+    {
+      "epoch": 1.1389578163771712,
+      "grad_norm": 1.2480884790420532,
+      "learning_rate": 0.0001,
+      "loss": 1.6513,
+      "step": 230
+    },
+    {
+      "epoch": 1.1885856079404467,
+      "grad_norm": 3.1516945362091064,
+      "learning_rate": 0.0001,
+      "loss": 1.6613,
+      "step": 240
+    },
+    {
+      "epoch": 1.2382133995037221,
+      "grad_norm": 1.2342029809951782,
+      "learning_rate": 0.0001,
+      "loss": 1.6717,
+      "step": 250
+    },
+    {
+      "epoch": 1.2878411910669976,
+      "grad_norm": 1.2516708374023438,
+      "learning_rate": 0.0001,
+      "loss": 1.6712,
+      "step": 260
+    },
+    {
+      "epoch": 1.337468982630273,
+      "grad_norm": 1.28671395778656,
+      "learning_rate": 0.0001,
+      "loss": 1.6516,
+      "step": 270
+    },
+    {
+      "epoch": 1.3870967741935485,
+      "grad_norm": 1.6043883562088013,
+      "learning_rate": 0.0001,
+      "loss": 1.6595,
+      "step": 280
+    },
+    {
+      "epoch": 1.436724565756824,
+      "grad_norm": 1.6665605306625366,
+      "learning_rate": 0.0001,
+      "loss": 1.6494,
+      "step": 290
+    },
+    {
+      "epoch": 1.4863523573200992,
+      "grad_norm": 1.4527169466018677,
+      "learning_rate": 0.0001,
+      "loss": 1.6532,
+      "step": 300
+    },
+    {
+      "epoch": 1.5359801488833746,
+      "grad_norm": 1.6567929983139038,
+      "learning_rate": 0.0001,
+      "loss": 1.6355,
+      "step": 310
+    },
+    {
+      "epoch": 1.58560794044665,
+      "grad_norm": 1.3525745868682861,
+      "learning_rate": 0.0001,
+      "loss": 1.6394,
+      "step": 320
+    },
+    {
+      "epoch": 1.6352357320099256,
+      "grad_norm": 1.8357068300247192,
+      "learning_rate": 0.0001,
+      "loss": 1.6528,
+      "step": 330
+    },
+    {
+      "epoch": 1.684863523573201,
+      "grad_norm": 1.1668673753738403,
+      "learning_rate": 0.0001,
+      "loss": 1.6337,
+      "step": 340
+    },
+    {
+      "epoch": 1.7344913151364765,
+      "grad_norm": 1.0567771196365356,
+      "learning_rate": 0.0001,
+      "loss": 1.6347,
+      "step": 350
+    },
+    {
+      "epoch": 1.7841191066997517,
+      "grad_norm": 1.5663048028945923,
+      "learning_rate": 0.0001,
+      "loss": 1.6355,
+      "step": 360
+    },
+    {
+      "epoch": 1.8337468982630272,
+      "grad_norm": 1.1115260124206543,
+      "learning_rate": 0.0001,
+      "loss": 1.6186,
+      "step": 370
+    },
+    {
+      "epoch": 1.8833746898263026,
+      "grad_norm": 1.4452751874923706,
+      "learning_rate": 0.0001,
+      "loss": 1.6111,
+      "step": 380
+    },
+    {
+      "epoch": 1.933002481389578,
+      "grad_norm": 1.2159701585769653,
+      "learning_rate": 0.0001,
+      "loss": 1.6314,
+      "step": 390
+    },
+    {
+      "epoch": 1.9826302729528535,
+      "grad_norm": 1.2750087976455688,
+      "learning_rate": 0.0001,
+      "loss": 1.6327,
+      "step": 400
+    },
+    {
+      "epoch": 2.029776674937965,
+      "grad_norm": 1.4693094491958618,
+      "learning_rate": 0.0001,
+      "loss": 1.5277,
+      "step": 410
+    },
+    {
+      "epoch": 2.0794044665012406,
+      "grad_norm": 1.4537895917892456,
+      "learning_rate": 0.0001,
+      "loss": 1.6218,
+      "step": 420
+    },
+    {
+      "epoch": 2.129032258064516,
+      "grad_norm": 0.9247901439666748,
+      "learning_rate": 0.0001,
+      "loss": 1.6084,
+      "step": 430
+    },
+    {
+      "epoch": 2.1786600496277915,
+      "grad_norm": 1.1026546955108643,
+      "learning_rate": 0.0001,
+      "loss": 1.6369,
+      "step": 440
+    },
+    {
+      "epoch": 2.228287841191067,
+      "grad_norm": 1.1468608379364014,
+      "learning_rate": 0.0001,
+      "loss": 1.5851,
+      "step": 450
+    },
+    {
+      "epoch": 2.2779156327543424,
+      "grad_norm": 1.1061238050460815,
+      "learning_rate": 0.0001,
+      "loss": 1.5967,
+      "step": 460
+    },
+    {
+      "epoch": 2.327543424317618,
+      "grad_norm": 1.2714872360229492,
+      "learning_rate": 0.0001,
+      "loss": 1.5837,
+      "step": 470
+    },
+    {
+      "epoch": 2.3771712158808933,
+      "grad_norm": 1.3792251348495483,
+      "learning_rate": 0.0001,
+      "loss": 1.5841,
+      "step": 480
+    },
+    {
+      "epoch": 2.4267990074441688,
+      "grad_norm": 1.3134170770645142,
+      "learning_rate": 0.0001,
+      "loss": 1.5864,
+      "step": 490
+    },
+    {
+      "epoch": 2.4764267990074442,
+      "grad_norm": 1.1710975170135498,
+      "learning_rate": 0.0001,
+      "loss": 1.5731,
+      "step": 500
+    },
+    {
+      "epoch": 2.4764267990074442,
+      "eval_loss": 1.5040353536605835,
+      "eval_runtime": 57.3876,
+      "eval_samples_per_second": 37.43,
+      "eval_steps_per_second": 4.687,
+      "step": 500
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 808,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.182556306040422e+16,
+  "train_batch_size": 24,
+  "trial_name": null,
+  "trial_params": null
+}

last-checkpoint/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:258e93bb7afb81bdb858196bb5d9459c58151567faf53e997e6292b1461756ae
+size 5624