Spaces:

janmayjay
/

demo

Sleeping

App Files Files Community

janmayjay committed on Jul 14

Commit

2098d75

1 Parent(s): 5ec3a1d

Your commit message

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

smollm2-finetuned-lora1/adapter_model.safetensors +1 -1
{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/README.md +0 -0
{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/adapter_config.json +0 -0
{smollm2-finetuned1/checkpoint-576 → smollm2-finetuned-lora12}/adapter_model.safetensors +0 -0
{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/merges.txt +0 -0
{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/special_tokens_map.json +0 -0
{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/tokenizer.json +0 -0
{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/tokenizer_config.json +0 -0
{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/vocab.json +0 -0
smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/README.md +0 -0
smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/adapter_config.json +0 -0
smollm2-finetuned1/checkpoint-1152/adapter_model.safetensors +3 -0
smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/merges.txt +0 -0
smollm2-finetuned1/checkpoint-1152/optimizer.pt +3 -0
smollm2-finetuned1/checkpoint-1152/rng_state.pth +3 -0
smollm2-finetuned1/checkpoint-1152/scaler.pt +3 -0
smollm2-finetuned1/checkpoint-1152/scheduler.pt +3 -0
smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/special_tokens_map.json +0 -0
smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/tokenizer.json +0 -0
smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/tokenizer_config.json +0 -0
smollm2-finetuned1/checkpoint-1152/trainer_state.json +839 -0
smollm2-finetuned1/{checkpoint-192 → checkpoint-1152}/training_args.bin +0 -0
smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/vocab.json +0 -0
smollm2-finetuned1/checkpoint-384/adapter_model.safetensors +1 -1
smollm2-finetuned1/checkpoint-384/optimizer.pt +1 -1
smollm2-finetuned1/checkpoint-384/rng_state.pth +1 -1
smollm2-finetuned1/checkpoint-384/scheduler.pt +1 -1
smollm2-finetuned1/checkpoint-384/trainer_state.json +155 -155
smollm2-finetuned1/checkpoint-768/README.md +207 -0
smollm2-finetuned1/checkpoint-768/adapter_config.json +36 -0
smollm2-finetuned1/checkpoint-768/adapter_model.safetensors +3 -0
smollm2-finetuned1/checkpoint-768/merges.txt +0 -0
smollm2-finetuned1/checkpoint-768/optimizer.pt +3 -0
smollm2-finetuned1/checkpoint-768/rng_state.pth +3 -0
smollm2-finetuned1/checkpoint-768/scaler.pt +3 -0
smollm2-finetuned1/checkpoint-768/scheduler.pt +3 -0
smollm2-finetuned1/checkpoint-768/special_tokens_map.json +43 -0
smollm2-finetuned1/checkpoint-768/tokenizer.json +0 -0
smollm2-finetuned1/checkpoint-768/tokenizer_config.json +169 -0
smollm2-finetuned1/checkpoint-768/trainer_state.json +566 -0
smollm2-finetuned1/{checkpoint-576 → checkpoint-768}/training_args.bin +0 -0
smollm2-finetuned1/checkpoint-768/vocab.json +0 -0
smollm2-finetuned12/checkpoint-192/README.md +207 -0
smollm2-finetuned12/checkpoint-192/adapter_config.json +36 -0
{smollm2-finetuned1 → smollm2-finetuned12}/checkpoint-192/adapter_model.safetensors +0 -0
smollm2-finetuned12/checkpoint-192/merges.txt +0 -0
{smollm2-finetuned1 → smollm2-finetuned12}/checkpoint-192/optimizer.pt +0 -0
{smollm2-finetuned1 → smollm2-finetuned12}/checkpoint-192/rng_state.pth +0 -0
{smollm2-finetuned1 → smollm2-finetuned12}/checkpoint-192/scaler.pt +0 -0
{smollm2-finetuned1 → smollm2-finetuned12}/checkpoint-192/scheduler.pt +0 -0

smollm2-finetuned-lora1/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7435cc9d057e9ee49e9490c9d73d413da59ad25ccf7cf29c4dff151f4553d800
 size 3293480

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4d02e74b3a1422aa5cfdb18032a4a76a3deb1cc233768bea593c730e2110812
 size 3293480

{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/README.md RENAMED Viewed

File without changes

{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/adapter_config.json RENAMED Viewed

File without changes

{smollm2-finetuned1/checkpoint-576 → smollm2-finetuned-lora12}/adapter_model.safetensors RENAMED Viewed

File without changes

{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/merges.txt RENAMED Viewed

File without changes

{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/special_tokens_map.json RENAMED Viewed

File without changes

{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/tokenizer.json RENAMED Viewed

File without changes

{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/tokenizer_config.json RENAMED Viewed

File without changes

{smollm2-finetuned1/checkpoint-192 → smollm2-finetuned-lora12}/vocab.json RENAMED Viewed

File without changes

smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/README.md RENAMED Viewed

File without changes

smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/adapter_config.json RENAMED Viewed

File without changes

smollm2-finetuned1/checkpoint-1152/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4d02e74b3a1422aa5cfdb18032a4a76a3deb1cc233768bea593c730e2110812
+size 3293480

smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/merges.txt RENAMED Viewed

File without changes

smollm2-finetuned1/checkpoint-1152/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de0611e8c6ad2ff4eeac3c54cb92ea6e224ad0d7dcabe0b04b09d53b4cdc627c
+size 6661242

smollm2-finetuned1/checkpoint-1152/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1115ea9e3d3bbdef3eff0677a5c4a9ecc311fc470528f5de36d8eab1590555d
+size 14244

smollm2-finetuned1/checkpoint-1152/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e0708c3eb6f5496c205a6fa0ed2a48464f72a7a3d151851e4340e1139f237f9
+size 988

smollm2-finetuned1/checkpoint-1152/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:67b0d793ccfe6c75f7f8f5445ccf0cf577dcf2cc8370e4ab4d21b965703750ca
+size 1064

smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/special_tokens_map.json RENAMED Viewed

File without changes

smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/tokenizer.json RENAMED Viewed

File without changes

smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/tokenizer_config.json RENAMED Viewed

File without changes

smollm2-finetuned1/checkpoint-1152/trainer_state.json ADDED Viewed

	@@ -0,0 +1,839 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1152,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.026041666666666668,
+      "grad_norm": 0.07924782484769821,
+      "learning_rate": 4.9609375000000005e-05,
+      "loss": 3.1556,
+      "step": 10
+    },
+    {
+      "epoch": 0.052083333333333336,
+      "grad_norm": 0.0875125601887703,
+      "learning_rate": 4.917534722222222e-05,
+      "loss": 3.1901,
+      "step": 20
+    },
+    {
+      "epoch": 0.078125,
+      "grad_norm": 0.09327095746994019,
+      "learning_rate": 4.874131944444445e-05,
+      "loss": 3.1043,
+      "step": 30
+    },
+    {
+      "epoch": 0.10416666666666667,
+      "grad_norm": 0.08811140060424805,
+      "learning_rate": 4.830729166666667e-05,
+      "loss": 3.065,
+      "step": 40
+    },
+    {
+      "epoch": 0.13020833333333334,
+      "grad_norm": 0.09639778733253479,
+      "learning_rate": 4.787326388888889e-05,
+      "loss": 3.1739,
+      "step": 50
+    },
+    {
+      "epoch": 0.15625,
+      "grad_norm": 0.12037681043148041,
+      "learning_rate": 4.7439236111111115e-05,
+      "loss": 3.0952,
+      "step": 60
+    },
+    {
+      "epoch": 0.18229166666666666,
+      "grad_norm": 0.10899762064218521,
+      "learning_rate": 4.7005208333333334e-05,
+      "loss": 2.9885,
+      "step": 70
+    },
+    {
+      "epoch": 0.20833333333333334,
+      "grad_norm": 0.10793007910251617,
+      "learning_rate": 4.657118055555556e-05,
+      "loss": 3.1401,
+      "step": 80
+    },
+    {
+      "epoch": 0.234375,
+      "grad_norm": 0.13812898099422455,
+      "learning_rate": 4.613715277777778e-05,
+      "loss": 3.153,
+      "step": 90
+    },
+    {
+      "epoch": 0.2604166666666667,
+      "grad_norm": 0.13644039630889893,
+      "learning_rate": 4.5703125e-05,
+      "loss": 3.0757,
+      "step": 100
+    },
+    {
+      "epoch": 0.2864583333333333,
+      "grad_norm": 0.13385091722011566,
+      "learning_rate": 4.5269097222222226e-05,
+      "loss": 3.1576,
+      "step": 110
+    },
+    {
+      "epoch": 0.3125,
+      "grad_norm": 0.13685277104377747,
+      "learning_rate": 4.4835069444444444e-05,
+      "loss": 3.0804,
+      "step": 120
+    },
+    {
+      "epoch": 0.3385416666666667,
+      "grad_norm": 0.1641232818365097,
+      "learning_rate": 4.440104166666667e-05,
+      "loss": 3.105,
+      "step": 130
+    },
+    {
+      "epoch": 0.3645833333333333,
+      "grad_norm": 0.14693030714988708,
+      "learning_rate": 4.3967013888888894e-05,
+      "loss": 3.0005,
+      "step": 140
+    },
+    {
+      "epoch": 0.390625,
+      "grad_norm": 0.15054495632648468,
+      "learning_rate": 4.353298611111111e-05,
+      "loss": 3.0237,
+      "step": 150
+    },
+    {
+      "epoch": 0.4166666666666667,
+      "grad_norm": 0.15571929514408112,
+      "learning_rate": 4.309895833333334e-05,
+      "loss": 2.9212,
+      "step": 160
+    },
+    {
+      "epoch": 0.4427083333333333,
+      "grad_norm": 0.16245532035827637,
+      "learning_rate": 4.266493055555556e-05,
+      "loss": 2.9759,
+      "step": 170
+    },
+    {
+      "epoch": 0.46875,
+      "grad_norm": 0.1736162304878235,
+      "learning_rate": 4.223090277777778e-05,
+      "loss": 2.9398,
+      "step": 180
+    },
+    {
+      "epoch": 0.4947916666666667,
+      "grad_norm": 0.166653111577034,
+      "learning_rate": 4.1796875000000005e-05,
+      "loss": 2.9738,
+      "step": 190
+    },
+    {
+      "epoch": 0.5208333333333334,
+      "grad_norm": 0.18301138281822205,
+      "learning_rate": 4.136284722222222e-05,
+      "loss": 2.8916,
+      "step": 200
+    },
+    {
+      "epoch": 0.546875,
+      "grad_norm": 0.18230901658535004,
+      "learning_rate": 4.092881944444444e-05,
+      "loss": 2.9061,
+      "step": 210
+    },
+    {
+      "epoch": 0.5729166666666666,
+      "grad_norm": 0.18205538392066956,
+      "learning_rate": 4.049479166666667e-05,
+      "loss": 2.8703,
+      "step": 220
+    },
+    {
+      "epoch": 0.5989583333333334,
+      "grad_norm": 0.17894142866134644,
+      "learning_rate": 4.006076388888889e-05,
+      "loss": 2.808,
+      "step": 230
+    },
+    {
+      "epoch": 0.625,
+      "grad_norm": 0.18798667192459106,
+      "learning_rate": 3.9626736111111115e-05,
+      "loss": 2.8614,
+      "step": 240
+    },
+    {
+      "epoch": 0.6510416666666666,
+      "grad_norm": 0.19296622276306152,
+      "learning_rate": 3.919270833333333e-05,
+      "loss": 2.9261,
+      "step": 250
+    },
+    {
+      "epoch": 0.6770833333333334,
+      "grad_norm": 0.19976121187210083,
+      "learning_rate": 3.875868055555556e-05,
+      "loss": 2.8094,
+      "step": 260
+    },
+    {
+      "epoch": 0.703125,
+      "grad_norm": 0.17799632251262665,
+      "learning_rate": 3.832465277777778e-05,
+      "loss": 2.8358,
+      "step": 270
+    },
+    {
+      "epoch": 0.7291666666666666,
+      "grad_norm": 0.2119487226009369,
+      "learning_rate": 3.7890625e-05,
+      "loss": 2.8203,
+      "step": 280
+    },
+    {
+      "epoch": 0.7552083333333334,
+      "grad_norm": 0.20649544894695282,
+      "learning_rate": 3.7456597222222226e-05,
+      "loss": 2.7819,
+      "step": 290
+    },
+    {
+      "epoch": 0.78125,
+      "grad_norm": 0.22266916930675507,
+      "learning_rate": 3.7022569444444444e-05,
+      "loss": 2.8192,
+      "step": 300
+    },
+    {
+      "epoch": 0.8072916666666666,
+      "grad_norm": 0.23271456360816956,
+      "learning_rate": 3.658854166666667e-05,
+      "loss": 2.8706,
+      "step": 310
+    },
+    {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 0.2504468560218811,
+      "learning_rate": 3.6154513888888894e-05,
+      "loss": 2.7641,
+      "step": 320
+    },
+    {
+      "epoch": 0.859375,
+      "grad_norm": 0.19909098744392395,
+      "learning_rate": 3.572048611111111e-05,
+      "loss": 2.6973,
+      "step": 330
+    },
+    {
+      "epoch": 0.8854166666666666,
+      "grad_norm": 0.2656916081905365,
+      "learning_rate": 3.528645833333333e-05,
+      "loss": 2.6888,
+      "step": 340
+    },
+    {
+      "epoch": 0.9114583333333334,
+      "grad_norm": 0.2115168273448944,
+      "learning_rate": 3.485243055555556e-05,
+      "loss": 2.7075,
+      "step": 350
+    },
+    {
+      "epoch": 0.9375,
+      "grad_norm": 0.24468505382537842,
+      "learning_rate": 3.441840277777778e-05,
+      "loss": 2.7051,
+      "step": 360
+    },
+    {
+      "epoch": 0.9635416666666666,
+      "grad_norm": 0.23581114411354065,
+      "learning_rate": 3.3984375000000004e-05,
+      "loss": 2.6773,
+      "step": 370
+    },
+    {
+      "epoch": 0.9895833333333334,
+      "grad_norm": 0.27085772156715393,
+      "learning_rate": 3.355034722222222e-05,
+      "loss": 2.6959,
+      "step": 380
+    },
+    {
+      "epoch": 1.015625,
+      "grad_norm": 0.22150537371635437,
+      "learning_rate": 3.311631944444444e-05,
+      "loss": 2.5918,
+      "step": 390
+    },
+    {
+      "epoch": 1.0416666666666667,
+      "grad_norm": 0.22745287418365479,
+      "learning_rate": 3.268229166666667e-05,
+      "loss": 2.5933,
+      "step": 400
+    },
+    {
+      "epoch": 1.0677083333333333,
+      "grad_norm": 0.29524073004722595,
+      "learning_rate": 3.224826388888889e-05,
+      "loss": 2.659,
+      "step": 410
+    },
+    {
+      "epoch": 1.09375,
+      "grad_norm": 0.26419463753700256,
+      "learning_rate": 3.1814236111111115e-05,
+      "loss": 2.615,
+      "step": 420
+    },
+    {
+      "epoch": 1.1197916666666667,
+      "grad_norm": 0.3234228789806366,
+      "learning_rate": 3.138020833333333e-05,
+      "loss": 2.5106,
+      "step": 430
+    },
+    {
+      "epoch": 1.1458333333333333,
+      "grad_norm": 0.28176596760749817,
+      "learning_rate": 3.094618055555556e-05,
+      "loss": 2.6551,
+      "step": 440
+    },
+    {
+      "epoch": 1.171875,
+      "grad_norm": 0.2584069073200226,
+      "learning_rate": 3.051215277777778e-05,
+      "loss": 2.6053,
+      "step": 450
+    },
+    {
+      "epoch": 1.1979166666666667,
+      "grad_norm": 0.22455713152885437,
+      "learning_rate": 3.0078125e-05,
+      "loss": 2.5743,
+      "step": 460
+    },
+    {
+      "epoch": 1.2239583333333333,
+      "grad_norm": 0.2834200859069824,
+      "learning_rate": 2.9644097222222222e-05,
+      "loss": 2.5535,
+      "step": 470
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 0.26442793011665344,
+      "learning_rate": 2.9210069444444444e-05,
+      "loss": 2.5343,
+      "step": 480
+    },
+    {
+      "epoch": 1.2760416666666667,
+      "grad_norm": 0.2931734323501587,
+      "learning_rate": 2.877604166666667e-05,
+      "loss": 2.6204,
+      "step": 490
+    },
+    {
+      "epoch": 1.3020833333333333,
+      "grad_norm": 0.3041195571422577,
+      "learning_rate": 2.834201388888889e-05,
+      "loss": 2.5094,
+      "step": 500
+    },
+    {
+      "epoch": 1.328125,
+      "grad_norm": 0.280934602022171,
+      "learning_rate": 2.790798611111111e-05,
+      "loss": 2.5245,
+      "step": 510
+    },
+    {
+      "epoch": 1.3541666666666667,
+      "grad_norm": 0.2929544448852539,
+      "learning_rate": 2.7473958333333333e-05,
+      "loss": 2.5285,
+      "step": 520
+    },
+    {
+      "epoch": 1.3802083333333333,
+      "grad_norm": 0.32009249925613403,
+      "learning_rate": 2.7039930555555558e-05,
+      "loss": 2.5499,
+      "step": 530
+    },
+    {
+      "epoch": 1.40625,
+      "grad_norm": 0.290863960981369,
+      "learning_rate": 2.660590277777778e-05,
+      "loss": 2.5022,
+      "step": 540
+    },
+    {
+      "epoch": 1.4322916666666667,
+      "grad_norm": 0.3213239312171936,
+      "learning_rate": 2.6171875e-05,
+      "loss": 2.5902,
+      "step": 550
+    },
+    {
+      "epoch": 1.4583333333333333,
+      "grad_norm": 0.26357147097587585,
+      "learning_rate": 2.5737847222222222e-05,
+      "loss": 2.6207,
+      "step": 560
+    },
+    {
+      "epoch": 1.484375,
+      "grad_norm": 0.30386197566986084,
+      "learning_rate": 2.5303819444444444e-05,
+      "loss": 2.5154,
+      "step": 570
+    },
+    {
+      "epoch": 1.5104166666666665,
+      "grad_norm": 0.3122921288013458,
+      "learning_rate": 2.4869791666666665e-05,
+      "loss": 2.5318,
+      "step": 580
+    },
+    {
+      "epoch": 1.5364583333333335,
+      "grad_norm": 0.3065759241580963,
+      "learning_rate": 2.443576388888889e-05,
+      "loss": 2.5618,
+      "step": 590
+    },
+    {
+      "epoch": 1.5625,
+      "grad_norm": 0.2884944677352905,
+      "learning_rate": 2.400173611111111e-05,
+      "loss": 2.5246,
+      "step": 600
+    },
+    {
+      "epoch": 1.5885416666666665,
+      "grad_norm": 0.26818904280662537,
+      "learning_rate": 2.3567708333333336e-05,
+      "loss": 2.5426,
+      "step": 610
+    },
+    {
+      "epoch": 1.6145833333333335,
+      "grad_norm": 0.3531734347343445,
+      "learning_rate": 2.3133680555555558e-05,
+      "loss": 2.5363,
+      "step": 620
+    },
+    {
+      "epoch": 1.640625,
+      "grad_norm": 0.25133830308914185,
+      "learning_rate": 2.269965277777778e-05,
+      "loss": 2.536,
+      "step": 630
+    },
+    {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 0.24893426895141602,
+      "learning_rate": 2.2265625e-05,
+      "loss": 2.567,
+      "step": 640
+    },
+    {
+      "epoch": 1.6927083333333335,
+      "grad_norm": 0.3459770083427429,
+      "learning_rate": 2.1831597222222222e-05,
+      "loss": 2.5626,
+      "step": 650
+    },
+    {
+      "epoch": 1.71875,
+      "grad_norm": 0.4507867097854614,
+      "learning_rate": 2.1397569444444447e-05,
+      "loss": 2.4232,
+      "step": 660
+    },
+    {
+      "epoch": 1.7447916666666665,
+      "grad_norm": 0.31911784410476685,
+      "learning_rate": 2.0963541666666665e-05,
+      "loss": 2.5649,
+      "step": 670
+    },
+    {
+      "epoch": 1.7708333333333335,
+      "grad_norm": 0.3345726728439331,
+      "learning_rate": 2.052951388888889e-05,
+      "loss": 2.4291,
+      "step": 680
+    },
+    {
+      "epoch": 1.796875,
+      "grad_norm": 0.3128361403942108,
+      "learning_rate": 2.009548611111111e-05,
+      "loss": 2.3886,
+      "step": 690
+    },
+    {
+      "epoch": 1.8229166666666665,
+      "grad_norm": 0.27449044585227966,
+      "learning_rate": 1.9661458333333336e-05,
+      "loss": 2.4424,
+      "step": 700
+    },
+    {
+      "epoch": 1.8489583333333335,
+      "grad_norm": 0.29805341362953186,
+      "learning_rate": 1.9227430555555558e-05,
+      "loss": 2.5046,
+      "step": 710
+    },
+    {
+      "epoch": 1.875,
+      "grad_norm": 0.28968825936317444,
+      "learning_rate": 1.879340277777778e-05,
+      "loss": 2.4431,
+      "step": 720
+    },
+    {
+      "epoch": 1.9010416666666665,
+      "grad_norm": 0.28389787673950195,
+      "learning_rate": 1.8359375e-05,
+      "loss": 2.5092,
+      "step": 730
+    },
+    {
+      "epoch": 1.9270833333333335,
+      "grad_norm": 0.2659839391708374,
+      "learning_rate": 1.7925347222222222e-05,
+      "loss": 2.4871,
+      "step": 740
+    },
+    {
+      "epoch": 1.953125,
+      "grad_norm": 0.33582159876823425,
+      "learning_rate": 1.7491319444444447e-05,
+      "loss": 2.5754,
+      "step": 750
+    },
+    {
+      "epoch": 1.9791666666666665,
+      "grad_norm": 0.2729295790195465,
+      "learning_rate": 1.7057291666666665e-05,
+      "loss": 2.4551,
+      "step": 760
+    },
+    {
+      "epoch": 2.0052083333333335,
+      "grad_norm": 0.2775850296020508,
+      "learning_rate": 1.662326388888889e-05,
+      "loss": 2.4781,
+      "step": 770
+    },
+    {
+      "epoch": 2.03125,
+      "grad_norm": 0.30065348744392395,
+      "learning_rate": 1.618923611111111e-05,
+      "loss": 2.467,
+      "step": 780
+    },
+    {
+      "epoch": 2.0572916666666665,
+      "grad_norm": 0.3142607510089874,
+      "learning_rate": 1.5755208333333336e-05,
+      "loss": 2.4902,
+      "step": 790
+    },
+    {
+      "epoch": 2.0833333333333335,
+      "grad_norm": 0.2734437882900238,
+      "learning_rate": 1.5321180555555554e-05,
+      "loss": 2.4821,
+      "step": 800
+    },
+    {
+      "epoch": 2.109375,
+      "grad_norm": 0.3251326084136963,
+      "learning_rate": 1.488715277777778e-05,
+      "loss": 2.4432,
+      "step": 810
+    },
+    {
+      "epoch": 2.1354166666666665,
+      "grad_norm": 0.2499406933784485,
+      "learning_rate": 1.4453125e-05,
+      "loss": 2.4716,
+      "step": 820
+    },
+    {
+      "epoch": 2.1614583333333335,
+      "grad_norm": 0.28514012694358826,
+      "learning_rate": 1.4019097222222222e-05,
+      "loss": 2.5084,
+      "step": 830
+    },
+    {
+      "epoch": 2.1875,
+      "grad_norm": 0.27635160088539124,
+      "learning_rate": 1.3585069444444445e-05,
+      "loss": 2.3607,
+      "step": 840
+    },
+    {
+      "epoch": 2.2135416666666665,
+      "grad_norm": 0.28442683815956116,
+      "learning_rate": 1.3151041666666667e-05,
+      "loss": 2.5007,
+      "step": 850
+    },
+    {
+      "epoch": 2.2395833333333335,
+      "grad_norm": 0.29502061009407043,
+      "learning_rate": 1.271701388888889e-05,
+      "loss": 2.4918,
+      "step": 860
+    },
+    {
+      "epoch": 2.265625,
+      "grad_norm": 0.35289624333381653,
+      "learning_rate": 1.2282986111111111e-05,
+      "loss": 2.4324,
+      "step": 870
+    },
+    {
+      "epoch": 2.2916666666666665,
+      "grad_norm": 0.2951031029224396,
+      "learning_rate": 1.1848958333333333e-05,
+      "loss": 2.5092,
+      "step": 880
+    },
+    {
+      "epoch": 2.3177083333333335,
+      "grad_norm": 0.2936709225177765,
+      "learning_rate": 1.1414930555555556e-05,
+      "loss": 2.439,
+      "step": 890
+    },
+    {
+      "epoch": 2.34375,
+      "grad_norm": 0.2847365438938141,
+      "learning_rate": 1.0980902777777777e-05,
+      "loss": 2.3859,
+      "step": 900
+    },
+    {
+      "epoch": 2.3697916666666665,
+      "grad_norm": 0.2546698749065399,
+      "learning_rate": 1.0546875e-05,
+      "loss": 2.5522,
+      "step": 910
+    },
+    {
+      "epoch": 2.3958333333333335,
+      "grad_norm": 0.39236539602279663,
+      "learning_rate": 1.0112847222222222e-05,
+      "loss": 2.3852,
+      "step": 920
+    },
+    {
+      "epoch": 2.421875,
+      "grad_norm": 0.2980665862560272,
+      "learning_rate": 9.678819444444445e-06,
+      "loss": 2.3363,
+      "step": 930
+    },
+    {
+      "epoch": 2.4479166666666665,
+      "grad_norm": 0.2813090682029724,
+      "learning_rate": 9.244791666666668e-06,
+      "loss": 2.4878,
+      "step": 940
+    },
+    {
+      "epoch": 2.4739583333333335,
+      "grad_norm": 0.26593634486198425,
+      "learning_rate": 8.81076388888889e-06,
+      "loss": 2.4595,
+      "step": 950
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 0.3041106164455414,
+      "learning_rate": 8.376736111111111e-06,
+      "loss": 2.3665,
+      "step": 960
+    },
+    {
+      "epoch": 2.5260416666666665,
+      "grad_norm": 0.2596166133880615,
+      "learning_rate": 7.942708333333333e-06,
+      "loss": 2.4274,
+      "step": 970
+    },
+    {
+      "epoch": 2.5520833333333335,
+      "grad_norm": 0.2752944231033325,
+      "learning_rate": 7.508680555555556e-06,
+      "loss": 2.4852,
+      "step": 980
+    },
+    {
+      "epoch": 2.578125,
+      "grad_norm": 0.294866681098938,
+      "learning_rate": 7.074652777777778e-06,
+      "loss": 2.4531,
+      "step": 990
+    },
+    {
+      "epoch": 2.6041666666666665,
+      "grad_norm": 0.36473456025123596,
+      "learning_rate": 6.6406250000000005e-06,
+      "loss": 2.3718,
+      "step": 1000
+    },
+    {
+      "epoch": 2.6302083333333335,
+      "grad_norm": 0.2755633592605591,
+      "learning_rate": 6.206597222222223e-06,
+      "loss": 2.5185,
+      "step": 1010
+    },
+    {
+      "epoch": 2.65625,
+      "grad_norm": 0.30362269282341003,
+      "learning_rate": 5.772569444444445e-06,
+      "loss": 2.361,
+      "step": 1020
+    },
+    {
+      "epoch": 2.6822916666666665,
+      "grad_norm": 0.4947061240673065,
+      "learning_rate": 5.3385416666666666e-06,
+      "loss": 2.3851,
+      "step": 1030
+    },
+    {
+      "epoch": 2.7083333333333335,
+      "grad_norm": 0.29744017124176025,
+      "learning_rate": 4.904513888888889e-06,
+      "loss": 2.4882,
+      "step": 1040
+    },
+    {
+      "epoch": 2.734375,
+      "grad_norm": 0.32756978273391724,
+      "learning_rate": 4.470486111111111e-06,
+      "loss": 2.3541,
+      "step": 1050
+    },
+    {
+      "epoch": 2.7604166666666665,
+      "grad_norm": 0.26129186153411865,
+      "learning_rate": 4.0364583333333335e-06,
+      "loss": 2.3391,
+      "step": 1060
+    },
+    {
+      "epoch": 2.7864583333333335,
+      "grad_norm": 0.3421052396297455,
+      "learning_rate": 3.6024305555555554e-06,
+      "loss": 2.4271,
+      "step": 1070
+    },
+    {
+      "epoch": 2.8125,
+      "grad_norm": 0.4098067879676819,
+      "learning_rate": 3.1684027777777777e-06,
+      "loss": 2.4842,
+      "step": 1080
+    },
+    {
+      "epoch": 2.8385416666666665,
+      "grad_norm": 0.2739180028438568,
+      "learning_rate": 2.734375e-06,
+      "loss": 2.4398,
+      "step": 1090
+    },
+    {
+      "epoch": 2.8645833333333335,
+      "grad_norm": 0.3029952347278595,
+      "learning_rate": 2.3003472222222223e-06,
+      "loss": 2.51,
+      "step": 1100
+    },
+    {
+      "epoch": 2.890625,
+      "grad_norm": 0.40385085344314575,
+      "learning_rate": 1.8663194444444446e-06,
+      "loss": 2.4742,
+      "step": 1110
+    },
+    {
+      "epoch": 2.9166666666666665,
+      "grad_norm": 0.27774107456207275,
+      "learning_rate": 1.4322916666666667e-06,
+      "loss": 2.3773,
+      "step": 1120
+    },
+    {
+      "epoch": 2.9427083333333335,
+      "grad_norm": 0.2899167239665985,
+      "learning_rate": 9.982638888888888e-07,
+      "loss": 2.3983,
+      "step": 1130
+    },
+    {
+      "epoch": 2.96875,
+      "grad_norm": 0.3455342650413513,
+      "learning_rate": 5.642361111111111e-07,
+      "loss": 2.3833,
+      "step": 1140
+    },
+    {
+      "epoch": 2.9947916666666665,
+      "grad_norm": 0.2729035019874573,
+      "learning_rate": 1.3020833333333334e-07,
+      "loss": 2.4086,
+      "step": 1150
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1152,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8928096421478400.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

smollm2-finetuned1/{checkpoint-192 → checkpoint-1152}/training_args.bin RENAMED Viewed

File without changes

smollm2-finetuned1/{checkpoint-576 → checkpoint-1152}/vocab.json RENAMED Viewed

File without changes

smollm2-finetuned1/checkpoint-384/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8bf151157d1f76e14f52e8de20b9e7d775ce94fec46cd9589f75b8f0805293dd
 size 3293480

 version https://git-lfs.github.com/spec/v1
+oid sha256:7bd5902451031a9fadf60e2a888eb0e66adcda2596a6502d299dcd64a750c919
 size 3293480

smollm2-finetuned1/checkpoint-384/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9bd86b123fed0dee5276a82fa1d2817f7342188e56060b1c9c2481f26e623ef2
 size 6661242

 version https://git-lfs.github.com/spec/v1
+oid sha256:25c1d42327c72e5c190de75f170a2a72a0fe24a4d8275e1eea5d2a30833c350b
 size 6661242

smollm2-finetuned1/checkpoint-384/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b02962bcb741e7aee4061cc720d2d3dca2b4a932ac83ffb3fbb613b2231eaaf7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ce52ea9381fce4fe46b9309b34bb790ee89cb0c7a21c055acc5f2accc64dd67d
 size 14244

smollm2-finetuned1/checkpoint-384/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45dc0633de6c44e0714dfe6fe328cb8bf2f34f32c97bc8b6eac54e156ea5622d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e272daafda8b8d85ce869d4f83b7f9dd14252197b28e078d50e82ec2463234f9
 size 1064

smollm2-finetuned1/checkpoint-384/trainer_state.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.0,
   "eval_steps": 500,
   "global_step": 384,
   "is_hyper_param_search": false,
@@ -10,274 +10,274 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.052083333333333336,
-      "grad_norm": 0.07655070722103119,
-      "learning_rate": 4.921875e-05,
-      "loss": 3.1871,
       "step": 10
     },
     {
-      "epoch": 0.10416666666666667,
-      "grad_norm": 0.09160477668046951,
-      "learning_rate": 4.835069444444444e-05,
-      "loss": 3.0592,
       "step": 20
     },
     {
-      "epoch": 0.15625,
-      "grad_norm": 0.09234557300806046,
-      "learning_rate": 4.748263888888889e-05,
-      "loss": 3.1982,
       "step": 30
     },
     {
-      "epoch": 0.20833333333333334,
-      "grad_norm": 0.09615059196949005,
-      "learning_rate": 4.6614583333333336e-05,
-      "loss": 3.1865,
       "step": 40
     },
     {
-      "epoch": 0.2604166666666667,
-      "grad_norm": 0.11428328603506088,
-      "learning_rate": 4.574652777777778e-05,
-      "loss": 3.1689,
       "step": 50
     },
     {
-      "epoch": 0.3125,
-      "grad_norm": 0.10111145675182343,
-      "learning_rate": 4.487847222222222e-05,
-      "loss": 3.0937,
       "step": 60
     },
     {
-      "epoch": 0.3645833333333333,
-      "grad_norm": 0.12738557159900665,
-      "learning_rate": 4.401041666666667e-05,
-      "loss": 3.143,
       "step": 70
     },
     {
-      "epoch": 0.4166666666666667,
-      "grad_norm": 0.14076603949069977,
-      "learning_rate": 4.3142361111111114e-05,
-      "loss": 3.0963,
       "step": 80
     },
     {
-      "epoch": 0.46875,
-      "grad_norm": 0.12179583311080933,
-      "learning_rate": 4.227430555555556e-05,
-      "loss": 3.0358,
       "step": 90
     },
     {
-      "epoch": 0.5208333333333334,
-      "grad_norm": 0.13192890584468842,
-      "learning_rate": 4.140625e-05,
-      "loss": 3.1517,
       "step": 100
     },
     {
-      "epoch": 0.5729166666666666,
-      "grad_norm": 0.12306984513998032,
-      "learning_rate": 4.053819444444444e-05,
-      "loss": 3.0126,
       "step": 110
     },
     {
-      "epoch": 0.625,
-      "grad_norm": 0.1288878321647644,
-      "learning_rate": 3.967013888888889e-05,
-      "loss": 3.0296,
       "step": 120
     },
     {
-      "epoch": 0.6770833333333334,
-      "grad_norm": 0.15506671369075775,
-      "learning_rate": 3.8802083333333336e-05,
-      "loss": 3.0453,
       "step": 130
     },
     {
-      "epoch": 0.7291666666666666,
-      "grad_norm": 0.14591804146766663,
-      "learning_rate": 3.793402777777778e-05,
-      "loss": 3.0611,
       "step": 140
     },
     {
-      "epoch": 0.78125,
-      "grad_norm": 0.14732398092746735,
-      "learning_rate": 3.706597222222222e-05,
-      "loss": 3.071,
       "step": 150
     },
     {
-      "epoch": 0.8333333333333334,
-      "grad_norm": 0.14069262146949768,
-      "learning_rate": 3.619791666666667e-05,
-      "loss": 3.0066,
       "step": 160
     },
     {
-      "epoch": 0.8854166666666666,
-      "grad_norm": 0.15924516320228577,
-      "learning_rate": 3.5329861111111114e-05,
-      "loss": 2.9758,
       "step": 170
     },
     {
-      "epoch": 0.9375,
-      "grad_norm": 0.16019809246063232,
-      "learning_rate": 3.446180555555556e-05,
-      "loss": 2.981,
       "step": 180
     },
     {
-      "epoch": 0.9895833333333334,
-      "grad_norm": 0.155264750123024,
-      "learning_rate": 3.359375e-05,
-      "loss": 2.9057,
       "step": 190
     },
     {
-      "epoch": 1.0416666666666667,
-      "grad_norm": 0.1712893545627594,
-      "learning_rate": 3.272569444444444e-05,
-      "loss": 2.9711,
       "step": 200
     },
     {
-      "epoch": 1.09375,
-      "grad_norm": 0.1758311241865158,
-      "learning_rate": 3.185763888888889e-05,
-      "loss": 2.9298,
       "step": 210
     },
     {
-      "epoch": 1.1458333333333333,
-      "grad_norm": 0.17173148691654205,
-      "learning_rate": 3.0989583333333336e-05,
-      "loss": 2.9685,
       "step": 220
     },
     {
-      "epoch": 1.1979166666666667,
-      "grad_norm": 0.19112636148929596,
-      "learning_rate": 3.0121527777777782e-05,
-      "loss": 2.9262,
       "step": 230
     },
     {
-      "epoch": 1.25,
-      "grad_norm": 0.18498270213603973,
-      "learning_rate": 2.925347222222222e-05,
-      "loss": 2.8788,
       "step": 240
     },
     {
-      "epoch": 1.3020833333333333,
-      "grad_norm": 0.1705554723739624,
-      "learning_rate": 2.838541666666667e-05,
-      "loss": 2.7591,
       "step": 250
     },
     {
-      "epoch": 1.3541666666666667,
-      "grad_norm": 0.19246017932891846,
-      "learning_rate": 2.751736111111111e-05,
-      "loss": 2.8625,
       "step": 260
     },
     {
-      "epoch": 1.40625,
-      "grad_norm": 0.17858931422233582,
-      "learning_rate": 2.664930555555556e-05,
-      "loss": 2.9046,
       "step": 270
     },
     {
-      "epoch": 1.4583333333333333,
-      "grad_norm": 0.21472914516925812,
-      "learning_rate": 2.578125e-05,
-      "loss": 2.8826,
       "step": 280
     },
     {
-      "epoch": 1.5104166666666665,
-      "grad_norm": 0.18860450387001038,
-      "learning_rate": 2.4913194444444446e-05,
-      "loss": 2.8265,
       "step": 290
     },
     {
-      "epoch": 1.5625,
-      "grad_norm": 0.17438200116157532,
-      "learning_rate": 2.404513888888889e-05,
-      "loss": 2.7819,
       "step": 300
     },
     {
-      "epoch": 1.6145833333333335,
-      "grad_norm": 0.23125068843364716,
-      "learning_rate": 2.3177083333333335e-05,
-      "loss": 2.8364,
       "step": 310
     },
     {
-      "epoch": 1.6666666666666665,
-      "grad_norm": 0.18903665244579315,
-      "learning_rate": 2.2309027777777782e-05,
-      "loss": 2.8423,
       "step": 320
     },
     {
-      "epoch": 1.71875,
-      "grad_norm": 0.22534161806106567,
-      "learning_rate": 2.144097222222222e-05,
-      "loss": 2.8666,
       "step": 330
     },
     {
-      "epoch": 1.7708333333333335,
-      "grad_norm": 0.240483358502388,
-      "learning_rate": 2.0572916666666668e-05,
-      "loss": 2.8409,
       "step": 340
     },
     {
-      "epoch": 1.8229166666666665,
-      "grad_norm": 0.23081159591674805,
-      "learning_rate": 1.970486111111111e-05,
-      "loss": 2.8257,
       "step": 350
     },
     {
-      "epoch": 1.875,
-      "grad_norm": 0.20717695355415344,
-      "learning_rate": 1.8836805555555557e-05,
-      "loss": 2.9202,
       "step": 360
     },
     {
-      "epoch": 1.9270833333333335,
-      "grad_norm": 0.21534092724323273,
-      "learning_rate": 1.796875e-05,
-      "loss": 2.7127,
       "step": 370
     },
     {
-      "epoch": 1.9791666666666665,
-      "grad_norm": 0.24049048125743866,
-      "learning_rate": 1.7100694444444446e-05,
-      "loss": 2.8734,
       "step": 380
     }
   ],
   "logging_steps": 10,
-  "max_steps": 576,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
@@ -293,7 +293,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2977001216409600.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 500,
   "global_step": 384,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.026041666666666668,
+      "grad_norm": 0.07924782484769821,
+      "learning_rate": 4.9609375000000005e-05,
+      "loss": 3.1556,
       "step": 10
     },
     {
+      "epoch": 0.052083333333333336,
+      "grad_norm": 0.0875125601887703,
+      "learning_rate": 4.917534722222222e-05,
+      "loss": 3.1901,
       "step": 20
     },
     {
+      "epoch": 0.078125,
+      "grad_norm": 0.09327095746994019,
+      "learning_rate": 4.874131944444445e-05,
+      "loss": 3.1043,
       "step": 30
     },
     {
+      "epoch": 0.10416666666666667,
+      "grad_norm": 0.08811140060424805,
+      "learning_rate": 4.830729166666667e-05,
+      "loss": 3.065,
       "step": 40
     },
     {
+      "epoch": 0.13020833333333334,
+      "grad_norm": 0.09639778733253479,
+      "learning_rate": 4.787326388888889e-05,
+      "loss": 3.1739,
       "step": 50
     },
     {
+      "epoch": 0.15625,
+      "grad_norm": 0.12037681043148041,
+      "learning_rate": 4.7439236111111115e-05,
+      "loss": 3.0952,
       "step": 60
     },
     {
+      "epoch": 0.18229166666666666,
+      "grad_norm": 0.10899762064218521,
+      "learning_rate": 4.7005208333333334e-05,
+      "loss": 2.9885,
       "step": 70
     },
     {
+      "epoch": 0.20833333333333334,
+      "grad_norm": 0.10793007910251617,
+      "learning_rate": 4.657118055555556e-05,
+      "loss": 3.1401,
       "step": 80
     },
     {
+      "epoch": 0.234375,
+      "grad_norm": 0.13812898099422455,
+      "learning_rate": 4.613715277777778e-05,
+      "loss": 3.153,
       "step": 90
     },
     {
+      "epoch": 0.2604166666666667,
+      "grad_norm": 0.13644039630889893,
+      "learning_rate": 4.5703125e-05,
+      "loss": 3.0757,
       "step": 100
     },
     {
+      "epoch": 0.2864583333333333,
+      "grad_norm": 0.13385091722011566,
+      "learning_rate": 4.5269097222222226e-05,
+      "loss": 3.1576,
       "step": 110
     },
     {
+      "epoch": 0.3125,
+      "grad_norm": 0.13685277104377747,
+      "learning_rate": 4.4835069444444444e-05,
+      "loss": 3.0804,
       "step": 120
     },
     {
+      "epoch": 0.3385416666666667,
+      "grad_norm": 0.1641232818365097,
+      "learning_rate": 4.440104166666667e-05,
+      "loss": 3.105,
       "step": 130
     },
     {
+      "epoch": 0.3645833333333333,
+      "grad_norm": 0.14693030714988708,
+      "learning_rate": 4.3967013888888894e-05,
+      "loss": 3.0005,
       "step": 140
     },
     {
+      "epoch": 0.390625,
+      "grad_norm": 0.15054495632648468,
+      "learning_rate": 4.353298611111111e-05,
+      "loss": 3.0237,
       "step": 150
     },
     {
+      "epoch": 0.4166666666666667,
+      "grad_norm": 0.15571929514408112,
+      "learning_rate": 4.309895833333334e-05,
+      "loss": 2.9212,
       "step": 160
     },
     {
+      "epoch": 0.4427083333333333,
+      "grad_norm": 0.16245532035827637,
+      "learning_rate": 4.266493055555556e-05,
+      "loss": 2.9759,
       "step": 170
     },
     {
+      "epoch": 0.46875,
+      "grad_norm": 0.1736162304878235,
+      "learning_rate": 4.223090277777778e-05,
+      "loss": 2.9398,
       "step": 180
     },
     {
+      "epoch": 0.4947916666666667,
+      "grad_norm": 0.166653111577034,
+      "learning_rate": 4.1796875000000005e-05,
+      "loss": 2.9738,
       "step": 190
     },
     {
+      "epoch": 0.5208333333333334,
+      "grad_norm": 0.18301138281822205,
+      "learning_rate": 4.136284722222222e-05,
+      "loss": 2.8916,
       "step": 200
     },
     {
+      "epoch": 0.546875,
+      "grad_norm": 0.18230901658535004,
+      "learning_rate": 4.092881944444444e-05,
+      "loss": 2.9061,
       "step": 210
     },
     {
+      "epoch": 0.5729166666666666,
+      "grad_norm": 0.18205538392066956,
+      "learning_rate": 4.049479166666667e-05,
+      "loss": 2.8703,
       "step": 220
     },
     {
+      "epoch": 0.5989583333333334,
+      "grad_norm": 0.17894142866134644,
+      "learning_rate": 4.006076388888889e-05,
+      "loss": 2.808,
       "step": 230
     },
     {
+      "epoch": 0.625,
+      "grad_norm": 0.18798667192459106,
+      "learning_rate": 3.9626736111111115e-05,
+      "loss": 2.8614,
       "step": 240
     },
     {
+      "epoch": 0.6510416666666666,
+      "grad_norm": 0.19296622276306152,
+      "learning_rate": 3.919270833333333e-05,
+      "loss": 2.9261,
       "step": 250
     },
     {
+      "epoch": 0.6770833333333334,
+      "grad_norm": 0.19976121187210083,
+      "learning_rate": 3.875868055555556e-05,
+      "loss": 2.8094,
       "step": 260
     },
     {
+      "epoch": 0.703125,
+      "grad_norm": 0.17799632251262665,
+      "learning_rate": 3.832465277777778e-05,
+      "loss": 2.8358,
       "step": 270
     },
     {
+      "epoch": 0.7291666666666666,
+      "grad_norm": 0.2119487226009369,
+      "learning_rate": 3.7890625e-05,
+      "loss": 2.8203,
       "step": 280
     },
     {
+      "epoch": 0.7552083333333334,
+      "grad_norm": 0.20649544894695282,
+      "learning_rate": 3.7456597222222226e-05,
+      "loss": 2.7819,
       "step": 290
     },
     {
+      "epoch": 0.78125,
+      "grad_norm": 0.22266916930675507,
+      "learning_rate": 3.7022569444444444e-05,
+      "loss": 2.8192,
       "step": 300
     },
     {
+      "epoch": 0.8072916666666666,
+      "grad_norm": 0.23271456360816956,
+      "learning_rate": 3.658854166666667e-05,
+      "loss": 2.8706,
       "step": 310
     },
     {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 0.2504468560218811,
+      "learning_rate": 3.6154513888888894e-05,
+      "loss": 2.7641,
       "step": 320
     },
     {
+      "epoch": 0.859375,
+      "grad_norm": 0.19909098744392395,
+      "learning_rate": 3.572048611111111e-05,
+      "loss": 2.6973,
       "step": 330
     },
     {
+      "epoch": 0.8854166666666666,
+      "grad_norm": 0.2656916081905365,
+      "learning_rate": 3.528645833333333e-05,
+      "loss": 2.6888,
       "step": 340
     },
     {
+      "epoch": 0.9114583333333334,
+      "grad_norm": 0.2115168273448944,
+      "learning_rate": 3.485243055555556e-05,
+      "loss": 2.7075,
       "step": 350
     },
     {
+      "epoch": 0.9375,
+      "grad_norm": 0.24468505382537842,
+      "learning_rate": 3.441840277777778e-05,
+      "loss": 2.7051,
       "step": 360
     },
     {
+      "epoch": 0.9635416666666666,
+      "grad_norm": 0.23581114411354065,
+      "learning_rate": 3.3984375000000004e-05,
+      "loss": 2.6773,
       "step": 370
     },
     {
+      "epoch": 0.9895833333333334,
+      "grad_norm": 0.27085772156715393,
+      "learning_rate": 3.355034722222222e-05,
+      "loss": 2.6959,
       "step": 380
     }
   ],
   "logging_steps": 10,
+  "max_steps": 1152,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 2976032140492800.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

smollm2-finetuned1/checkpoint-768/README.md ADDED Viewed

	@@ -0,0 +1,207 @@

+---
+base_model: HuggingFaceTB/SmolLM2-360M
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:HuggingFaceTB/SmolLM2-360M
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.16.0

smollm2-finetuned1/checkpoint-768/adapter_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "HuggingFaceTB/SmolLM2-360M",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

smollm2-finetuned1/checkpoint-768/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:58406cf2d1290201630406036eec799e565b6ca153b8100215bc4387f35a5df6
+size 3293480

smollm2-finetuned1/checkpoint-768/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

smollm2-finetuned1/checkpoint-768/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1fa1e836a84b9a1ebbea478846b80efd1b23db213c24f915698979f11fa75863
+size 6661242

smollm2-finetuned1/checkpoint-768/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd9d7efa893c250ec39a1407465ef10a1035a3a93c795f0192103385a15b8e70
+size 14244

smollm2-finetuned1/checkpoint-768/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a221f4673c0de0f105e2875e3402fd11dccaed60257e2c0d2036f94ab12c0bb9
+size 988

smollm2-finetuned1/checkpoint-768/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce304b863670f40be984cea25919bac01aad8bf9da84e2306ebe857b78098546
+size 1064

smollm2-finetuned1/checkpoint-768/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<|im_start|>",
+    "<|im_end|>",
+    "<repo_name>",
+    "<reponame>",
+    "<file_sep>",
+    "<filename>",
+    "<gh_stars>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<jupyter_script>",
+    "<empty_output>"
+  ],
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

smollm2-finetuned1/checkpoint-768/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

smollm2-finetuned1/checkpoint-768/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,169 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<repo_name>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<reponame>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<file_sep>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<filename>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<gh_stars>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<jupyter_script>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<|im_start|>",
+    "<|im_end|>",
+    "<repo_name>",
+    "<reponame>",
+    "<file_sep>",
+    "<filename>",
+    "<gh_stars>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<jupyter_script>",
+    "<empty_output>"
+  ],
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "model_max_length": 8192,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>",
+  "vocab_size": 49152
+}

smollm2-finetuned1/checkpoint-768/trainer_state.json ADDED Viewed

	@@ -0,0 +1,566 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 768,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.026041666666666668,
+      "grad_norm": 0.07924782484769821,
+      "learning_rate": 4.9609375000000005e-05,
+      "loss": 3.1556,
+      "step": 10
+    },
+    {
+      "epoch": 0.052083333333333336,
+      "grad_norm": 0.0875125601887703,
+      "learning_rate": 4.917534722222222e-05,
+      "loss": 3.1901,
+      "step": 20
+    },
+    {
+      "epoch": 0.078125,
+      "grad_norm": 0.09327095746994019,
+      "learning_rate": 4.874131944444445e-05,
+      "loss": 3.1043,
+      "step": 30
+    },
+    {
+      "epoch": 0.10416666666666667,
+      "grad_norm": 0.08811140060424805,
+      "learning_rate": 4.830729166666667e-05,
+      "loss": 3.065,
+      "step": 40
+    },
+    {
+      "epoch": 0.13020833333333334,
+      "grad_norm": 0.09639778733253479,
+      "learning_rate": 4.787326388888889e-05,
+      "loss": 3.1739,
+      "step": 50
+    },
+    {
+      "epoch": 0.15625,
+      "grad_norm": 0.12037681043148041,
+      "learning_rate": 4.7439236111111115e-05,
+      "loss": 3.0952,
+      "step": 60
+    },
+    {
+      "epoch": 0.18229166666666666,
+      "grad_norm": 0.10899762064218521,
+      "learning_rate": 4.7005208333333334e-05,
+      "loss": 2.9885,
+      "step": 70
+    },
+    {
+      "epoch": 0.20833333333333334,
+      "grad_norm": 0.10793007910251617,
+      "learning_rate": 4.657118055555556e-05,
+      "loss": 3.1401,
+      "step": 80
+    },
+    {
+      "epoch": 0.234375,
+      "grad_norm": 0.13812898099422455,
+      "learning_rate": 4.613715277777778e-05,
+      "loss": 3.153,
+      "step": 90
+    },
+    {
+      "epoch": 0.2604166666666667,
+      "grad_norm": 0.13644039630889893,
+      "learning_rate": 4.5703125e-05,
+      "loss": 3.0757,
+      "step": 100
+    },
+    {
+      "epoch": 0.2864583333333333,
+      "grad_norm": 0.13385091722011566,
+      "learning_rate": 4.5269097222222226e-05,
+      "loss": 3.1576,
+      "step": 110
+    },
+    {
+      "epoch": 0.3125,
+      "grad_norm": 0.13685277104377747,
+      "learning_rate": 4.4835069444444444e-05,
+      "loss": 3.0804,
+      "step": 120
+    },
+    {
+      "epoch": 0.3385416666666667,
+      "grad_norm": 0.1641232818365097,
+      "learning_rate": 4.440104166666667e-05,
+      "loss": 3.105,
+      "step": 130
+    },
+    {
+      "epoch": 0.3645833333333333,
+      "grad_norm": 0.14693030714988708,
+      "learning_rate": 4.3967013888888894e-05,
+      "loss": 3.0005,
+      "step": 140
+    },
+    {
+      "epoch": 0.390625,
+      "grad_norm": 0.15054495632648468,
+      "learning_rate": 4.353298611111111e-05,
+      "loss": 3.0237,
+      "step": 150
+    },
+    {
+      "epoch": 0.4166666666666667,
+      "grad_norm": 0.15571929514408112,
+      "learning_rate": 4.309895833333334e-05,
+      "loss": 2.9212,
+      "step": 160
+    },
+    {
+      "epoch": 0.4427083333333333,
+      "grad_norm": 0.16245532035827637,
+      "learning_rate": 4.266493055555556e-05,
+      "loss": 2.9759,
+      "step": 170
+    },
+    {
+      "epoch": 0.46875,
+      "grad_norm": 0.1736162304878235,
+      "learning_rate": 4.223090277777778e-05,
+      "loss": 2.9398,
+      "step": 180
+    },
+    {
+      "epoch": 0.4947916666666667,
+      "grad_norm": 0.166653111577034,
+      "learning_rate": 4.1796875000000005e-05,
+      "loss": 2.9738,
+      "step": 190
+    },
+    {
+      "epoch": 0.5208333333333334,
+      "grad_norm": 0.18301138281822205,
+      "learning_rate": 4.136284722222222e-05,
+      "loss": 2.8916,
+      "step": 200
+    },
+    {
+      "epoch": 0.546875,
+      "grad_norm": 0.18230901658535004,
+      "learning_rate": 4.092881944444444e-05,
+      "loss": 2.9061,
+      "step": 210
+    },
+    {
+      "epoch": 0.5729166666666666,
+      "grad_norm": 0.18205538392066956,
+      "learning_rate": 4.049479166666667e-05,
+      "loss": 2.8703,
+      "step": 220
+    },
+    {
+      "epoch": 0.5989583333333334,
+      "grad_norm": 0.17894142866134644,
+      "learning_rate": 4.006076388888889e-05,
+      "loss": 2.808,
+      "step": 230
+    },
+    {
+      "epoch": 0.625,
+      "grad_norm": 0.18798667192459106,
+      "learning_rate": 3.9626736111111115e-05,
+      "loss": 2.8614,
+      "step": 240
+    },
+    {
+      "epoch": 0.6510416666666666,
+      "grad_norm": 0.19296622276306152,
+      "learning_rate": 3.919270833333333e-05,
+      "loss": 2.9261,
+      "step": 250
+    },
+    {
+      "epoch": 0.6770833333333334,
+      "grad_norm": 0.19976121187210083,
+      "learning_rate": 3.875868055555556e-05,
+      "loss": 2.8094,
+      "step": 260
+    },
+    {
+      "epoch": 0.703125,
+      "grad_norm": 0.17799632251262665,
+      "learning_rate": 3.832465277777778e-05,
+      "loss": 2.8358,
+      "step": 270
+    },
+    {
+      "epoch": 0.7291666666666666,
+      "grad_norm": 0.2119487226009369,
+      "learning_rate": 3.7890625e-05,
+      "loss": 2.8203,
+      "step": 280
+    },
+    {
+      "epoch": 0.7552083333333334,
+      "grad_norm": 0.20649544894695282,
+      "learning_rate": 3.7456597222222226e-05,
+      "loss": 2.7819,
+      "step": 290
+    },
+    {
+      "epoch": 0.78125,
+      "grad_norm": 0.22266916930675507,
+      "learning_rate": 3.7022569444444444e-05,
+      "loss": 2.8192,
+      "step": 300
+    },
+    {
+      "epoch": 0.8072916666666666,
+      "grad_norm": 0.23271456360816956,
+      "learning_rate": 3.658854166666667e-05,
+      "loss": 2.8706,
+      "step": 310
+    },
+    {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 0.2504468560218811,
+      "learning_rate": 3.6154513888888894e-05,
+      "loss": 2.7641,
+      "step": 320
+    },
+    {
+      "epoch": 0.859375,
+      "grad_norm": 0.19909098744392395,
+      "learning_rate": 3.572048611111111e-05,
+      "loss": 2.6973,
+      "step": 330
+    },
+    {
+      "epoch": 0.8854166666666666,
+      "grad_norm": 0.2656916081905365,
+      "learning_rate": 3.528645833333333e-05,
+      "loss": 2.6888,
+      "step": 340
+    },
+    {
+      "epoch": 0.9114583333333334,
+      "grad_norm": 0.2115168273448944,
+      "learning_rate": 3.485243055555556e-05,
+      "loss": 2.7075,
+      "step": 350
+    },
+    {
+      "epoch": 0.9375,
+      "grad_norm": 0.24468505382537842,
+      "learning_rate": 3.441840277777778e-05,
+      "loss": 2.7051,
+      "step": 360
+    },
+    {
+      "epoch": 0.9635416666666666,
+      "grad_norm": 0.23581114411354065,
+      "learning_rate": 3.3984375000000004e-05,
+      "loss": 2.6773,
+      "step": 370
+    },
+    {
+      "epoch": 0.9895833333333334,
+      "grad_norm": 0.27085772156715393,
+      "learning_rate": 3.355034722222222e-05,
+      "loss": 2.6959,
+      "step": 380
+    },
+    {
+      "epoch": 1.015625,
+      "grad_norm": 0.22150537371635437,
+      "learning_rate": 3.311631944444444e-05,
+      "loss": 2.5918,
+      "step": 390
+    },
+    {
+      "epoch": 1.0416666666666667,
+      "grad_norm": 0.22745287418365479,
+      "learning_rate": 3.268229166666667e-05,
+      "loss": 2.5933,
+      "step": 400
+    },
+    {
+      "epoch": 1.0677083333333333,
+      "grad_norm": 0.29524073004722595,
+      "learning_rate": 3.224826388888889e-05,
+      "loss": 2.659,
+      "step": 410
+    },
+    {
+      "epoch": 1.09375,
+      "grad_norm": 0.26419463753700256,
+      "learning_rate": 3.1814236111111115e-05,
+      "loss": 2.615,
+      "step": 420
+    },
+    {
+      "epoch": 1.1197916666666667,
+      "grad_norm": 0.3234228789806366,
+      "learning_rate": 3.138020833333333e-05,
+      "loss": 2.5106,
+      "step": 430
+    },
+    {
+      "epoch": 1.1458333333333333,
+      "grad_norm": 0.28176596760749817,
+      "learning_rate": 3.094618055555556e-05,
+      "loss": 2.6551,
+      "step": 440
+    },
+    {
+      "epoch": 1.171875,
+      "grad_norm": 0.2584069073200226,
+      "learning_rate": 3.051215277777778e-05,
+      "loss": 2.6053,
+      "step": 450
+    },
+    {
+      "epoch": 1.1979166666666667,
+      "grad_norm": 0.22455713152885437,
+      "learning_rate": 3.0078125e-05,
+      "loss": 2.5743,
+      "step": 460
+    },
+    {
+      "epoch": 1.2239583333333333,
+      "grad_norm": 0.2834200859069824,
+      "learning_rate": 2.9644097222222222e-05,
+      "loss": 2.5535,
+      "step": 470
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 0.26442793011665344,
+      "learning_rate": 2.9210069444444444e-05,
+      "loss": 2.5343,
+      "step": 480
+    },
+    {
+      "epoch": 1.2760416666666667,
+      "grad_norm": 0.2931734323501587,
+      "learning_rate": 2.877604166666667e-05,
+      "loss": 2.6204,
+      "step": 490
+    },
+    {
+      "epoch": 1.3020833333333333,
+      "grad_norm": 0.3041195571422577,
+      "learning_rate": 2.834201388888889e-05,
+      "loss": 2.5094,
+      "step": 500
+    },
+    {
+      "epoch": 1.328125,
+      "grad_norm": 0.280934602022171,
+      "learning_rate": 2.790798611111111e-05,
+      "loss": 2.5245,
+      "step": 510
+    },
+    {
+      "epoch": 1.3541666666666667,
+      "grad_norm": 0.2929544448852539,
+      "learning_rate": 2.7473958333333333e-05,
+      "loss": 2.5285,
+      "step": 520
+    },
+    {
+      "epoch": 1.3802083333333333,
+      "grad_norm": 0.32009249925613403,
+      "learning_rate": 2.7039930555555558e-05,
+      "loss": 2.5499,
+      "step": 530
+    },
+    {
+      "epoch": 1.40625,
+      "grad_norm": 0.290863960981369,
+      "learning_rate": 2.660590277777778e-05,
+      "loss": 2.5022,
+      "step": 540
+    },
+    {
+      "epoch": 1.4322916666666667,
+      "grad_norm": 0.3213239312171936,
+      "learning_rate": 2.6171875e-05,
+      "loss": 2.5902,
+      "step": 550
+    },
+    {
+      "epoch": 1.4583333333333333,
+      "grad_norm": 0.26357147097587585,
+      "learning_rate": 2.5737847222222222e-05,
+      "loss": 2.6207,
+      "step": 560
+    },
+    {
+      "epoch": 1.484375,
+      "grad_norm": 0.30386197566986084,
+      "learning_rate": 2.5303819444444444e-05,
+      "loss": 2.5154,
+      "step": 570
+    },
+    {
+      "epoch": 1.5104166666666665,
+      "grad_norm": 0.3122921288013458,
+      "learning_rate": 2.4869791666666665e-05,
+      "loss": 2.5318,
+      "step": 580
+    },
+    {
+      "epoch": 1.5364583333333335,
+      "grad_norm": 0.3065759241580963,
+      "learning_rate": 2.443576388888889e-05,
+      "loss": 2.5618,
+      "step": 590
+    },
+    {
+      "epoch": 1.5625,
+      "grad_norm": 0.2884944677352905,
+      "learning_rate": 2.400173611111111e-05,
+      "loss": 2.5246,
+      "step": 600
+    },
+    {
+      "epoch": 1.5885416666666665,
+      "grad_norm": 0.26818904280662537,
+      "learning_rate": 2.3567708333333336e-05,
+      "loss": 2.5426,
+      "step": 610
+    },
+    {
+      "epoch": 1.6145833333333335,
+      "grad_norm": 0.3531734347343445,
+      "learning_rate": 2.3133680555555558e-05,
+      "loss": 2.5363,
+      "step": 620
+    },
+    {
+      "epoch": 1.640625,
+      "grad_norm": 0.25133830308914185,
+      "learning_rate": 2.269965277777778e-05,
+      "loss": 2.536,
+      "step": 630
+    },
+    {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 0.24893426895141602,
+      "learning_rate": 2.2265625e-05,
+      "loss": 2.567,
+      "step": 640
+    },
+    {
+      "epoch": 1.6927083333333335,
+      "grad_norm": 0.3459770083427429,
+      "learning_rate": 2.1831597222222222e-05,
+      "loss": 2.5626,
+      "step": 650
+    },
+    {
+      "epoch": 1.71875,
+      "grad_norm": 0.4507867097854614,
+      "learning_rate": 2.1397569444444447e-05,
+      "loss": 2.4232,
+      "step": 660
+    },
+    {
+      "epoch": 1.7447916666666665,
+      "grad_norm": 0.31911784410476685,
+      "learning_rate": 2.0963541666666665e-05,
+      "loss": 2.5649,
+      "step": 670
+    },
+    {
+      "epoch": 1.7708333333333335,
+      "grad_norm": 0.3345726728439331,
+      "learning_rate": 2.052951388888889e-05,
+      "loss": 2.4291,
+      "step": 680
+    },
+    {
+      "epoch": 1.796875,
+      "grad_norm": 0.3128361403942108,
+      "learning_rate": 2.009548611111111e-05,
+      "loss": 2.3886,
+      "step": 690
+    },
+    {
+      "epoch": 1.8229166666666665,
+      "grad_norm": 0.27449044585227966,
+      "learning_rate": 1.9661458333333336e-05,
+      "loss": 2.4424,
+      "step": 700
+    },
+    {
+      "epoch": 1.8489583333333335,
+      "grad_norm": 0.29805341362953186,
+      "learning_rate": 1.9227430555555558e-05,
+      "loss": 2.5046,
+      "step": 710
+    },
+    {
+      "epoch": 1.875,
+      "grad_norm": 0.28968825936317444,
+      "learning_rate": 1.879340277777778e-05,
+      "loss": 2.4431,
+      "step": 720
+    },
+    {
+      "epoch": 1.9010416666666665,
+      "grad_norm": 0.28389787673950195,
+      "learning_rate": 1.8359375e-05,
+      "loss": 2.5092,
+      "step": 730
+    },
+    {
+      "epoch": 1.9270833333333335,
+      "grad_norm": 0.2659839391708374,
+      "learning_rate": 1.7925347222222222e-05,
+      "loss": 2.4871,
+      "step": 740
+    },
+    {
+      "epoch": 1.953125,
+      "grad_norm": 0.33582159876823425,
+      "learning_rate": 1.7491319444444447e-05,
+      "loss": 2.5754,
+      "step": 750
+    },
+    {
+      "epoch": 1.9791666666666665,
+      "grad_norm": 0.2729295790195465,
+      "learning_rate": 1.7057291666666665e-05,
+      "loss": 2.4551,
+      "step": 760
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1152,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5952064280985600.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

smollm2-finetuned1/{checkpoint-576 → checkpoint-768}/training_args.bin RENAMED Viewed

File without changes

smollm2-finetuned1/checkpoint-768/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

smollm2-finetuned12/checkpoint-192/README.md ADDED Viewed

	@@ -0,0 +1,207 @@

+---
+base_model: HuggingFaceTB/SmolLM2-360M
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:HuggingFaceTB/SmolLM2-360M
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.16.0

smollm2-finetuned12/checkpoint-192/adapter_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "HuggingFaceTB/SmolLM2-360M",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

{smollm2-finetuned1 → smollm2-finetuned12}/checkpoint-192/adapter_model.safetensors RENAMED Viewed

File without changes

smollm2-finetuned12/checkpoint-192/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

{smollm2-finetuned1 → smollm2-finetuned12}/checkpoint-192/optimizer.pt RENAMED Viewed

File without changes

{smollm2-finetuned1 → smollm2-finetuned12}/checkpoint-192/rng_state.pth RENAMED Viewed

File without changes

{smollm2-finetuned1 → smollm2-finetuned12}/checkpoint-192/scaler.pt RENAMED Viewed

File without changes

{smollm2-finetuned1 → smollm2-finetuned12}/checkpoint-192/scheduler.pt RENAMED Viewed

File without changes