antonpolishko committed on
Commit
d5745e7
·
verified ·
1 Parent(s): 7c471b2

Model save

Browse files
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/polyagent/huggingface/runs/26qriwow)
31
 
32
  This model was trained with SFT.
33
 
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/polyagent/huggingface/runs/y5uti29q)
31
 
32
  This model was trained with SFT.
33
 
all_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 3.0,
3
  "total_flos": 5.361516984661967e+18,
4
  "train_loss": 5.0057218712328115,
5
- "train_runtime": 5286.5666,
6
  "train_samples": 95663,
7
- "train_samples_per_second": 9.883,
8
- "train_steps_per_second": 0.155
9
  }
 
2
  "epoch": 3.0,
3
  "total_flos": 5.361516984661967e+18,
4
  "train_loss": 5.0057218712328115,
5
+ "train_runtime": 5088.8536,
6
  "train_samples": 95663,
7
+ "train_samples_per_second": 10.267,
8
+ "train_steps_per_second": 0.161
9
  }
runs/Dec26_13-37-26_mia1-gpu-110/events.out.tfevents.1735220304.mia1-gpu-110.3868266.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3f91fe0fec6ba8b51210a5dc7b4913f9ca9c102ec0d49ff5266443641ace8c4
3
- size 41013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4cb7415ed71bc4ed48c3eb38d592b50b0a560f5415b73602343827db106d79c
3
+ size 41367
train_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 3.0,
3
  "total_flos": 5.361516984661967e+18,
4
  "train_loss": 5.0057218712328115,
5
- "train_runtime": 5286.5666,
6
  "train_samples": 95663,
7
- "train_samples_per_second": 9.883,
8
- "train_steps_per_second": 0.155
9
  }
 
2
  "epoch": 3.0,
3
  "total_flos": 5.361516984661967e+18,
4
  "train_loss": 5.0057218712328115,
5
+ "train_runtime": 5088.8536,
6
  "train_samples": 95663,
7
+ "train_samples_per_second": 10.267,
8
+ "train_steps_per_second": 0.161
9
  }
trainer_state.json CHANGED
@@ -438,9 +438,9 @@
438
  {
439
  "epoch": 1.098901098901099,
440
  "eval_loss": 4.895308017730713,
441
- "eval_runtime": 14.6132,
442
- "eval_samples_per_second": 36.2,
443
- "eval_steps_per_second": 1.163,
444
  "step": 300
445
  },
446
  {
@@ -866,9 +866,9 @@
866
  {
867
  "epoch": 2.197802197802198,
868
  "eval_loss": 4.718916893005371,
869
- "eval_runtime": 14.5971,
870
- "eval_samples_per_second": 36.24,
871
- "eval_steps_per_second": 1.165,
872
  "step": 600
873
  },
874
  {
@@ -1177,9 +1177,9 @@
1177
  "step": 819,
1178
  "total_flos": 5.361516984661967e+18,
1179
  "train_loss": 5.0057218712328115,
1180
- "train_runtime": 5286.5666,
1181
- "train_samples_per_second": 9.883,
1182
- "train_steps_per_second": 0.155
1183
  }
1184
  ],
1185
  "logging_steps": 5,
 
438
  {
439
  "epoch": 1.098901098901099,
440
  "eval_loss": 4.895308017730713,
441
+ "eval_runtime": 14.5245,
442
+ "eval_samples_per_second": 36.421,
443
+ "eval_steps_per_second": 1.17,
444
  "step": 300
445
  },
446
  {
 
866
  {
867
  "epoch": 2.197802197802198,
868
  "eval_loss": 4.718916893005371,
869
+ "eval_runtime": 14.5284,
870
+ "eval_samples_per_second": 36.412,
871
+ "eval_steps_per_second": 1.17,
872
  "step": 600
873
  },
874
  {
 
1177
  "step": 819,
1178
  "total_flos": 5.361516984661967e+18,
1179
  "train_loss": 5.0057218712328115,
1180
+ "train_runtime": 5088.8536,
1181
+ "train_samples_per_second": 10.267,
1182
+ "train_steps_per_second": 0.161
1183
  }
1184
  ],
1185
  "logging_steps": 5,