sravanthib committed on
Commit
15f4c1c
·
verified ·
1 Parent(s): 1e77c97

Training completed

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. all_results.json +5 -5
  3. train_results.json +5 -5
  4. trainer_state.json +14 -7
README.md CHANGED
@@ -46,7 +46,7 @@ The following hyperparameters were used during training:
46
  - total_train_batch_size: 160
47
  - total_eval_batch_size: 64
48
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
49
- - lr_scheduler_type: cosine
50
  - lr_scheduler_warmup_ratio: 0.03
51
  - training_steps: 10
52
 
 
46
  - total_train_batch_size: 160
47
  - total_eval_batch_size: 64
48
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
49
+ - lr_scheduler_type: linear
50
  - lr_scheduler_warmup_ratio: 0.03
51
  - training_steps: 10
52
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.0182648401826484,
3
- "total_flos": 8.713180396545638e+16,
4
- "train_loss": 9.561991882324218,
5
- "train_runtime": 174.8934,
6
- "train_samples_per_second": 9.148,
7
- "train_steps_per_second": 0.057
8
  }
 
1
  {
2
  "epoch": 0.0182648401826484,
3
+ "total_flos": 1.394108846267433e+17,
4
+ "train_loss": 5.07354736328125,
5
+ "train_runtime": 154.4303,
6
+ "train_samples_per_second": 10.361,
7
+ "train_steps_per_second": 0.065
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.0182648401826484,
3
- "total_flos": 8.713180396545638e+16,
4
- "train_loss": 9.561991882324218,
5
- "train_runtime": 174.8934,
6
- "train_samples_per_second": 9.148,
7
- "train_steps_per_second": 0.057
8
  }
 
1
  {
2
  "epoch": 0.0182648401826484,
3
+ "total_flos": 1.394108846267433e+17,
4
+ "train_loss": 5.07354736328125,
5
+ "train_runtime": 154.4303,
6
+ "train_samples_per_second": 10.361,
7
+ "train_steps_per_second": 0.065
8
  }
trainer_state.json CHANGED
@@ -9,17 +9,24 @@
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
 
 
 
 
 
 
 
12
  {
13
  "epoch": 0.0182648401826484,
14
  "step": 10,
15
- "total_flos": 8.713180396545638e+16,
16
- "train_loss": 9.561991882324218,
17
- "train_runtime": 174.8934,
18
- "train_samples_per_second": 9.148,
19
- "train_steps_per_second": 0.057
20
  }
21
  ],
22
- "logging_steps": 50,
23
  "max_steps": 10,
24
  "num_input_tokens_seen": 0,
25
  "num_train_epochs": 1,
@@ -36,7 +43,7 @@
36
  "attributes": {}
37
  }
38
  },
39
- "total_flos": 8.713180396545638e+16,
40
  "train_batch_size": 2,
41
  "trial_name": null,
42
  "trial_params": null
 
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
+ {
13
+ "epoch": 0.0182648401826484,
14
+ "grad_norm": 0.36667829751968384,
15
+ "learning_rate": 0.0001,
16
+ "loss": 5.0735,
17
+ "step": 10
18
+ },
19
  {
20
  "epoch": 0.0182648401826484,
21
  "step": 10,
22
+ "total_flos": 1.394108846267433e+17,
23
+ "train_loss": 5.07354736328125,
24
+ "train_runtime": 154.4303,
25
+ "train_samples_per_second": 10.361,
26
+ "train_steps_per_second": 0.065
27
  }
28
  ],
29
+ "logging_steps": 10,
30
  "max_steps": 10,
31
  "num_input_tokens_seen": 0,
32
  "num_train_epochs": 1,
 
43
  "attributes": {}
44
  }
45
  },
46
+ "total_flos": 1.394108846267433e+17,
47
  "train_batch_size": 2,
48
  "trial_name": null,
49
  "trial_params": null