File size: 1,547 Bytes
3e02bbf
 
 
3ef6ac1
3e02bbf
8cf4366
3e02bbf
 
 
 
 
3ef6ac1
8cf4366
3ef6ac1
3e02bbf
 
 
3ef6ac1
8cf4366
3ef6ac1
3e02bbf
 
 
3ef6ac1
 
 
 
 
 
 
 
 
8cf4366
3ef6ac1
3e02bbf
 
 
3ef6ac1
8cf4366
3ef6ac1
3e02bbf
 
 
3ef6ac1
 
 
 
 
3e02bbf
 
 
3ef6ac1
8cf4366
3ef6ac1
 
 
 
 
3e02bbf
 
 
8cf4366
3e02bbf
3ef6ac1
3e02bbf
3ef6ac1
3e02bbf
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.92,
  "eval_steps": 500,
  "global_step": 18,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 0.0001,
      "loss": 2.0929,
      "step": 1
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00018314696123025454,
      "loss": 1.5756,
      "step": 5
    },
    {
      "epoch": 0.96,
      "eval_loss": 0.2955792546272278,
      "eval_runtime": 7.0548,
      "eval_samples_per_second": 14.175,
      "eval_steps_per_second": 1.843,
      "step": 9
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.0001,
      "loss": 0.4344,
      "step": 10
    },
    {
      "epoch": 1.6,
      "learning_rate": 1.6853038769745467e-05,
      "loss": 0.2474,
      "step": 15
    },
    {
      "epoch": 1.92,
      "eval_loss": 0.24160662293434143,
      "eval_runtime": 6.0397,
      "eval_samples_per_second": 16.557,
      "eval_steps_per_second": 2.152,
      "step": 18
    },
    {
      "epoch": 1.92,
      "step": 18,
      "total_flos": 19174999326720.0,
      "train_loss": 0.692977637052536,
      "train_runtime": 275.1049,
      "train_samples_per_second": 2.181,
      "train_steps_per_second": 0.065
    }
  ],
  "logging_steps": 5,
  "max_steps": 18,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "total_flos": 19174999326720.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}