File size: 2,711 Bytes
cdcda1e
 
 
 
 
a9ed7f0
cdcda1e
 
 
 
abd3264
 
a9ed7f0
 
 
 
abd3264
 
 
a9ed7f0
 
 
 
abd3264
 
 
a9ed7f0
 
 
 
 
abd3264
 
 
a9ed7f0
 
 
 
abd3264
 
 
a9ed7f0
 
 
 
abd3264
 
 
a9ed7f0
 
 
 
cdcda1e
 
 
a9ed7f0
 
 
 
 
abd3264
 
 
a9ed7f0
 
 
 
abd3264
 
 
a9ed7f0
 
 
 
cdcda1e
 
 
a9ed7f0
 
 
 
 
cdcda1e
 
 
a9ed7f0
 
 
 
 
 
cdcda1e
 
 
a9ed7f0
cdcda1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9ed7f0
abd3264
cdcda1e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 3750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.4,
      "grad_norm": 4.658291339874268,
      "learning_rate": 2.8703181864639013e-06,
      "loss": 0.64,
      "step": 500
    },
    {
      "epoch": 0.8,
      "grad_norm": 4.34930944442749,
      "learning_rate": 2.5036959095382875e-06,
      "loss": 0.6413,
      "step": 1000
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.6420477032661438,
      "eval_runtime": 20.5136,
      "eval_samples_per_second": 97.497,
      "eval_steps_per_second": 12.187,
      "step": 1250
    },
    {
      "epoch": 1.2,
      "grad_norm": 5.983999252319336,
      "learning_rate": 1.963525491562421e-06,
      "loss": 0.4847,
      "step": 1500
    },
    {
      "epoch": 1.6,
      "grad_norm": 5.177482604980469,
      "learning_rate": 1.3432073050985201e-06,
      "loss": 0.3344,
      "step": 2000
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.585903167724609,
      "learning_rate": 7.500000000000003e-07,
      "loss": 0.3318,
      "step": 2500
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.7324458360671997,
      "eval_runtime": 20.5637,
      "eval_samples_per_second": 97.259,
      "eval_steps_per_second": 12.157,
      "step": 2500
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.989898681640625,
      "learning_rate": 2.86474508437579e-07,
      "loss": 0.1551,
      "step": 3000
    },
    {
      "epoch": 2.8,
      "grad_norm": 5.329368591308594,
      "learning_rate": 3.277859889929147e-08,
      "loss": 0.1518,
      "step": 3500
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.994577169418335,
      "eval_runtime": 20.6046,
      "eval_samples_per_second": 97.066,
      "eval_steps_per_second": 12.133,
      "step": 3750
    },
    {
      "epoch": 3.0,
      "step": 3750,
      "total_flos": 2.3924465493816115e+17,
      "train_loss": 0.37510518595377607,
      "train_runtime": 2372.5928,
      "train_samples_per_second": 12.644,
      "train_steps_per_second": 1.581
    }
  ],
  "logging_steps": 500,
  "max_steps": 3750,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.3924465493816115e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}