error577 committed on
Commit e6fca49 · verified · 1 Parent(s): 78f378b

End of training

Files changed (3)
  1. README.md +11 -11
  2. adapter_model.bin +1 -1
  3. adapter_model.safetensors +1 -1
README.md CHANGED
@@ -42,7 +42,7 @@ debug: null
 deepspeed: null
 early_stopping_patience: 3
 eval_max_new_tokens: 128
-eval_steps: 200
+eval_steps: 1000
 eval_table_size: null
 evals_per_epoch: null
 flash_attention: true
@@ -70,7 +70,7 @@ lora_target_linear: true
 lr_scheduler: cosine
 max_grad_norm: 1.0
 max_steps: null
-micro_batch_size: 16
+micro_batch_size: 8
 mlflow_experiment_name: /tmp/1b5f3f0e9699035e_train_data.json
 model_type: AutoModelForCausalLM
 num_epochs: 10
@@ -80,7 +80,7 @@ pad_to_sequence_len: true
 resume_from_checkpoint: null
 s2_attention: null
 sample_packing: false
-save_steps: 200
+save_steps: 1000
 sequence_len: 256
 strict: false
 tf32: false
@@ -126,11 +126,11 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
-- train_batch_size: 16
-- eval_batch_size: 16
+- train_batch_size: 8
+- eval_batch_size: 8
 - seed: 42
 - gradient_accumulation_steps: 2
-- total_train_batch_size: 32
+- total_train_batch_size: 16
 - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 30
@@ -140,11 +140,11 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 23.0 | 0.0012 | 1 | 11.5 |
-| 23.0 | 0.2401 | 200 | 11.5 |
-| 23.0 | 0.4802 | 400 | 11.5 |
-| 23.0 | 0.7203 | 600 | 11.5 |
-| 23.0 | 0.9604 | 800 | 11.5 |
+| 23.0 | 0.0006 | 1 | 11.5 |
+| 23.0 | 0.6002 | 1000 | 11.5 |
+| 23.0 | 1.2005 | 2000 | 11.5 |
+| 23.0 | 1.8007 | 3000 | 11.5 |
+| 23.0 | 2.4010 | 4000 | 11.5 |
 
 
 ### Framework versions
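The batch-size changes in this diff are internally consistent: with gradient_accumulation_steps fixed at 2, halving micro_batch_size from 16 to 8 halves total_train_batch_size from 32 to 16, which doubles the optimizer steps per epoch (about 833 before, about 1666 after, per the Epoch and Step columns of the loss table). Raising eval_steps and save_steps from 200 to 1000 therefore spaces evaluations and checkpoints further apart in epoch terms (every ~0.24 epochs before, every ~0.60 after). A minimal sketch of the arithmetic, assuming a single GPU (world size 1; the config does not state the device count) and using an illustrative helper name:

```python
# Sketch of the effective-batch arithmetic implied by the README diff above.
# ASSUMPTION: a single GPU (world_size = 1); the config does not say.
# `total_train_batch_size` here is an illustrative helper, not axolotl API.

def total_train_batch_size(micro_batch_size: int,
                           gradient_accumulation_steps: int,
                           world_size: int = 1) -> int:
    """Samples consumed per optimizer step."""
    return micro_batch_size * gradient_accumulation_steps * world_size

old = total_train_batch_size(16, 2)  # 32, matches the removed README line
new = total_train_batch_size(8, 2)   # 16, matches the added README line
assert (old, new) == (32, 16)

# Cross-check against the loss table: dataset_size ~= step * batch / epoch.
for step, epoch, batch in [(200, 0.2401, old), (1000, 0.6002, new)]:
    print(f"implied dataset size: {step * batch / epoch:,.0f} samples")
# Both runs imply ~26,656 training samples, a sanity check that only the
# batch size (not the data) changed between the two runs.
```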
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f8a98a50fb4e7405dd2d3203df6ffd2acbb804cd11be619f8b59e25d5489c3a
+oid sha256:9b57cd978fdd27561e08d2fc05cfe493b5341249c23a6da566b3ed6c181d6971
 size 21458
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5419ebdccf6a34eba56a6782ea49dca7b7e2fa87af59eb0e58d04fe2bab99324
+oid sha256:e411c36ceecfaefc31ff0641b0e21bf9eb227b0fa3bba226dc5a55fcd0a06915
 size 18064
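adapter_model.bin and adapter_model.safetensors are stored through Git LFS, so the two diffs above touch only the three-line pointer files: the pointer spec version, the sha256 oid of the new blob, and its byte size (unchanged at 21458 and 18064 bytes respectively, since only the weight values differ). A minimal sketch of what such a pointer contains, assuming a local copy of the file (illustrative only; real pointers are written by git-lfs itself, not by hand):

```python
# Sketch: what a Git LFS pointer file like the ones diffed above contains.
# Illustrative only -- real pointers are generated by `git lfs`.
import hashlib
from pathlib import Path

def lfs_pointer(path: str) -> str:
    data = Path(path).read_bytes()
    oid = hashlib.sha256(data).hexdigest()  # the `oid sha256:` field
    return (
        "version https://git-lfs.github.com/spec/v1\n"
        f"oid sha256:{oid}\n"
        f"size {len(data)}\n"  # byte size; 18064 for the safetensors adapter
    )

print(lfs_pointer("adapter_model.safetensors"))
```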