diagonalge committed (verified)
Commit 654097c · 1 Parent(s): e334eae

Upload task output test1334test1234test1234test12334

README.md CHANGED
@@ -38,7 +38,7 @@ deepspeed: null
  early_stopping_patience: null
  eval_max_new_tokens: 128
  eval_table_size: null
- evals_per_epoch: 4
+ evals_per_epoch: 1
  flash_attention: false
  fp16: null
  fsdp: null
@@ -58,7 +58,7 @@ lora_model_dir: null
  lora_r: 8
  lora_target_linear: true
  lr_scheduler: cosine
- max_steps: 10
+ max_steps: 1
  micro_batch_size: 2
  mlflow_experiment_name: /workspace/axolotl/data/test1334test1234test1234test12334_train_data.json
  model_type: AutoModelForCausalLM
@@ -69,7 +69,7 @@ pad_to_sequence_len: true
  resume_from_checkpoint: null
  s2_attention: null
  sample_packing: false
- saves_per_epoch: 4
+ saves_per_epoch: 1
  sequence_len: 512
  strict: false
  tf32: false
@@ -77,7 +77,13 @@ tokenizer_type: AutoTokenizer
  train_on_inputs: false
  trust_remote_code: true
  val_set_size: 0.05
- warmup_steps: 10
+ wandb_entity: null
+ wandb_mode: offline
+ wandb_name: test1334test1234test1234test12334_texttest
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: test1334test1234test1234test12334_texttest
+ warmup_steps: 1
  weight_decay: 0.0
  xformers_attention: null
 
@@ -88,8 +94,6 @@ xformers_attention: null
  # workspace/axolotl/outputs/test1334test1234test1234test12334/texttest
 
  This model was trained from scratch on the None dataset.
- It achieves the following results on the evaluation set:
- - Loss: 0.9023
 
  ## Model description
 
@@ -116,17 +120,11 @@ The following hyperparameters were used during training:
  - total_train_batch_size: 8
  - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: cosine
- - lr_scheduler_warmup_steps: 10
- - training_steps: 10
+ - lr_scheduler_warmup_steps: 2
+ - training_steps: 1
 
  ### Training results
 
- | Training Loss | Epoch | Step | Validation Loss |
- |:-------------:|:------:|:----:|:---------------:|
- | No log | 0 | 0 | 0.9052 |
- | 1.0164 | 0.0372 | 3 | 0.9057 |
- | 1.0848 | 0.0743 | 6 | 0.9046 |
- | 1.0387 | 0.1115 | 9 | 0.9023 |
 
 
  ### Framework versions
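
The hyperparameter list in the README diff above maps fairly directly onto a Hugging Face `TrainingArguments` object. The sketch below is illustrative only, not the `training_args.bin` from this commit: `gradient_accumulation_steps` and the output path are assumptions inferred from `total_train_batch_size: 8` with `micro_batch_size: 2`, and the learning rate is omitted because it does not appear in the changed lines.

```python
from transformers import TrainingArguments

# Rough equivalent of the updated README hyperparameters (a sketch under the
# assumptions noted above).
args = TrainingArguments(
    output_dir="workspace/axolotl/outputs/test1334test1234test1234test12334/texttest",
    per_device_train_batch_size=2,   # micro_batch_size in the config
    gradient_accumulation_steps=4,   # assumed: total 8 / micro 2 on one device
    max_steps=1,                     # training_steps after this change
    warmup_steps=2,                  # lr_scheduler_warmup_steps after this change
    lr_scheduler_type="cosine",
    optim="adamw_bnb_8bit",          # OptimizerNames.ADAMW_BNB
    weight_decay=0.0,
)
```
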
adapter_config.json CHANGED
@@ -25,12 +25,12 @@
  "revision": null,
  "target_modules": [
  "q_proj",
- "v_proj",
- "o_proj",
+ "gate_proj",
+ "down_proj",
  "k_proj",
  "up_proj",
- "gate_proj",
- "down_proj"
+ "o_proj",
+ "v_proj"
  ],
  "task_type": "CAUSAL_LM",
  "trainable_token_indices": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fc9aeac2acbc2455a5f0737412114cef01add69564a125108760ac8956c75f9f
+ oid sha256:ce2e9c10572d78b4dfbc2a0ae3083c41f64ff61a0e9b50805149deb9ff0315d7
  size 22573704
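
The updated `adapter_model.safetensors` holds only the LoRA weights; they are applied on top of the base model recorded in `adapter_config.json`. A minimal loading sketch, assuming the adapter directory has been downloaded locally ("path/to/adapter" is a placeholder, and the hub id from the previous adapter_config.json is used because the new value is a local cache path):

```python
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Load the base model, then attach the uploaded LoRA adapter.
base = AutoModelForCausalLM.from_pretrained(
    "samoline/b7447218-27e6-491c-b3ab-ea03a5b93541"
)
model = PeftModel.from_pretrained(base, "path/to/adapter")
```
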
checkpoint-1/README.md CHANGED
@@ -1,5 +1,5 @@
  ---
- base_model: samoline/b7447218-27e6-491c-b3ab-ea03a5b93541
+ base_model: /cache/test1334test1234test1234test12334/models/samoline--b7447218-27e6-491c-b3ab-ea03a5b93541
  library_name: peft
  ---
 
@@ -199,4 +199,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
  [More Information Needed]
  ### Framework versions
 
- - PEFT 0.13.2
+ - PEFT 0.15.2
checkpoint-1/adapter_config.json CHANGED
@@ -1,8 +1,11 @@
  {
  "alpha_pattern": {},
  "auto_mapping": null,
- "base_model_name_or_path": "samoline/b7447218-27e6-491c-b3ab-ea03a5b93541",
+ "base_model_name_or_path": "/cache/test1334test1234test1234test12334/models/samoline--b7447218-27e6-491c-b3ab-ea03a5b93541",
  "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
  "fan_in_fan_out": null,
  "inference_mode": true,
  "init_lora_weights": true,
@@ -11,6 +14,7 @@
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 16,
+ "lora_bias": false,
  "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
@@ -20,15 +24,16 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "q_proj",
  "gate_proj",
- "up_proj",
- "k_proj",
  "down_proj",
- "v_proj",
+ "k_proj",
+ "up_proj",
  "o_proj",
- "q_proj"
+ "v_proj"
  ],
  "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
  "use_dora": false,
  "use_rslora": false
  }
checkpoint-1/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e43c34ff207fd3074b49d5fd4f002f67e3769e3658eb2a1b133c46e90ce21d94
+ oid sha256:ce2e9c10572d78b4dfbc2a0ae3083c41f64ff61a0e9b50805149deb9ff0315d7
  size 22573704
checkpoint-1/chat_template.jinja ADDED
@@ -0,0 +1,5 @@
+ {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>
+ 
+ '+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>
+ 
+ ' }}{% endif %}
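
The added chat template is the standard Llama-3-style header format. A small sketch of how it renders, using jinja2 directly with made-up messages (the special tokens match the tokenizer_config.json in this commit):

```python
from jinja2 import Template

# The template added in checkpoint-1/chat_template.jinja, inlined as a Python string.
chat_template = (
    "{% if not add_generation_prompt is defined %}"
    "{% set add_generation_prompt = false %}{% endif %}"
    "{% set loop_messages = messages %}{% for message in loop_messages %}"
    "{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'"
    " + message['content'] | trim + '<|eot_id|>' %}"
    "{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}"
    "{{ content }}{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
)

messages = [{"role": "user", "content": "Hello"}]

# Prints the prompt string: bos token, a user header, 'Hello', <|eot_id|>,
# then an assistant header ready for generation.
print(Template(chat_template).render(
    messages=messages,
    bos_token="<|begin_of_text|>",
    add_generation_prompt=True,
))
```
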
checkpoint-1/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ce93b8d86fadd8e6454d9a6661cc2fce00a1502d154dc7d2eb74db3866c3925f
+ oid sha256:876bb53a025a237edcf5100f4fb0c41ac9d1e85c3f646614d528015a4ee9e484
  size 11710970
checkpoint-1/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7f6d4a6f3773b907ac1538b67c4fa544962eb5b1fc06aa0e7c5a313a547dc052
+ oid sha256:f97bb552d304801def1800311e9b4f0db4888641a7d6326f60a0099954973d2b
  size 14244
checkpoint-1/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dcd240061f71d888d805feed14123a68774f5e130888a3057bdaa388caa9028c
+ oid sha256:ebe0540ed6e94d693724d0fd61916b0efd9dc6adbac83c0b0a2d35823214a0a5
  size 1064
checkpoint-1/tokenizer_config.json CHANGED
@@ -2051,7 +2051,6 @@
  }
  },
  "bos_token": "<|begin_of_text|>",
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|end_of_text|>",
  "extra_special_tokens": {},
checkpoint-1/trainer_state.json CHANGED
@@ -1,7 +1,8 @@
  {
+ "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.008421052631578947,
+ "epoch": 8.17026839331672e-05,
  "eval_steps": 500,
  "global_step": 1,
  "is_hyper_param_search": false,
@@ -9,18 +10,10 @@
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.008421052631578947,
- "grad_norm": 0.42370617389678955,
+ "epoch": 8.17026839331672e-05,
+ "grad_norm": 0.5843780636787415,
  "learning_rate": 0.0,
- "loss": 0.5794,
- "step": 1
- },
- {
- "epoch": 0.008421052631578947,
- "eval_loss": 0.8261134624481201,
- "eval_runtime": 1.1415,
- "eval_samples_per_second": 43.801,
- "eval_steps_per_second": 21.901,
+ "loss": 0.925,
  "step": 1
  }
  ],
@@ -41,7 +34,7 @@
  "attributes": {}
  }
  },
- "total_flos": 30068189429760.0,
+ "total_flos": 24054551543808.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
checkpoint-1/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8065f5a864b80993ab1beab427b139b642e1c31a0a8f2d96defab61f080c3fbd
- size 6776
+ oid sha256:169fcda599c2acc655ce38e05e7603c80e5959ce0cd31227facd2f596cd183b3
+ size 7224