diagonalge committed (verified)
Commit 654097c · 1 Parent(s): e334eae

Upload task output test1334test1234test1234test12334

README.md CHANGED
@@ -38,7 +38,7 @@ deepspeed: null
  early_stopping_patience: null
  eval_max_new_tokens: 128
  eval_table_size: null
- evals_per_epoch: 4
+ evals_per_epoch: 1
  flash_attention: false
  fp16: null
  fsdp: null
@@ -58,7 +58,7 @@ lora_model_dir: null
  lora_r: 8
  lora_target_linear: true
  lr_scheduler: cosine
- max_steps: 10
+ max_steps: 1
  micro_batch_size: 2
  mlflow_experiment_name: /workspace/axolotl/data/test1334test1234test1234test12334_train_data.json
  model_type: AutoModelForCausalLM
@@ -69,7 +69,7 @@ pad_to_sequence_len: true
  resume_from_checkpoint: null
  s2_attention: null
  sample_packing: false
- saves_per_epoch: 4
+ saves_per_epoch: 1
  sequence_len: 512
  strict: false
  tf32: false
@@ -77,7 +77,13 @@ tokenizer_type: AutoTokenizer
  train_on_inputs: false
  trust_remote_code: true
  val_set_size: 0.05
- warmup_steps: 10
+ wandb_entity: null
+ wandb_mode: offline
+ wandb_name: test1334test1234test1234test12334_texttest
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: test1334test1234test1234test12334_texttest
+ warmup_steps: 1
  weight_decay: 0.0
  xformers_attention: null
 
@@ -88,8 +94,6 @@ xformers_attention: null
  # workspace/axolotl/outputs/test1334test1234test1234test12334/texttest
 
  This model was trained from scratch on the None dataset.
- It achieves the following results on the evaluation set:
- - Loss: 0.9023
 
  ## Model description
 
@@ -116,17 +120,11 @@ The following hyperparameters were used during training:
  - total_train_batch_size: 8
  - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: cosine
- - lr_scheduler_warmup_steps: 10
- - training_steps: 10
+ - lr_scheduler_warmup_steps: 2
+ - training_steps: 1
 
  ### Training results
 
- | Training Loss | Epoch | Step | Validation Loss |
- |:-------------:|:------:|:----:|:---------------:|
- | No log | 0 | 0 | 0.9052 |
- | 1.0164 | 0.0372 | 3 | 0.9057 |
- | 1.0848 | 0.0743 | 6 | 0.9046 |
- | 1.0387 | 0.1115 | 9 | 0.9023 |
 
 
  ### Framework versions
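
The hyperparameter list in the README diff above maps fairly directly onto a Hugging Face `TrainingArguments` object. The sketch below is illustrative only, not the `training_args.bin` from this commit: `gradient_accumulation_steps` and the output path are assumptions inferred from `total_train_batch_size: 8` with `micro_batch_size: 2`, and the learning rate is omitted because it does not appear in the changed lines.

```python
from transformers import TrainingArguments

# Rough equivalent of the updated README hyperparameters (a sketch under the
# assumptions noted above).
args = TrainingArguments(
    output_dir="workspace/axolotl/outputs/test1334test1234test1234test12334/texttest",
    per_device_train_batch_size=2,   # micro_batch_size in the config
    gradient_accumulation_steps=4,   # assumed: total 8 / micro 2 on one device
    max_steps=1,                     # training_steps after this change
    warmup_steps=2,                  # lr_scheduler_warmup_steps after this change
    lr_scheduler_type="cosine",
    optim="adamw_bnb_8bit",          # OptimizerNames.ADAMW_BNB
    weight_decay=0.0,
)
```
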
adapter_config.json CHANGED
@@ -25,12 +25,12 @@
  "revision": null,
  "target_modules": [
  "q_proj",
- "v_proj",
- "o_proj",
+ "gate_proj",
+ "down_proj",
  "k_proj",
  "up_proj",
- "gate_proj",
- "down_proj"
+ "o_proj",
+ "v_proj"
  ],
  "task_type": "CAUSAL_LM",
  "trainable_token_indices": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fc9aeac2acbc2455a5f0737412114cef01add69564a125108760ac8956c75f9f
+ oid sha256:ce2e9c10572d78b4dfbc2a0ae3083c41f64ff61a0e9b50805149deb9ff0315d7
  size 22573704
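
The updated `adapter_model.safetensors` holds only the LoRA weights; they are applied on top of the base model recorded in `adapter_config.json`. A minimal loading sketch, assuming the adapter directory has been downloaded locally ("path/to/adapter" is a placeholder, and the hub id from the previous adapter_config.json is used because the new value is a local cache path):

```python
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Load the base model, then attach the uploaded LoRA adapter.
base = AutoModelForCausalLM.from_pretrained(
    "samoline/b7447218-27e6-491c-b3ab-ea03a5b93541"
)
model = PeftModel.from_pretrained(base, "path/to/adapter")
```
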
checkpoint-1/README.md CHANGED
@@ -1,5 +1,5 @@
  ---
- base_model: samoline/b7447218-27e6-491c-b3ab-ea03a5b93541
+ base_model: /cache/test1334test1234test1234test12334/models/samoline--b7447218-27e6-491c-b3ab-ea03a5b93541
  library_name: peft
  ---
 
@@ -199,4 +199,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
  [More Information Needed]
  ### Framework versions
 
- - PEFT 0.13.2
+ - PEFT 0.15.2
checkpoint-1/adapter_config.json CHANGED
@@ -1,8 +1,11 @@
  {
  "alpha_pattern": {},
  "auto_mapping": null,
- "base_model_name_or_path": "samoline/b7447218-27e6-491c-b3ab-ea03a5b93541",
+ "base_model_name_or_path": "/cache/test1334test1234test1234test12334/models/samoline--b7447218-27e6-491c-b3ab-ea03a5b93541",
  "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
  "fan_in_fan_out": null,
  "inference_mode": true,
  "init_lora_weights": true,
@@ -11,6 +14,7 @@
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 16,
+ "lora_bias": false,
  "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
@@ -20,15 +24,16 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "q_proj",
  "gate_proj",
- "up_proj",
- "k_proj",
  "down_proj",
- "v_proj",
+ "k_proj",
+ "up_proj",
  "o_proj",
- "q_proj"
+ "v_proj"
  ],
  "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
  "use_dora": false,
  "use_rslora": false
  }
checkpoint-1/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e43c34ff207fd3074b49d5fd4f002f67e3769e3658eb2a1b133c46e90ce21d94
+ oid sha256:ce2e9c10572d78b4dfbc2a0ae3083c41f64ff61a0e9b50805149deb9ff0315d7
  size 22573704
checkpoint-1/chat_template.jinja ADDED
@@ -0,0 +1,5 @@
+ {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>
+ 
+ '+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>
+ 
+ ' }}{% endif %}
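
The added chat template is the standard Llama-3-style header format. A small sketch of how it renders, using jinja2 directly with made-up messages (the special tokens match the tokenizer_config.json in this commit):

```python
from jinja2 import Template

# The template added in checkpoint-1/chat_template.jinja, inlined as a Python string.
chat_template = (
    "{% if not add_generation_prompt is defined %}"
    "{% set add_generation_prompt = false %}{% endif %}"
    "{% set loop_messages = messages %}{% for message in loop_messages %}"
    "{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'"
    " + message['content'] | trim + '<|eot_id|>' %}"
    "{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}"
    "{{ content }}{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
)

messages = [{"role": "user", "content": "Hello"}]

# Prints the prompt string: bos token, a user header, 'Hello', <|eot_id|>,
# then an assistant header ready for generation.
print(Template(chat_template).render(
    messages=messages,
    bos_token="<|begin_of_text|>",
    add_generation_prompt=True,
))
```
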
checkpoint-1/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ce93b8d86fadd8e6454d9a6661cc2fce00a1502d154dc7d2eb74db3866c3925f
+ oid sha256:876bb53a025a237edcf5100f4fb0c41ac9d1e85c3f646614d528015a4ee9e484
  size 11710970
checkpoint-1/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7f6d4a6f3773b907ac1538b67c4fa544962eb5b1fc06aa0e7c5a313a547dc052
+ oid sha256:f97bb552d304801def1800311e9b4f0db4888641a7d6326f60a0099954973d2b
  size 14244
checkpoint-1/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dcd240061f71d888d805feed14123a68774f5e130888a3057bdaa388caa9028c
+ oid sha256:ebe0540ed6e94d693724d0fd61916b0efd9dc6adbac83c0b0a2d35823214a0a5
  size 1064
checkpoint-1/tokenizer_config.json CHANGED
@@ -2051,7 +2051,6 @@
  }
  },
  "bos_token": "<|begin_of_text|>",
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|end_of_text|>",
  "extra_special_tokens": {},
checkpoint-1/trainer_state.json CHANGED
@@ -1,7 +1,8 @@
  {
+ "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.008421052631578947,
+ "epoch": 8.17026839331672e-05,
  "eval_steps": 500,
  "global_step": 1,
  "is_hyper_param_search": false,
@@ -9,18 +10,10 @@
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.008421052631578947,
- "grad_norm": 0.42370617389678955,
+ "epoch": 8.17026839331672e-05,
+ "grad_norm": 0.5843780636787415,
  "learning_rate": 0.0,
- "loss": 0.5794,
- "step": 1
- },
- {
- "epoch": 0.008421052631578947,
- "eval_loss": 0.8261134624481201,
- "eval_runtime": 1.1415,
- "eval_samples_per_second": 43.801,
- "eval_steps_per_second": 21.901,
+ "loss": 0.925,
  "step": 1
  }
  ],
@@ -41,7 +34,7 @@
  "attributes": {}
  }
  },
- "total_flos": 30068189429760.0,
+ "total_flos": 24054551543808.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
checkpoint-1/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8065f5a864b80993ab1beab427b139b642e1c31a0a8f2d96defab61f080c3fbd
- size 6776
+ oid sha256:169fcda599c2acc655ce38e05e7603c80e5959ce0cd31227facd2f596cd183b3
+ size 7224