Upload config and log files

Files changed (6) hide show

all_results.json ADDED Viewed

+{
+    "epoch": 1.0,
+    "num_input_tokens_seen": 33437856,
+    "total_flos": 7.1914395644928e+16,
+    "train_loss": 0.7151971128082275,
+    "train_runtime": 36754.4929,
+    "train_samples_per_second": 2.721,
+    "train_steps_per_second": 0.17
+}

train_results.json ADDED Viewed

+{
+    "epoch": 1.0,
+    "num_input_tokens_seen": 33437856,
+    "total_flos": 7.1914395644928e+16,
+    "train_loss": 0.7151971128082275,
+    "train_runtime": 36754.4929,
+    "train_samples_per_second": 2.721,
+    "train_steps_per_second": 0.17
+}

trainer_log.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.yaml ADDED Viewed

+bf16: true
+cutoff_len: 1024
+dataset: mathinstruct
+dataset_dir: data
+ddp_timeout: 180000000
+do_train: true
+finetuning_type: lora
+flash_attn: auto
+gradient_accumulation_steps: 8
+include_num_input_tokens_seen: true
+learning_rate: 5.0e-05
+logging_steps: 5
+lora_alpha: 2
+lora_dropout: 0
+lora_rank: 1
+lora_target: all
+lr_scheduler_type: cosine
+max_grad_norm: 1.0
+max_samples: 100000
+model_name_or_path: D:/models/Qwen2.5-0.5B
+num_train_epochs: 1.0
+optim: adamw_torch
+output_dir: saves\Qwen2.5-0.5B\lora\train_2024-09-26-14-50-59
+packing: false
+per_device_train_batch_size: 2
+plot_loss: true
+preprocessing_num_workers: 16
+quantization_bit: 4
+quantization_method: bitsandbytes
+report_to: all
+save_steps: 100
+stage: sft
+template: qwen
+warmup_steps: 0

training_loss.png ADDED Viewed