sravanthib committed on
Commit
21369fe
·
verified ·
1 Parent(s): 4f25d14

Training completed

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. all_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +38 -17
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  library_name: peft
3
  license: apache-2.0
4
- base_model: Qwen/Qwen2.5-32B
5
  tags:
6
  - generated_from_trainer
7
  model-index:
@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # refactored-code-llama-3-2-3b
16
 
17
- This model is a fine-tuned version of [Qwen/Qwen2.5-32B](https://huggingface.co/Qwen/Qwen2.5-32B) on an unknown dataset.
18
 
19
  ## Model description
20
 
@@ -45,7 +45,7 @@ The following hyperparameters were used during training:
45
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
46
  - lr_scheduler_type: cosine
47
  - lr_scheduler_warmup_ratio: 0.05
48
- - training_steps: 20
49
 
50
  ### Training results
51
 
 
1
  ---
2
  library_name: peft
3
  license: apache-2.0
4
+ base_model: Qwen/Qwen2.5-7B-Instruct
5
  tags:
6
  - generated_from_trainer
7
  model-index:
 
14
 
15
  # refactored-code-llama-3-2-3b
16
 
17
+ This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on an unknown dataset.
18
 
19
  ## Model description
20
 
 
45
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
46
  - lr_scheduler_type: cosine
47
  - lr_scheduler_warmup_ratio: 0.05
48
+ - training_steps: 50
49
 
50
  ### Training results
51
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.9523809523809526,
3
- "total_flos": 1.1721276362818847e+18,
4
- "train_loss": 1.1126826643943786,
5
- "train_runtime": 855.5742,
6
- "train_samples_per_second": 3.74,
7
- "train_steps_per_second": 0.023
8
  }
 
1
  {
2
+ "epoch": 7.158730158730159,
3
+ "total_flos": 6.287430713700516e+17,
4
+ "train_loss": 0.912325621843338,
5
+ "train_runtime": 619.2374,
6
+ "train_samples_per_second": 12.919,
7
+ "train_steps_per_second": 0.081
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.9523809523809526,
3
- "total_flos": 1.1721276362818847e+18,
4
- "train_loss": 1.1126826643943786,
5
- "train_runtime": 855.5742,
6
- "train_samples_per_second": 3.74,
7
- "train_steps_per_second": 0.023
8
  }
 
1
  {
2
+ "epoch": 7.158730158730159,
3
+ "total_flos": 6.287430713700516e+17,
4
+ "train_loss": 0.912325621843338,
5
+ "train_runtime": 619.2374,
6
+ "train_samples_per_second": 12.919,
7
+ "train_steps_per_second": 0.081
8
  }
trainer_state.json CHANGED
@@ -2,42 +2,63 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.9523809523809526,
6
  "eval_steps": 0,
7
- "global_step": 20,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.4761904761904763,
14
- "grad_norm": 0.09666553139686584,
15
  "learning_rate": 0.0001,
16
- "loss": 2.2041,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 2.9523809523809526,
21
- "grad_norm": 0.06255044788122177,
22
  "learning_rate": 0.0001,
23
- "loss": 0.0213,
24
  "step": 20
25
  },
26
  {
27
- "epoch": 2.9523809523809526,
28
- "step": 20,
29
- "total_flos": 1.1721276362818847e+18,
30
- "train_loss": 1.1126826643943786,
31
- "train_runtime": 855.5742,
32
- "train_samples_per_second": 3.74,
33
- "train_steps_per_second": 0.023
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  }
35
  ],
36
  "logging_steps": 10,
37
- "max_steps": 20,
38
  "num_input_tokens_seen": 0,
39
- "num_train_epochs": 4,
40
- "save_steps": 20,
41
  "stateful_callbacks": {
42
  "TrainerControl": {
43
  "args": {
@@ -50,7 +71,7 @@
50
  "attributes": {}
51
  }
52
  },
53
- "total_flos": 1.1721276362818847e+18,
54
  "train_batch_size": 2,
55
  "trial_name": null,
56
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 7.158730158730159,
6
  "eval_steps": 0,
7
+ "global_step": 50,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.4761904761904763,
14
+ "grad_norm": 0.34064996242523193,
15
  "learning_rate": 0.0001,
16
+ "loss": 4.3632,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 2.9523809523809526,
21
+ "grad_norm": 0.1575096845626831,
22
  "learning_rate": 0.0001,
23
+ "loss": 0.0554,
24
  "step": 20
25
  },
26
  {
27
+ "epoch": 4.317460317460317,
28
+ "grad_norm": 0.16478388011455536,
29
+ "learning_rate": 0.0001,
30
+ "loss": 0.0546,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 5.7936507936507935,
35
+ "grad_norm": 0.140571728348732,
36
+ "learning_rate": 0.0001,
37
+ "loss": 0.0479,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 7.158730158730159,
42
+ "grad_norm": 0.1423598974943161,
43
+ "learning_rate": 0.0001,
44
+ "loss": 0.0405,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 7.158730158730159,
49
+ "step": 50,
50
+ "total_flos": 6.287430713700516e+17,
51
+ "train_loss": 0.912325621843338,
52
+ "train_runtime": 619.2374,
53
+ "train_samples_per_second": 12.919,
54
+ "train_steps_per_second": 0.081
55
  }
56
  ],
57
  "logging_steps": 10,
58
+ "max_steps": 50,
59
  "num_input_tokens_seen": 0,
60
+ "num_train_epochs": 9,
61
+ "save_steps": 50,
62
  "stateful_callbacks": {
63
  "TrainerControl": {
64
  "args": {
 
71
  "attributes": {}
72
  }
73
  },
74
+ "total_flos": 6.287430713700516e+17,
75
  "train_batch_size": 2,
76
  "trial_name": null,
77
  "trial_params": null