satyanshu404 committed on
Commit
18227aa
1 Parent(s): 11e929e

Model save

Browse files
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Phi-3-mini-4k-instruct
3
+ library_name: peft
4
+ tags:
5
+ - trl
6
+ - sft
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: Phi-3-mini-4k-instruct-8bit-LMSYS-finetuned-v03
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # Phi-3-mini-4k-instruct-8bit-LMSYS-finetuned-v03
17
+
18
+ This model is a fine-tuned version of Phi-3-mini-4k-instruct (a PEFT adapter trained with TRL supervised fine-tuning) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.4867
21
+
22
+ ## Model description
23
+
24
+ More information needed
25
+
26
+ ## Intended uses & limitations
27
+
28
+ More information needed
29
+
30
+ ## Training and evaluation data
31
+
32
+ More information needed
33
+
34
+ ## Training procedure
35
+
36
+ ### Training hyperparameters
37
+
38
+ The following hyperparameters were used during training:
39
+ - learning_rate: 5e-07
40
+ - train_batch_size: 1
41
+ - eval_batch_size: 1
42
+ - seed: 0
43
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
+ - lr_scheduler_type: cosine
45
+ - lr_scheduler_warmup_ratio: 0.2
46
+ - num_epochs: 3
47
+
48
+ ### Training results
49
+
50
+
51
+
52
+ ### Framework versions
53
+
54
+ - PEFT 0.10.0
55
+ - Transformers 4.43.3
56
+ - Pytorch 2.2.2+cu121
57
+ - Datasets 2.20.0
58
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.4867475926876068,
4
+ "eval_runtime": 138.0836,
5
+ "eval_samples": 558,
6
+ "eval_samples_per_second": 4.041,
7
+ "eval_steps_per_second": 4.041,
8
+ "total_flos": 1.3318012341259592e+18,
9
+ "train_loss": 0.5598785406739636,
10
+ "train_runtime": 54928.4731,
11
+ "train_samples_per_second": 0.984,
12
+ "train_steps_per_second": 0.984
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.4867475926876068,
4
+ "eval_runtime": 138.0836,
5
+ "eval_samples": 558,
6
+ "eval_samples_per_second": 4.041,
7
+ "eval_steps_per_second": 4.041
8
+ }
tokenizer_config.json CHANGED
@@ -123,7 +123,7 @@
123
  "legacy": false,
124
  "model_max_length": 4096,
125
  "pad_token": "<unk>",
126
- "padding_side": "right",
127
  "sp_model_kwargs": {},
128
  "tokenizer_class": "LlamaTokenizer",
129
  "unk_token": "<unk>",
 
123
  "legacy": false,
124
  "model_max_length": 4096,
125
  "pad_token": "<unk>",
126
+ "padding_side": "left",
127
  "sp_model_kwargs": {},
128
  "tokenizer_class": "LlamaTokenizer",
129
  "unk_token": "<unk>",
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 1.3318012341259592e+18,
4
+ "train_loss": 0.5598785406739636,
5
+ "train_runtime": 54928.4731,
6
+ "train_samples_per_second": 0.984,
7
+ "train_steps_per_second": 0.984
8
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff