---
license: cc-by-nc-4.0
base_model:
- unsloth/llama-3-8b-bnb-4bit
pipeline_tag: text-generation
tags:
- conversational
- llama
- ollama
- unsloth
- gguf
- 8b
---

# Training Process
## Model + LoRA Loading
```python
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048
dtype = None  # None = auto-detect; torch.float16 for Tesla T4/V100, torch.bfloat16 for Ampere+
load_in_4bit = True  # Use 4-bit quantization to reduce memory usage; can be False

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...",  # needed for gated models such as meta-llama/Llama-2-7b-hf
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 16,  # LoRA rank; any value > 0 works, suggested: 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,  # Any value is supported, but 0 is optimized
    bias = "none",  # Any value is supported, but "none" is optimized
    use_gradient_checkpointing = "unsloth",  # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # Rank-stabilized LoRA is supported
    loftq_config = None,  # LoftQ is supported as well
)
```
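
After attaching the adapters, it can be worth confirming how small the trainable footprint actually is. A minimal sanity check, assuming the model returned by `get_peft_model` exposes the standard `peft` helper (this check is not part of the original card):

```python
# Sanity check (assumption: the returned model wraps a standard PEFT model).
# Prints trainable adapter parameters vs. the frozen 8B base, in peft's format:
# "trainable params: ... || all params: ... || trainable%: ..."
model.print_trainable_parameters()
```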

## Dataset Preparation
```python
from datasets import load_dataset
dataset = load_dataset(
    "csv",
    data_files = "/content/synth_data.csv",
    split = "train",
)

from unsloth import to_sharegpt
dataset = to_sharegpt(
    dataset,
    merged_prompt = "Labels: {available_entities}\n\nText: {text}\n",
    conversation_extension = 5,  # Randomly merges several single-turn rows into one conversation
    output_column_name = "label",
)

from unsloth import standardize_sharegpt
dataset = standardize_sharegpt(dataset)

chat_template = """{SYSTEM}
USER: {INPUT}
ASSISTANT: {OUTPUT}"""

from unsloth import apply_chat_template
dataset = apply_chat_template(
    dataset,
    tokenizer = tokenizer,
    chat_template = chat_template,
    default_system_message = "NER Task: Label the text based on the available Labels.",
)
```
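
The card does not show `synth_data.csv` itself; from the `merged_prompt` placeholders it presumably contains at least `text`, `available_entities`, and `label` columns. A quick, hypothetical way to check what the pipeline ends up feeding the trainer:

```python
# Hypothetical CSV layout (column names inferred from merged_prompt; values invented):
#   text,available_entities,label
#   "doctors in berlin","ATTR, CITY, ..., WORK_S","..."

# After apply_chat_template the rendered prompt lives in the "text" column,
# which is what SFTTrainer consumes below via dataset_text_field = "text".
print(dataset[0]["text"])
```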

## Training Configuration
```python
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = True,  # Can make training 5x faster for short sequences
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # max_steps = None,
        num_train_epochs = 1,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

trainer_stats = trainer.train()

# Export the merged model to GGUF (8-bit Q8_0); flip the guard to True to run the export
if False: model.save_pretrained_gguf("model", tokenizer)
```
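
The effective batch size follows from the arguments above and lines up with the 261-step epoch reported below; the dataset size itself is inferred, not stated in the card. A back-of-the-envelope check:

```python
# Effective batch size = per-device batch * gradient accumulation steps
effective_batch = 2 * 4  # 8 sequences per optimizer step

# With packing = True these are packed sequences, not raw CSV rows,
# so this bounds the data volume rather than counting examples exactly.
steps_per_epoch = 261  # reported under Training Results
print(steps_per_epoch * effective_batch)  # ~2088 packed sequences per epoch
```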

## Training Results
![image/png](https://cdn-uploads.huggingface.co/production/uploads/678fa336b0c8266724597a95/hcRhMrr2lC5f2b9enD_1z.png)
- Steps Trained: 26
- Final Loss: 0.1870
- Total Time: 21:04 (min:sec)
- A full epoch would have been 261 steps, so training covered roughly 10% of it

# Sample Inference
```python
FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
messages = [  # Change below!
    {"role": "user", "content": 'Labels: ATTR, CITY, CITY_PART, COUNTRY, O, ORG, PER, PHONE, REGION, REL, STREET, WORK_P, WORK_S\n\n'
                                'Text: "doctors in berlin"'},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(input_ids, streamer = text_streamer, max_new_tokens = 128, pad_token_id = tokenizer.eos_token_id)
```
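
For non-interactive use, the same inputs can be generated and decoded in one go instead of streamed. A short sketch using the standard `transformers` generate/decode calls; the prompt-stripping slice is an illustrative convention, not part of the original card:

```python
# Generate without a streamer, then decode only the newly produced tokens
outputs = model.generate(
    input_ids,
    max_new_tokens = 128,
    pad_token_id = tokenizer.eos_token_id,
)
new_tokens = outputs[0][input_ids.shape[-1]:]  # drop the echoed prompt
print(tokenizer.decode(new_tokens, skip_special_tokens = True))
```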