Mollel committed on
Commit
9692131
·
verified ·
1 Parent(s): c22a152

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +29 -0
README.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - sw
4
+ ---
5
+
6
+ alpaca_prompt = """Hapo chini kuna maelezo ya kazi, pamoja na maelezo ya ziada yanayotoa muktadha zaidi. Andika jibu ambalo linakamilisha ombi hilo ipasavyo.
7
+
8
+ ### Maelezo:
9
+ {}
10
+
11
+ ### Ziada:
12
+ {}
13
+
14
+ ### Jibu:
15
+ {}"""
16
+ EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
17
+ def formatting_prompts_func(examples):
18
+ instructions = examples["instruction"]
19
+ inputs = examples["input"]
20
+ outputs = examples["output"]
21
+ texts = []
22
+ for instruction, input, output in zip(instructions, inputs, outputs):
23
+ # Must add EOS_TOKEN, otherwise your generation will go on forever!
24
+ text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
25
+ texts.append(text)
26
+ return { "text" : texts, }
27
+ pass
28
+
29
+ from datasets import load_dataset