---
language:
- sw
---
|
|
|
```python
|
# Alpaca-style prompt template (Swahili). The three positional `{}` slots are
# filled, in order, with the instruction, the additional context, and the
# response. Stray `|` characters and doubled blank lines from a garbled paste
# were removed so the literal parses and the prompt text is clean.
alpaca_prompt = """Hapo chini kuna maelezo ya kazi, pamoja na maelezo ya ziada yanayotoa muktadha zaidi. Andika jibu ambalo linakamilisha ombi hilo ipasavyo.

### Maelezo:
{}

### Ziada:
{}

### Jibu:
{}"""
|
# End-of-sequence marker read from the tokenizer; it is appended to every
# formatted training example below.
# NOTE(review): `tokenizer` is not defined in this snippet; it is presumably
# created earlier when the model is loaded. Confirm before running.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
|
def formatting_prompts_func(examples):
    """Render a batch of examples into Alpaca-style training strings.

    Args:
        examples: Mapping holding parallel sequences under the keys
            "instruction", "input" and "output", one entry per example.

    Returns:
        A dict with the single key "text" whose value is a list containing
        one formatted prompt per example, each ending with EOS_TOKEN.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    # EOS_TOKEN must be appended, otherwise generation will go on forever.
    return {
        "text": [
            alpaca_prompt.format(instruction, context, response) + EOS_TOKEN
            for instruction, context, response in rows
        ],
    }
|
|
|
from datasets import load_dataset
|
|
|
```