# Fine-tuning script for an extractive question-answering model with Hugging
# Face Transformers. (Recovered from a Hub Spaces page scrape; the original
# page header and "Runtime error" status lines were extraction residue.)
from transformers import Trainer, TrainingArguments, AutoModelForQuestionAnswering, AutoTokenizer
from datasets import load_dataset

# Load the pretrained model and tokenizer from the Hugging Face Hub.
# NOTE(review): "DeepSeek/DeepSeek-v3" may not be a valid Hub repo id — the
# official checkpoint is published as "deepseek-ai/DeepSeek-V3"; confirm the
# id (and that a QA head is appropriate for it) before running.
model = AutoModelForQuestionAnswering.from_pretrained("DeepSeek/DeepSeek-v3")
tokenizer = AutoTokenizer.from_pretrained("DeepSeek/DeepSeek-v3")

# Load the train/test splits from local JSON files.
dataset = load_dataset(
    "json",
    data_files={"train": "your_dataset_train.json", "test": "your_dataset_test.json"},
)


def tokenize_function(examples):
    """Tokenize batched (question, document) pairs for the QA model.

    NOTE(review): extractive-QA training also needs ``start_positions`` /
    ``end_positions`` labels; without them the model returns no loss and
    ``Trainer.train()`` will fail. Compute answer-span labels here (see the
    HF question-answering fine-tuning guide) before training on real data.
    """
    return tokenizer(examples['question'], examples['document'], truncation=True, padding=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Hyperparameters for fine-tuning.
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",  # run evaluation on the test split once per epoch
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Trainer wires together the model, arguments, and tokenized splits.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
)

# Start fine-tuning.
trainer.train()

# Save both model and tokenizer so the output directory is self-contained
# and can be reloaded with AutoModel*/AutoTokenizer.from_pretrained().
model.save_pretrained('./fine_tuned_deepseek')
tokenizer.save_pretrained('./fine_tuned_deepseek')