"""Fine-tune a pretrained checkpoint for question answering with Trainer.

The script loads a model and tokenizer, reads train/test splits from JSON
files, tokenizes each (question, document) pair, trains for three epochs
with per-epoch evaluation, and saves the result to ``./fine_tuned_deepseek``.
"""
from datasets import load_dataset
from transformers import (
    AutoModelForQuestionAnswering,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

# Load the model and tokenizer. The Auto* classes pick the concrete
# architecture from the checkpoint's config, so no model-specific class
# name has to exist in the installed transformers release.
# NOTE(review): confirm that this repo id exists on the Hub and that its
# architecture has a question-answering head registered -- TODO verify.
model = AutoModelForQuestionAnswering.from_pretrained("DeepSeek/DeepSeek-v3")
tokenizer = AutoTokenizer.from_pretrained("DeepSeek/DeepSeek-v3")

# Load dataset
dataset = load_dataset(
    "json",
    data_files={
        "train": "your_dataset_train.json",
        "test": "your_dataset_test.json",
    },
)


# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize a batch of question/document pairs.

    Args:
        examples: A batch from ``dataset.map(..., batched=True)``; the
            ``'question'`` and ``'document'`` columns are read.

    Returns:
        The tokenizer output for the paired inputs, truncated but not
        padded. Padding is deferred to the data collator so that every
        training batch is padded to one common length.
    """
    return tokenizer(
        examples['question'],
        examples['document'],
        truncation=True,
    )


# NOTE(review): tokenization adds no start_positions/end_positions; the
# dataset presumably has to supply them for a QA loss -- TODO confirm.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Set up the training arguments
# NOTE(review): newer transformers releases spell this keyword
# `eval_strategy`; keep whichever the pinned version accepts.
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Initialize Trainer. The collator pads each batch dynamically; without
# it, examples of different lengths cannot be stacked into one tensor.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
    data_collator=DataCollatorWithPadding(tokenizer),
)

# Start the fine-tuning
trainer.train()

# Save the model after fine-tuning, together with the tokenizer so the
# output directory can be reloaded on its own.
model.save_pretrained('./fine_tuned_deepseek')
tokenizer.save_pretrained('./fine_tuned_deepseek')