# /// script
# dependencies = [
#     "trl==0.11.4",
#     "peft>=0.7.0",
#     "trackio",
#     "datasets",
#     "transformers>=4.46.0",
#     "accelerate",
#     "bitsandbytes",
#     "torch",
#     "protobuf",
#     "sentencepiece",
#     "mistral-common>=1.5.0",
# ]
# ///

import os

import torch
from datasets import load_dataset
from peft import LoraConfig, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import SFTConfig, SFTTrainer

import trackio  # experiment-tracking backend used via report_to="trackio"

print("=" * 50)
print("Starting Alizee Coder Devstral Training")
print("=" * 50)

# Configuration
MODEL_NAME = "mistralai/Devstral-Small-2505"
OUTPUT_REPO = "stmasson/alizee-coder-devstral-1-small"
DATASET_SIZE = 10000

# Verify HF_TOKEN
if not os.environ.get("HF_TOKEN"):
    raise ValueError("HF_TOKEN not set!")
print("HF_TOKEN verified")

print("Loading dataset nvidia/OpenCodeReasoning...")
try:
    dataset = load_dataset("nvidia/OpenCodeReasoning", "split_0", split="split_0")
    dataset = dataset.shuffle(seed=42).select(range(min(DATASET_SIZE, len(dataset))))
    print(f"Dataset loaded: {len(dataset)} examples")
except Exception as e:
    print(f"Error loading dataset: {e}")
    raise

# Split train/eval
dataset_split = dataset.train_test_split(test_size=0.05, seed=42)
train_dataset = dataset_split["train"]
eval_dataset = dataset_split["test"]
print(f"Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")


# Format for code reasoning
def format_example(example):
    solution = example.get("solution", "") or ""
    output = example.get("output", "") or ""
    text = (
        "[INST] Solve this programming problem with detailed reasoning:\n\n"
        f"{example['input']}\n[/INST]\n\n"
        f"**Reasoning:**\n{output}\n\n"
        f"**Solution:**\n```python\n{solution}\n```"
    )
    return {"text": text}


print("Formatting dataset...")
train_dataset = train_dataset.map(format_example, remove_columns=train_dataset.column_names)
eval_dataset = eval_dataset.map(format_example, remove_columns=eval_dataset.column_names)
print("Dataset formatted")

# Load tokenizer
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
print("Tokenizer loaded")

# 4-bit quantization (QLoRA-style: NF4 weights, bf16 compute)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

print(f"Loading model {MODEL_NAME}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)
print("Model loaded")

# LoRA configuration
lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

# Training config
training_config = SFTConfig(
    output_dir="./alizee-coder-devstral-1-small",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=16,
    gradient_checkpointing=True,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    max_seq_length=4096,
    dataset_text_field="text",
    logging_steps=10,
    save_strategy="steps",
    save_steps=200,
    eval_strategy="steps",
    eval_steps=200,
    bf16=True,
    push_to_hub=True,
    hub_model_id=OUTPUT_REPO,
    hub_strategy="every_save",
    report_to="trackio",
    run_name="alizee-coder-devstral-1-small",
)

print("Initializing trainer...")
trainer = SFTTrainer(
    model=model,
    args=training_config,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=lora_config,
    tokenizer=tokenizer,
)
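# (Optional) Sanity check before training, added here as an illustration and
# not required for the run: when `peft_config` is passed, SFTTrainer wraps the
# base model with the LoRA adapters, so the wrapped model should expose peft's
# `print_trainable_parameters()` showing how few parameters are actually trained.
if hasattr(trainer.model, "print_trainable_parameters"):
    trainer.model.print_trainable_parameters()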
print("="*50) print("STARTING TRAINING") print("="*50) trainer.train() print("Pushing to Hub...") trainer.push_to_hub() print(f"Done! Model: https://huggingface.co/{OUTPUT_REPO}")