Training process: SFT > DPO > KTO
SFT with RP/ERP, Stories and in character assistant data.
DPO focused on reducing repetition, misgendered characters and slop.
KTO focused on further reducing repetition and slop.
>
Axolotl configs
Not optimized for cost / performance efficiency, YMMV.
SFT 1*H100
# ====================
# MODEL CONFIGURATION
# ====================
base_model: ConicCat/Mistral-Small-3.2-AntiRep-24B
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
chat_template: mistral_v7_tekken
# ====================
# DATASET CONFIGURATION
# ====================
datasets:
- path: ./dataset.jsonl
type: chat_template
split: train
chat_template_strategy: tokenizer
field_messages: messages
message_property_mappings:
role: role
content: content
roles:
user: ["user"]
assistant: ["assistant"]
system: ["system"]
dataset_prepared_path:
train_on_inputs: false # Only train on assistant responses
# ====================
# QLORA CONFIGURATION
# ====================
adapter: qlora
load_in_4bit: true
lora_r: 128
lora_alpha: 128
lora_dropout: 0.1
lora_target_linear: true
# lora_modules_to_save: # Uncomment only if you added NEW tokens
# ====================
# TRAINING PARAMETERS
# ====================
num_epochs: 3
micro_batch_size: 8
gradient_accumulation_steps: 1
learning_rate: 1e-5
optimizer: paged_adamw_8bit
lr_scheduler: rex
warmup_ratio: 0.05
weight_decay: 0.01
max_grad_norm: 1.0
# ====================
# SEQUENCE & PACKING
# ====================
sequence_len: 8192
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
# ====================
# HARDWARE OPTIMIZATIONS
# ====================
bf16: auto
flash_attention: true
gradient_checkpointing: true
# ====================
# EVALUATION & CHECKPOINTING
# ====================
save_strategy: steps
save_steps: 20
save_total_limit: 5 # Keep best + last few checkpoints
load_best_model_at_end: true
metric_for_best_model: eval_loss
greater_is_better: false
# ====================
# LOGGING & OUTPUT
# ====================
output_dir: ./PT-SFT_1
logging_steps: 2
save_safetensors: true
# ====================
# WANDB TRACKING
# ====================
wandb_project: PF-SFT
wandb_entity: your_entity
wandb_name: run_name