Spaces:

Tonic
/

SmolFactory

Running

SmolFactory / config /train_smollm3_long_context.py

adds no think tag correctly

32fca7d verified 3 months ago

1.02 kB

	"""
	SmolLM3 Long-Context Training Configuration
	Optimized for long-context tasks (up to 128k tokens)
	"""

	from config.train_smollm3 import SmolLM3Config

	config = SmolLM3Config(
	# Model configuration
	model_name="HuggingFaceTB/SmolLM3-3B",
	max_seq_length=131072, # 128k tokens
	use_flash_attention=True,
	use_gradient_checkpointing=True,

	# Training configuration
	batch_size=1, # Reduced for long sequences
	gradient_accumulation_steps=8, # Increased to maintain effective batch size
	learning_rate=1e-5, # Lower learning rate for stability
	weight_decay=0.01,
	warmup_steps=200,
	max_iters=500,

	# Mixed precision
	fp16=True,
	bf16=False,

	# Logging and saving
	save_steps=100,
	eval_steps=50,
	logging_steps=10,

	# Chat template configuration
	use_chat_template=True,
	chat_template_kwargs={
	"add_generation_prompt": True,
	"no_think_system_message": True # Allow thinking for long context tasks
	}
	)