Spaces:

Tonic
/

SmolFactory

Running

App Files Files Community

SmolFactory / scripts /rescue /quantize_and_push.py

testtest123

cleanup a bit the files

ad3b15d unverified about 2 months ago

raw

history blame

2.9 kB

	#!/usr/bin/env python3
	"""
	Quantize and Push Script
	Quantizes the uploaded model and pushes quantized versions to the same repository
	"""

	import os
	import sys
	import logging
	from pathlib import Path

	# Make project modules importable when this file is run directly as a script.
	# The original entry (a 'src' directory next to this file) is kept unchanged.
	sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))

	# main() imports `scripts.model_tonic.quantize_model`, which can only resolve
	# if the directory that CONTAINS the `scripts` package is on sys.path.
	# NOTE(review): assumes this file lives at <repo>/scripts/rescue/, i.e. the
	# repository root is two directories above this file's directory -- confirm
	# against the deployed layout.
	REPO_ROOT = Path(__file__).resolve().parent.parent.parent
	if str(REPO_ROOT) not in sys.path:
	    sys.path.append(str(REPO_ROOT))

	# Configure the root logger for the whole script: INFO and above, with each
	# record rendered as "<timestamp> - <level> - <message>".
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	# Module-scoped logger used by the code below.
	logger = logging.getLogger(__name__)

	def main(model_path: str = "/output-checkpoint",
	         repo_name: str = "Tonic/smollm3-finetuned") -> int:
	    """Quantize the model at *model_path* and push each variant to *repo_name*.

	    The Hugging Face token is read from the ``HF_TOKEN`` environment variable.
	    Each quantization type is attempted independently: a failure (or an
	    exception) in one type is logged and does not prevent the others from
	    being attempted.

	    Args:
	        model_path: Filesystem path of the checkpoint to quantize. Defaults to
	            the value that was previously hard-coded.
	        repo_name: Hub repository passed to the quantizer and used in the
	            summary URLs. Defaults to the value that was previously hard-coded.

	    Returns:
	        0 if at least one quantization type was pushed successfully, 1 if
	        ``HF_TOKEN`` is unset, the model path does not exist, the quantizer
	        cannot be imported, or every quantization attempt failed.
	    """
	    hf_token = os.getenv('HF_TOKEN')

	    # Guard clauses: nothing can be done without a token and a checkpoint.
	    if not hf_token:
	        logger.error("❌ HF_TOKEN not set")
	        return 1

	    if not Path(model_path).exists():
	        logger.error(f"❌ Model path not found: {model_path}")
	        return 1

	    logger.info("✅ Model files validated")

	    # Imported lazily so the validation above still works (and reports a
	    # clean error code) when the project package is not importable.
	    try:
	        from scripts.model_tonic.quantize_model import ModelQuantizer
	    except Exception as e:
	        logger.exception(f"❌ Error during quantization: {e}")
	        return 1

	    # quantization type -> (URL suffix, label) exactly as advertised in the
	    # summary log lines. The real repository layout is decided by
	    # ModelQuantizer, which is not visible from this file.
	    variants = {
	        "int8_weight_only": ("int8", "GPU optimized"),
	        "int4_weight_only": ("int4", "CPU optimized"),
	    }

	    succeeded = []
	    for quant_type in variants:
	        logger.info(f"🔄 Processing quantization type: {quant_type}")

	        try:
	            # Both `token` and `hf_token` receive the same value, as in the
	            # original call; ModelQuantizer's signature is not visible here.
	            quantizer = ModelQuantizer(
	                model_path=model_path,
	                repo_name=repo_name,
	                token=hf_token,
	                private=False,
	                hf_token=hf_token,
	            )
	            pushed = quantizer.quantize_and_push(
	                quant_type=quant_type,
	                device="auto",
	                group_size=128,
	            )
	        except Exception as e:
	            # Keep the traceback, but let the remaining types run.
	            logger.exception(f"❌ Error during quantization: {e}")
	            pushed = False

	        if pushed:
	            logger.info(f"✅ {quant_type} quantization and push completed")
	            succeeded.append(quant_type)
	        else:
	            logger.error(f"❌ {quant_type} quantization and push failed")

	    logger.info(
	        f"📊 Quantization summary: {len(succeeded)}/{len(variants)} successful"
	    )

	    if not succeeded:
	        logger.error("❌ All quantization attempts failed!")
	        return 1

	    logger.info("✅ Quantization completed successfully!")
	    logger.info(f"🌐 View your models at: https://huggingface.co/{repo_name}")
	    logger.info("📊 Quantized models available at:")
	    # Only advertise the variants that were actually pushed.
	    for quant_type in succeeded:
	        suffix, label = variants[quant_type]
	        logger.info(f" - https://huggingface.co/{repo_name}/{suffix} ({label})")
	    return 0

	# Script entry point: propagate main()'s status code to the shell.
	# sys.exit is used rather than the bare `exit` helper, which is injected by
	# the `site` module for interactive use and is not guaranteed to exist
	# (e.g. under `python -S` or in frozen/embedded interpreters).
	if __name__ == "__main__":
	    sys.exit(main())