SmolFactory / scripts /rescue /quantize_and_push.py
testtest123's picture
cleanup a bit the files
ad3b15d unverified
raw
history blame
2.9 kB
#!/usr/bin/env python3
"""
Quantize and Push Script
Quantizes the uploaded model and pushes quantized versions to the same repository
"""
import os
import sys
import logging
from pathlib import Path
# Add src to path for imports
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
# Also expose the repository root so the absolute ``scripts.*`` import done in
# main() can resolve when this file is executed directly.
# NOTE(review): assumes this file sits two directories below the repository
# root (scripts/rescue/) - confirm against the actual layout.
_repo_root = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
)
if _repo_root not in sys.path:
    # Appended (not inserted) so already-importable modules keep priority.
    sys.path.append(_repo_root)

# Setup logging: INFO level, timestamped "time - LEVEL - message" records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
def main():
    """Quantize the checkpoint and push every quantized variant to the Hub.

    Validates that ``HF_TOKEN`` is set and that the checkpoint directory
    exists, then runs ``ModelQuantizer.quantize_and_push`` once per
    quantization type. A failure (falsy result or raised exception) in one
    type is logged and does not prevent the remaining types from running.

    Returns:
        int: Exit code. 0 when at least one quantization type was pushed;
        1 when ``HF_TOKEN`` is missing, the checkpoint path does not exist,
        the quantizer cannot be imported, or every attempt failed.
    """
    # Configuration
    MODEL_PATH = "/output-checkpoint"
    REPO_NAME = "Tonic/smollm3-finetuned"
    HF_TOKEN = os.getenv('HF_TOKEN')

    # (quantization type, URL suffix shown in the summary, hardware note)
    quant_targets = (
        ("int8_weight_only", "int8", "GPU optimized"),
        ("int4_weight_only", "int4", "CPU optimized"),
    )

    if not HF_TOKEN:
        logger.error("❌ HF_TOKEN not set")
        return 1

    if not Path(MODEL_PATH).exists():
        logger.error(f"❌ Model path not found: {MODEL_PATH}")
        return 1

    logger.info("✅ Model files validated")

    # Imported lazily so the cheap validation above runs (and can fail fast)
    # before the quantization stack is loaded.
    try:
        from scripts.model_tonic.quantize_model import ModelQuantizer
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception(f"❌ Error during quantization: {e}")
        return 1

    succeeded = []  # (url_suffix, note) for every type that was pushed
    failed = []     # quantization type names that did not complete
    total_count = len(quant_targets)

    for quant_type, url_suffix, note in quant_targets:
        logger.info(f"🔄 Processing quantization type: {quant_type}")

        try:
            # Initialize quantizer
            # NOTE(review): the token is passed as both `token` and
            # `hf_token`; confirm which keyword ModelQuantizer expects.
            quantizer = ModelQuantizer(
                model_path=MODEL_PATH,
                repo_name=REPO_NAME,
                token=HF_TOKEN,
                private=False,
                hf_token=HF_TOKEN
            )

            # Perform quantization and push
            success = quantizer.quantize_and_push(
                quant_type=quant_type,
                device="auto",
                group_size=128
            )
        except Exception as e:
            # Treat a crash like a reported failure so the remaining
            # quantization types still get their chance to run.
            logger.exception(f"❌ Error during quantization: {e}")
            success = False

        if success:
            logger.info(f"✅ {quant_type} quantization and push completed")
            succeeded.append((url_suffix, note))
        else:
            logger.error(f"❌ {quant_type} quantization and push failed")
            failed.append(quant_type)

    logger.info(f"📊 Quantization summary: {len(succeeded)}/{total_count} successful")

    if not succeeded:
        logger.error("❌ All quantization attempts failed!")
        return 1

    if failed:
        # Partial success keeps exit code 0, but must not claim full success.
        logger.warning(
            f"⚠️ Quantization partially completed; failed types: {', '.join(failed)}"
        )
    else:
        logger.info("✅ Quantization completed successfully!")

    logger.info(f"🌐 View your models at: https://huggingface.co/{REPO_NAME}")
    logger.info("📊 Quantized models available at:")
    # Only advertise the variants that were actually pushed.
    for url_suffix, note in succeeded:
        logger.info(f" - https://huggingface.co/{REPO_NAME}/{url_suffix} ({note})")
    return 0
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status. sys.exit is
    # always available, whereas the bare exit() helper is injected by the
    # site module for interactive use and is missing under `python -S`.
    sys.exit(main())