Spaces:

Tonic
/

SmolFactory

Running

App Files Files Community

Tonic committed on Jul 29

Commit

e6ad96a

verified ·

1 Parent(s): 41e9e02

adds quantize and push script

Browse files

Files changed (1) hide show

quantize_and_push.py +93 -0

quantize_and_push.py ADDED Viewed

	@@ -0,0 +1,93 @@

+#!/usr/bin/env python3
+"""
+Quantize and Push Script
+Quantizes the uploaded model and pushes quantized versions to the same repository
+"""
+import os
+import sys
+import logging
+from pathlib import Path
+# Extend the import search path with the ``src`` directory next to this script
+sys.path.append(
+    os.path.join(os.path.dirname(__file__), 'src')
+)
+# Configure timestamped, INFO-level logging for the whole script
+logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
+logger = logging.getLogger(__name__)
+def main():
+    """Quantize the checkpoint and push each quantized variant to the Hub.
+
+    Reads the Hugging Face token from the ``HF_TOKEN`` environment variable,
+    checks that the checkpoint path exists, then runs
+    ``ModelQuantizer.quantize_and_push`` once per quantization type.
+
+    Each quantization type is attempted independently: an exception raised
+    while processing one type is logged with its traceback and counted as a
+    failure instead of aborting the remaining types.
+
+    Returns:
+        int: process exit code -- ``0`` when at least one quantization type
+        was quantized and pushed, ``1`` when the token or the checkpoint is
+        missing, the quantizer cannot be imported, or every type failed.
+    """
+    # Configuration
+    MODEL_PATH = "/output-checkpoint"
+    REPO_NAME = "Tonic/smollm3-finetuned"
+    HF_TOKEN = os.getenv('HF_TOKEN')
+    # Quantization types to process, mapped to the (URL suffix, label) pair
+    # shown in the final summary for that type.
+    QUANT_TARGETS = {
+        "int8_weight_only": ("int8", "GPU optimized"),
+        "int4_weight_only": ("int4", "CPU optimized"),
+    }
+    if not HF_TOKEN:
+        logger.error("❌ HF_TOKEN not set")
+        return 1
+    if not Path(MODEL_PATH).exists():
+        logger.error(f"❌ Model path not found: {MODEL_PATH}")
+        return 1
+    logger.info("✅ Model files validated")
+    # Imported here rather than at module top so an import failure is
+    # reported through the logger and the exit code.
+    try:
+        from scripts.model_tonic.quantize_model import ModelQuantizer
+    except Exception as e:
+        logger.exception(f"❌ Error during quantization: {e}")
+        return 1
+    succeeded = []
+    for quant_type in QUANT_TARGETS:
+        logger.info(f"🔄 Processing quantization type: {quant_type}")
+        try:
+            # Initialize quantizer
+            quantizer = ModelQuantizer(
+                model_path=MODEL_PATH,
+                repo_name=REPO_NAME,
+                token=HF_TOKEN,
+                private=False,
+                hf_token=HF_TOKEN
+            )
+            # Perform quantization and push
+            success = quantizer.quantize_and_push(
+                quant_type=quant_type,
+                device="auto",
+                group_size=128
+            )
+        except Exception as e:
+            # Isolate the failure so the remaining types are still attempted;
+            # logger.exception keeps the traceback for debugging.
+            logger.exception(f"❌ Error during quantization: {e}")
+            success = False
+        if success:
+            logger.info(f"✅ {quant_type} quantization and push completed")
+            succeeded.append(quant_type)
+        else:
+            logger.error(f"❌ {quant_type} quantization and push failed")
+    logger.info(f"📊 Quantization summary: {len(succeeded)}/{len(QUANT_TARGETS)} successful")
+    if not succeeded:
+        logger.error("❌ All quantization attempts failed!")
+        return 1
+    if len(succeeded) < len(QUANT_TARGETS):
+        logger.warning("⚠️ Some quantization types failed; see errors above")
+    logger.info("✅ Quantization completed successfully!")
+    logger.info(f"🌐 View your models at: https://huggingface.co/{REPO_NAME}")
+    logger.info("📊 Quantized models available at:")
+    # Only advertise the variants that were actually pushed.
+    for quant_type in succeeded:
+        subpath, label = QUANT_TARGETS[quant_type]
+        logger.info(f"  - https://huggingface.co/{REPO_NAME}/{subpath} ({label})")
+    return 0
+if __name__ == "__main__":
+    # sys.exit is the supported way to set the exit status from a script; the
+    # bare exit() helper is injected by the site module for interactive use
+    # and is unavailable when Python runs with -S.
+    sys.exit(main())