Tonic committed on
Commit e6ad96a · verified · 1 Parent(s): 41e9e02

adds quantize and push script

Files changed (1)
  1. quantize_and_push.py +93 -0
quantize_and_push.py ADDED
@@ -0,0 +1,93 @@
+ #!/usr/bin/env python3
+ """
+ Quantize and Push Script
+ Quantizes the uploaded model and pushes quantized versions to the same repository
+ """
+
+ import os
+ import sys
+ import logging
+ from pathlib import Path
+
+ # Add src to path for imports
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
+
+ # Setup logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s'
+ )
+ logger = logging.getLogger(__name__)
+
+ def main():
+     """Quantize and push the model"""
+
+     # Configuration
+     MODEL_PATH = "/output-checkpoint"
+     REPO_NAME = "Tonic/smollm3-finetuned"
+     HF_TOKEN = os.getenv('HF_TOKEN')
+
+     if not HF_TOKEN:
+         logger.error("❌ HF_TOKEN not set")
+         return 1
+
+     if not Path(MODEL_PATH).exists():
+         logger.error(f"❌ Model path not found: {MODEL_PATH}")
+         return 1
+
+     logger.info("✅ Model files validated")
+
+     # Import and run quantization
+     try:
+         from scripts.model_tonic.quantize_model import ModelQuantizer
+
+         # Quantization types to process
+         quant_types = ["int8_weight_only", "int4_weight_only"]
+
+         success_count = 0
+         total_count = len(quant_types)
+
+         for quant_type in quant_types:
+             logger.info(f"🔄 Processing quantization type: {quant_type}")
+
+             # Initialize quantizer
+             quantizer = ModelQuantizer(
+                 model_path=MODEL_PATH,
+                 repo_name=REPO_NAME,
+                 token=HF_TOKEN,
+                 private=False,
+                 hf_token=HF_TOKEN
+             )
+
+             # Perform quantization and push
+             success = quantizer.quantize_and_push(
+                 quant_type=quant_type,
+                 device="auto",
+                 group_size=128
+             )
+
+             if success:
+                 logger.info(f"✅ {quant_type} quantization and push completed")
+                 success_count += 1
+             else:
+                 logger.error(f"❌ {quant_type} quantization and push failed")
+
+         logger.info(f"📊 Quantization summary: {success_count}/{total_count} successful")
+
+         if success_count > 0:
+             logger.info("✅ Quantization completed successfully!")
+             logger.info(f"🌐 View your models at: https://huggingface.co/{REPO_NAME}")
+             logger.info("📊 Quantized models available at:")
+             logger.info(f" - https://huggingface.co/{REPO_NAME}/int8 (GPU optimized)")
+             logger.info(f" - https://huggingface.co/{REPO_NAME}/int4 (CPU optimized)")
+             return 0
+         else:
+             logger.error("❌ All quantization attempts failed!")
+             return 1
+
+     except Exception as e:
+         logger.error(f"❌ Error during quantization: {e}")
+         return 1
+
+ if __name__ == "__main__":
+     exit(main())