Update README.md
README.md CHANGED
```diff
@@ -96,21 +96,9 @@ linear_config = Int8DynamicActivationIntxWeightConfig(
 
 quant_config = AOPerModuleConfig({"_default": linear_config, "model.embed_tokens": embedding_config})
 quantization_config = TorchAoConfig(quant_type=quant_config, include_embedding=True, untie_embedding_weights=True, modules_to_not_convert=[])
-
 quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dtype=torch.float32, device_map="auto", quantization_config=quantization_config)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-# TODO: use AOPerModuleConfig once fix for tied weights is landed
-quantize_(
-    quantized_model,
-    embedding_config,
-    lambda m, fqn: isinstance(m, torch.nn.Embedding)
-)
-quantize_(
-    quantized_model,
-    linear_config,
-)
-
 # Push to hub
 # USER_ID = "YOUR_USER_ID"
 # save_to = f"{USER_ID}/phi4-mini-8dq4w"
```
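For context, here is a minimal sketch of how the quantization snippet reads after this change: the manual `quantize_` calls (previously needed as a workaround for tied weights) are dropped, and `AOPerModuleConfig` routes both the embedding and linear configs through `TorchAoConfig` at load time. The `linear_config`, `embedding_config`, `untied_model_id`, and `model_id` values are defined earlier in the README and are referenced here unchanged; the torchao import path is an assumption and may differ across versions.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig
# Assumed export location; older/newer torchao releases may expose this
# under a different module path.
from torchao.quantization import AOPerModuleConfig

# linear_config, embedding_config, untied_model_id, and model_id are
# defined earlier in the README.

# Route per-module configs by fully-qualified name: "_default" covers the
# linear layers, and "model.embed_tokens" gets the embedding config.
quant_config = AOPerModuleConfig(
    {"_default": linear_config, "model.embed_tokens": embedding_config}
)
quantization_config = TorchAoConfig(
    quant_type=quant_config,
    include_embedding=True,
    untie_embedding_weights=True,
    modules_to_not_convert=[],
)

# Passing quantization_config quantizes the model as it loads, so the
# post-hoc quantize_ calls removed in this diff are no longer needed.
quantized_model = AutoModelForCausalLM.from_pretrained(
    untied_model_id,
    torch_dtype=torch.float32,
    device_map="auto",
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
```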
