don't resize embeddings to multiples of 32x by default
Files changed:
- README.md +3 -0
- src/axolotl/utils/models.py +5 -1
 
    	
README.md CHANGED

```diff
@@ -322,6 +322,9 @@ tokenizer_type: AutoTokenizer
 trust_remote_code:
 # use_fast option for tokenizer loading from_pretrained, default to True
 tokenizer_use_fast:
+# resize the model embeddings when new tokens are added to multiples of 32
+# this is reported to improve training speed on some models
+resize_token_embeddings_to_32x:
 
 # whether you are training a 4-bit GPTQ quantized model
 gptq: true
```
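With this change the padded resize is opt-in rather than the default, so configs that relied on the old behavior need to enable the flag explicitly. A minimal sketch of the relevant line in an axolotl YAML config (the surrounding config is assumed, not shown):

```yaml
# opt back in to the pre-change behavior: pad the embedding table
# up to the next multiple of 32 when new tokens are added.
# leave unset (or false) to resize exactly to the tokenizer length.
resize_token_embeddings_to_32x: true
```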
    	
src/axolotl/utils/models.py CHANGED

```diff
@@ -301,7 +301,11 @@ def load_model(
             **model_kwargs,
         )
 
-    embeddings_len = math.ceil(len(tokenizer) / 32) * 32
+    embeddings_len = (
+        math.ceil(len(tokenizer) / 32) * 32
+        if cfg.resize_token_embeddings_to_32x
+        else len(tokenizer)
+    )
     model.resize_token_embeddings(embeddings_len)
 
     if (
```
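Padding the vocabulary dimension to a multiple of 32 aligns the embedding matrix with GPU tensor-core tile sizes, which is why the README comment says it "is reported to improve training speed on some models"; the cost is a handful of extra untrained rows. The rounding itself is plain ceiling arithmetic. A minimal standalone sketch of the new sizing logic, with `resize_to_32x` standing in for `cfg.resize_token_embeddings_to_32x` and `tokenizer_len` for `len(tokenizer)`:

```python
import math

def target_embeddings_len(tokenizer_len: int, resize_to_32x: bool) -> int:
    """Mirror the diff's logic: pad to the next multiple of 32 only when asked."""
    if resize_to_32x:
        # round up: e.g. 32001 -> ceil(32001 / 32) * 32 = 1001 * 32 = 32032
        return math.ceil(tokenizer_len / 32) * 32
    # new default: resize exactly to the tokenizer's vocab size
    return tokenizer_len

assert target_embeddings_len(32000, True) == 32000   # already aligned, unchanged
assert target_embeddings_len(32001, True) == 32032   # padded up to the next multiple
assert target_embeddings_len(32001, False) == 32001  # new default: exact size
```

The result is handed to Hugging Face's `model.resize_token_embeddings`, which adds or removes rows in the input embeddings (and the LM head, where applicable) to match the requested size.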