	Update app.py
app.py CHANGED
@@ -3,6 +3,9 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, PretrainedConfig
 from huggingface_hub import hf_hub_download
 import json
+import torch.nn as nn
+import torch.nn.functional as F
+import math

 # Define the model architecture
 class SmolLM2Config(PretrainedConfig):
@@ -56,26 +59,35 @@ class SmolLM2ForCausalLM(PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
         self.config = config

-            config=config,
-            torch_dtype=torch.float16,
-            low_cpu_mem_usage=True,
-            trust_remote_code=True
-        )
+        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)
+        self.layers = nn.ModuleList([LlamaDecoderLayer(config) for _ in range(config.num_hidden_layers)])
+        self.norm = RMSNorm(config.hidden_size, config.rms_norm_eps)
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
+
+        if config.tie_word_embeddings:
+            self.lm_head.weight = self.embed_tokens.weight

     def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
+        hidden_states = self.embed_tokens(input_ids)
+
+        # Process through layers
+        for layer in self.layers:
+            hidden_states = layer(hidden_states, attention_mask)
+
+        hidden_states = self.norm(hidden_states)
+        logits = self.lm_head(hidden_states)
+
+        loss = None
+        if labels is not None:
+            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1))
+
+        return logits if loss is None else (loss, logits)

     def prepare_inputs_for_generation(self, input_ids, **kwargs):
-        return
+        return {
+            "input_ids": input_ids,
+            "attention_mask": kwargs.get("attention_mask", None)
+        }

 # Register the model
 AutoModelForCausalLM.register(SmolLM2Config, SmolLM2ForCausalLM)
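Note that the rewritten __init__ assembles the network from LlamaDecoderLayer and RMSNorm, neither of which appears in these hunks; presumably both are defined earlier in app.py. For orientation, here is a minimal sketch of an RMSNorm that would satisfy the RMSNorm(config.hidden_size, config.rms_norm_eps) call above (a standard Llama-style implementation, not necessarily the Space's actual code):

import torch
import torch.nn as nn

class RMSNorm(nn.Module):
    """Root-mean-square norm: learned scale, no mean subtraction or bias."""
    def __init__(self, hidden_size, eps=1e-5):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.eps = eps

    def forward(self, x):
        # Normalize each token vector by its RMS over the hidden dimension.
        variance = x.pow(2).mean(-1, keepdim=True)
        return self.weight * x * torch.rsqrt(variance + self.eps)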
@@ -111,19 +123,20 @@ def initialize():
             }
             TOKENIZER.add_special_tokens(special_tokens)

-            # Load model
+            # Load model weights
             print("Loading model...")
-            )
+            weights_path = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin")
+
+            # Initialize model
+            MODEL = SmolLM2ForCausalLM(config)
+
+            # Load state dict
+            state_dict = torch.load(weights_path, map_location="cpu")
+            MODEL.load_state_dict(state_dict)

             # Move model to device
             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-            MODEL.to(device)
+            MODEL = MODEL.to(device)

             print(f"Model loaded successfully on {device}")
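One caveat on the new loading path, as an observation rather than part of this commit: torch.load on a pickled pytorch_model.bin executes arbitrary pickle code. If the checkpoint comes from the Space's own repo that is fine, but on PyTorch 1.13 and later the same line can be hardened, as sketched here:

# Hardened variant of the state-dict load in the hunk above (PyTorch >= 1.13).
# weights_only=True restricts unpickling to tensors and plain containers.
state_dict = torch.load(weights_path, map_location="cpu", weights_only=True)
MODEL.load_state_dict(state_dict)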
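Because the new forward() returns a raw logits tensor (or a (loss, logits) tuple) rather than a Hugging Face ModelOutput, the most direct smoke test of this commit is a hand-rolled greedy decoding loop. A sketch, assuming initialize() has run and set the Space's MODEL and TOKENIZER globals; the prompt is arbitrary:

import torch

prompt = "Once upon a time"
input_ids = TOKENIZER(prompt, return_tensors="pt").input_ids.to(MODEL.device)

MODEL.eval()
with torch.no_grad():
    for _ in range(50):
        logits = MODEL(input_ids=input_ids)  # (batch, seq_len, vocab_size)
        next_id = logits[:, -1, :].argmax(dim=-1, keepdim=True)
        input_ids = torch.cat([input_ids, next_id], dim=-1)

print(TOKENIZER.decode(input_ids[0], skip_special_tokens=True))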