NetoAISolutions
/

TSLAM-15B

4-bit precision

Model card Files Files and versions

NetoAI committed 4 days ago

Commit

0f7bf50

·

verified ·

1 Parent(s): 06dbc8a

Update handler.py

Files changed (1) hide show

handler.py +15 -7

handler.py CHANGED Viewed

@@ -20,7 +20,7 @@ class EndpointHandler:
         """
         # Extract text input and an optional system prompt from the request
         text_input = data.get("inputs", "")
-        system_prompt = data.get("system_prompt", "You are a helpful assistant.") # Default system prompt
         if not isinstance(text_input, str) or not text_input:
             return [{"error": "Invalid or empty input. Please provide a text string."}]
@@ -31,7 +31,13 @@ class EndpointHandler:
             {"role": "user", "content": text_input}
         ]
-        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         # Tokenize and move input to device
         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
@@ -39,15 +45,17 @@ class EndpointHandler:
         # Generate text
         outputs = self.model.generate(
             **inputs,
-            max_length=2000,
             temperature=0.5,
-            num_return_sequences=1,
-            eos_token_id=self.tokenizer.eos_token_id
         )
-        output_ids = outputs[0][len(inputs.input_ids[0])+4:].tolist() # the +4 is to account for the think tags that are added by the template
         # Decode generated text and clean up
         response = self.tokenizer.decode(output_ids, skip_special_tokens=True)
-        return response

         """
         # Extract text input and an optional system prompt from the request
         text_input = data.get("inputs", "")
+        system_prompt = data.get("system_prompt", "You are a helpful assistant.")
         if not isinstance(text_input, str) or not text_input:
             return [{"error": "Invalid or empty input. Please provide a text string."}]
             {"role": "user", "content": text_input}
         ]
+        try:
+            prompt = self.tokenizer.apply_chat_template(
+                messages, tokenize=False, add_generation_prompt=True
+            )
+        except Exception:
+            # fallback for models without chat template
+            prompt = f"{system_prompt}\nUser: {text_input}\nAssistant:"
         # Tokenize and move input to device
         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
         # Generate text
         outputs = self.model.generate(
             **inputs,
+            max_new_tokens=512,
             temperature=0.5,
+            eos_token_id=self.tokenizer.eos_token_id,
         )
+        # Slice output to remove prompt + <think> tags if present
+        offset = len(inputs.input_ids[0]) + 4  # +4 accounts for <think> tags
+        output_ids = outputs[0][offset:].tolist()
         # Decode generated text and clean up
         response = self.tokenizer.decode(output_ids, skip_special_tokens=True)
+        # ✅ Return JSON-serializable format for HF Endpoints + eval_utils
+        return [{"generated_text": response}]