NetoAI committed on
Commit
0f7bf50
·
verified ·
1 Parent(s): 06dbc8a

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +15 -7
handler.py CHANGED
@@ -20,7 +20,7 @@ class EndpointHandler:
20
  """
21
  # Extract text input and an optional system prompt from the request
22
  text_input = data.get("inputs", "")
23
- system_prompt = data.get("system_prompt", "You are a helpful assistant.") # Default system prompt
24
 
25
  if not isinstance(text_input, str) or not text_input:
26
  return [{"error": "Invalid or empty input. Please provide a text string."}]
@@ -31,7 +31,13 @@ class EndpointHandler:
31
  {"role": "user", "content": text_input}
32
  ]
33
 
34
- prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
 
 
 
 
 
35
 
36
  # Tokenize and move input to device
37
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
@@ -39,15 +45,17 @@ class EndpointHandler:
39
  # Generate text
40
  outputs = self.model.generate(
41
  **inputs,
42
- max_length=2000,
43
  temperature=0.5,
44
- num_return_sequences=1,
45
- eos_token_id=self.tokenizer.eos_token_id
46
  )
47
 
48
- output_ids = outputs[0][len(inputs.input_ids[0])+4:].tolist() # the +4 is to account for the think tags that are added by the template
 
 
49
 
50
  # Decode generated text and clean up
51
  response = self.tokenizer.decode(output_ids, skip_special_tokens=True)
52
 
53
- return response
 
 
20
  """
21
  # Extract text input and an optional system prompt from the request
22
  text_input = data.get("inputs", "")
23
+ system_prompt = data.get("system_prompt", "You are a helpful assistant.")
24
 
25
  if not isinstance(text_input, str) or not text_input:
26
  return [{"error": "Invalid or empty input. Please provide a text string."}]
 
31
  {"role": "user", "content": text_input}
32
  ]
33
 
34
+ try:
35
+ prompt = self.tokenizer.apply_chat_template(
36
+ messages, tokenize=False, add_generation_prompt=True
37
+ )
38
+ except Exception:
39
+ # fallback for models without chat template
40
+ prompt = f"{system_prompt}\nUser: {text_input}\nAssistant:"
41
 
42
  # Tokenize and move input to device
43
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
 
45
  # Generate text
46
  outputs = self.model.generate(
47
  **inputs,
48
+ max_new_tokens=512,
49
  temperature=0.5,
50
+ eos_token_id=self.tokenizer.eos_token_id,
 
51
  )
52
 
53
+ # Slice output to remove prompt + <think> tags if present
54
+ offset = len(inputs.input_ids[0]) + 4 # +4 accounts for <think> tags
55
+ output_ids = outputs[0][offset:].tolist()
56
 
57
  # Decode generated text and clean up
58
  response = self.tokenizer.decode(output_ids, skip_special_tokens=True)
59
 
60
+ # ✅ Return JSON-serializable format for HF Endpoints + eval_utils
61
+ return [{"generated_text": response}]