akhaliq (HF Staff) committed
Commit e26eb4c (verified) · 1 parent: 1df30c2

Update app.py

Files changed (1): app.py (+28, −51)
app.py CHANGED
@@ -8,70 +8,47 @@ model_id = "facebook/MobileLLM-R1-950M"
 pipe = pipeline(
     "text-generation",
     model=model_id,
-    torch_dtype="auto",
+    torch_dtype=torch.float16,
     device_map="auto",
 )
 
 @spaces.GPU(duration=120)
 def respond(message, history):
-    # Build messages list from history
-    messages = []
-
-    # Add system message based on content type detection
-    if any(kw in message.lower() for kw in ["python", "def ", "function"]):
-        messages.append({
-            "role": "system",
-            "content": (
-                "\nYou are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.\n\n"
-                "Please use python programming language only.\n"
-                "You must use ```python for just the final solution code block with the following format:\n"
-                "```python\n# Your code here\n```\n"
-            )
-        })
-    elif any(kw in message.lower() for kw in ["c++", "cpp", "#include", "cout"]):
-        messages.append({
-            "role": "system",
-            "content": (
-                "\nYou are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.\n\n"
-                "Please use c++ programming language only.\n"
-                "You must use ```cpp for just the final solution code block with the following format:\n"
-                "```cpp\n// Your code here\n```\n"
-            )
-        })
-    elif any(kw in message.lower() for kw in ["compute", "calculate", "math", "+", "-", "*", "/"]):
-        messages.append({
-            "role": "system",
-            "content": "Please reason step by step, and put your final answer within \\boxed{}."
-        })
-    else:
-        messages.append({
-            "role": "system",
-            "content": "You are a helpful AI assistant."
-        })
-
-    # Add conversation history
+    # Build prompt from history
+    prompt = ""
     for user_msg, assistant_msg in history:
         if user_msg:
-            messages.append({"role": "user", "content": user_msg})
+            prompt += f"User: {user_msg}\n"
         if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
+            prompt += f"Assistant: {assistant_msg}\n"
 
     # Add current message
-    messages.append({"role": "user", "content": message})
+    prompt += f"User: {message}\nAssistant: "
+
+    # Generate response with streaming
+    streamer = pipe.tokenizer.decode
+
+    # Generate tokens
+    inputs = pipe.tokenizer(prompt, return_tensors="pt").to(pipe.model.device)
+
+    with torch.no_grad():
+        outputs = pipe.model.generate(
+            **inputs,
+            max_new_tokens=10000,
+            temperature=0.7,
+            do_sample=True,
+            pad_token_id=pipe.tokenizer.eos_token_id,
+        )
 
-    # Generate response
-    outputs = pipe(
-        messages,
-        max_new_tokens=8192,
-        temperature=0.7,
-        do_sample=True,
-    )
+    # Decode the generated tokens, skipping the input tokens
+    generated_tokens = outputs[0][inputs['input_ids'].shape[-1]:]
 
-    # Extract and stream the generated text
-    full_response = outputs[0]["generated_text"][-1]["content"]
+    # Stream the output token by token
     response_text = ""
-    for char in full_response:
-        response_text += char
+    for i in range(len(generated_tokens)):
+        token = generated_tokens[i:i+1]
+        token_text = pipe.tokenizer.decode(token, skip_special_tokens=True)
+        response_text += token_text
         yield response_text
 
 # Create the chat interface
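
A note on the new streaming path: the committed respond() runs model.generate() to completion and only then replays the finished text token by token, so nothing appears in the UI while the model is still generating (and the streamer = pipe.tokenizer.decode assignment is never used). If live output is the goal, transformers' TextIteratorStreamer fits this yield loop; the sketch below is a hypothetical alternative that reuses the commit's prompt construction and sampling settings, not code from the commit itself. It assumes the same module-level pipe and would sit under the same @spaces.GPU decorator.

from threading import Thread

from transformers import TextIteratorStreamer

def respond_streaming(message, history):
    # Same prompt construction as the committed respond().
    prompt = ""
    for user_msg, assistant_msg in history:
        if user_msg:
            prompt += f"User: {user_msg}\n"
        if assistant_msg:
            prompt += f"Assistant: {assistant_msg}\n"
    prompt += f"User: {message}\nAssistant: "

    inputs = pipe.tokenizer(prompt, return_tensors="pt").to(pipe.model.device)

    # TextIteratorStreamer decodes tokens in the background as generate()
    # produces them; skip_prompt=True drops the echoed input prompt.
    streamer = TextIteratorStreamer(
        pipe.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=10000,
        temperature=0.7,
        do_sample=True,
        pad_token_id=pipe.tokenizer.eos_token_id,
        streamer=streamer,
    )

    # generate() blocks until done, so run it in a worker thread and
    # yield partial text from the streamer as it arrives.
    thread = Thread(target=pipe.model.generate, kwargs=generation_kwargs)
    thread.start()

    response_text = ""
    for new_text in streamer:
        response_text += new_text
        yield response_text
    thread.join()

Separately, the previous version passed a chat-style messages list to pipe(), which applies the model's chat template; the raw "User:/Assistant:" prompt bypasses that. Assuming the tokenizer defines a template, as the old messages-based call implied, pipe.tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False) would stay closer to the format the model was tuned on.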