akhaliq (HF Staff) committed
Commit 1df30c2 · verified · 1 Parent(s): 2bdd70f

Update app.py

Files changed (1)
  1. app.py +51 -28
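
In short, the commit switches the pipeline to torch_dtype="auto", replaces the hand-built "User:"/"Assistant:" prompt string and direct model.generate() call with a chat-style messages list passed to the pipeline (which applies the model's chat template), selects a system prompt by keyword-matching the user message, and lowers the generation cap from 10000 to 8192 new tokens. For orientation before the diff, a minimal sketch of the call pattern the new code lands on, assuming a transformers version recent enough to accept chat-format message lists (the example user message is made up):

# Minimal sketch of the messages-based generation path this commit adopts.
# Standalone: the Gradio/ZeroGPU wiring from the full app is omitted.
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="facebook/MobileLLM-R1-950M",
    torch_dtype="auto",   # let transformers pick the checkpoint's dtype
    device_map="auto",
)

# Chat-format input; the pipeline applies the model's chat template.
messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "What is a generator in Python?"},  # made-up example
]

outputs = pipe(messages, max_new_tokens=8192, temperature=0.7, do_sample=True)

# The pipeline returns the conversation with the assistant's reply
# appended as the final message.
print(outputs[0]["generated_text"][-1]["content"])

Handing the messages list straight to the pipeline avoids the tokenizer/generate plumbing the old code managed by hand.
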
app.py CHANGED
@@ -8,47 +8,70 @@ model_id = "facebook/MobileLLM-R1-950M"
 pipe = pipeline(
     "text-generation",
     model=model_id,
-    torch_dtype=torch.float16,
+    torch_dtype="auto",
     device_map="auto",
 )
 
 @spaces.GPU(duration=120)
 def respond(message, history):
-    # Build prompt from history
-    prompt = ""
+    # Build messages list from history
+    messages = []
+
+    # Add system message based on content type detection
+    if any(kw in message.lower() for kw in ["python", "def ", "function"]):
+        messages.append({
+            "role": "system",
+            "content": (
+                "\nYou are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.\n\n"
+                "Please use python programming language only.\n"
+                "You must use ```python for just the final solution code block with the following format:\n"
+                "```python\n# Your code here\n```\n"
+            )
+        })
+    elif any(kw in message.lower() for kw in ["c++", "cpp", "#include", "cout"]):
+        messages.append({
+            "role": "system",
+            "content": (
+                "\nYou are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.\n\n"
+                "Please use c++ programming language only.\n"
+                "You must use ```cpp for just the final solution code block with the following format:\n"
+                "```cpp\n// Your code here\n```\n"
+            )
+        })
+    elif any(kw in message.lower() for kw in ["compute", "calculate", "math", "+", "-", "*", "/"]):
+        messages.append({
+            "role": "system",
+            "content": "Please reason step by step, and put your final answer within \\boxed{}."
+        })
+    else:
+        messages.append({
+            "role": "system",
+            "content": "You are a helpful AI assistant."
+        })
+
+    # Add conversation history
     for user_msg, assistant_msg in history:
         if user_msg:
-            prompt += f"User: {user_msg}\n"
+            messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
-            prompt += f"Assistant: {assistant_msg}\n"
 
     # Add current message
-    prompt += f"User: {message}\nAssistant: "
-
-    # Generate response with streaming
-    streamer = pipe.tokenizer.decode
-
-    # Generate tokens
-    inputs = pipe.tokenizer(prompt, return_tensors="pt").to(pipe.model.device)
-
-    with torch.no_grad():
-        outputs = pipe.model.generate(
-            **inputs,
-            max_new_tokens=10000,
-            temperature=0.7,
-            do_sample=True,
-            pad_token_id=pipe.tokenizer.eos_token_id,
-        )
+    messages.append({"role": "assistant", "content": assistant_msg})
+    messages.append({"role": "user", "content": message})
 
-    # Decode the generated tokens, skipping the input tokens
-    generated_tokens = outputs[0][inputs['input_ids'].shape[-1]:]
+    # Generate response
+    outputs = pipe(
+        messages,
+        max_new_tokens=8192,
+        temperature=0.7,
+        do_sample=True,
+    )
 
-    # Stream the output token by token
+    # Extract and stream the generated text
+    full_response = outputs[0]["generated_text"][-1]["content"]
     response_text = ""
-    for i in range(len(generated_tokens)):
-        token = generated_tokens[i:i+1]
-        token_text = pipe.tokenizer.decode(token, skip_special_tokens=True)
-        response_text += token_text
+    for char in full_response:
+        response_text += char
         yield response_text
 
 # Create the chat interface
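
The system-prompt routing in the new respond() is a keyword heuristic, with Python keywords taking precedence over C++, and C++ over math. Restated as a hypothetical standalone helper (the function name and return labels are illustrative; the keyword lists and branch order are copied from the diff), which makes the matching behaviour easy to check:

# Hypothetical refactor of the commit's system-prompt routing into a
# testable helper; keyword lists and precedence mirror the diff exactly.
def detect_content_type(message: str) -> str:
    lowered = message.lower()
    if any(kw in lowered for kw in ["python", "def ", "function"]):
        return "python"
    if any(kw in lowered for kw in ["c++", "cpp", "#include", "cout"]):
        return "cpp"
    if any(kw in lowered for kw in ["compute", "calculate", "math", "+", "-", "*", "/"]):
        return "math"
    return "general"

assert detect_content_type("write a python function") == "python"
assert detect_content_type("what is 2 + 2?") == "math"
assert detect_content_type("well-known facts") == "math"   # the "-" keyword matches
assert detect_content_type("hello there") == "general"

As the third assertion shows, the single-character math keywords mean any message containing a hyphen, plus sign, or slash falls into the math branch unless a code keyword matches first.
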
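Note that the updated code does not stream in the token-by-token sense: pipe(...) returns only after generation finishes, and the loop then yields the completed text one character at a time, so the UI replays finished output. If incremental decoding were wanted instead, transformers provides TextIteratorStreamer; a sketch under that assumption, reusing the diff's sampling settings, with generation moved to a background thread:

# Sketch: true token-level streaming with TextIteratorStreamer, as an
# alternative to the commit's generate-then-replay loop (not part of the diff).
from threading import Thread
from transformers import TextIteratorStreamer

def respond_streaming(pipe, messages):
    # Decoded text chunks appear on this iterator as tokens are produced;
    # skip_prompt drops the echoed input.
    streamer = TextIteratorStreamer(
        pipe.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Apply the chat template by hand, since generate() is called directly.
    input_ids = pipe.tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(pipe.model.device)
    thread = Thread(
        target=pipe.model.generate,
        kwargs=dict(
            input_ids=input_ids,
            max_new_tokens=8192,
            temperature=0.7,
            do_sample=True,
            streamer=streamer,
        ),
    )
    thread.start()
    response_text = ""
    for chunk in streamer:       # blocks until the next tokens are decoded
        response_text += chunk
        yield response_text
    thread.join()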