akhaliq (HF Staff) committed
Commit 1df30c2 · verified · 1 Parent(s): 2bdd70f

Update app.py

Files changed (1)
  1. app.py +51 -28
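
In short, the commit switches the pipeline to torch_dtype="auto", replaces the hand-built "User:"/"Assistant:" prompt string and direct model.generate() call with a chat-style messages list passed to the pipeline (which applies the model's chat template), selects a system prompt by keyword-matching the user message, and lowers the generation cap from 10000 to 8192 new tokens. For orientation before the diff, a minimal sketch of the call pattern the new code lands on, assuming a transformers version recent enough to accept chat-format message lists (the example user message is made up):

# Minimal sketch of the messages-based generation path this commit adopts.
# Standalone: the Gradio/ZeroGPU wiring from the full app is omitted.
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="facebook/MobileLLM-R1-950M",
    torch_dtype="auto",   # let transformers pick the checkpoint's dtype
    device_map="auto",
)

# Chat-format input; the pipeline applies the model's chat template.
messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "What is a generator in Python?"},  # made-up example
]

outputs = pipe(messages, max_new_tokens=8192, temperature=0.7, do_sample=True)

# The pipeline returns the conversation with the assistant's reply
# appended as the final message.
print(outputs[0]["generated_text"][-1]["content"])

Handing the messages list straight to the pipeline avoids the tokenizer/generate plumbing the old code managed by hand.
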
app.py CHANGED
@@ -8,47 +8,70 @@ model_id = "facebook/MobileLLM-R1-950M"
 pipe = pipeline(
     "text-generation",
     model=model_id,
-    torch_dtype=torch.float16,
+    torch_dtype="auto",
     device_map="auto",
 )
 
 @spaces.GPU(duration=120)
 def respond(message, history):
-    # Build prompt from history
-    prompt = ""
+    # Build messages list from history
+    messages = []
+
+    # Add system message based on content type detection
+    if any(kw in message.lower() for kw in ["python", "def ", "function"]):
+        messages.append({
+            "role": "system",
+            "content": (
+                "\nYou are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.\n\n"
+                "Please use python programming language only.\n"
+                "You must use ```python for just the final solution code block with the following format:\n"
+                "```python\n# Your code here\n```\n"
+            )
+        })
+    elif any(kw in message.lower() for kw in ["c++", "cpp", "#include", "cout"]):
+        messages.append({
+            "role": "system",
+            "content": (
+                "\nYou are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.\n\n"
+                "Please use c++ programming language only.\n"
+                "You must use ```cpp for just the final solution code block with the following format:\n"
+                "```cpp\n// Your code here\n```\n"
+            )
+        })
+    elif any(kw in message.lower() for kw in ["compute", "calculate", "math", "+", "-", "*", "/"]):
+        messages.append({
+            "role": "system",
+            "content": "Please reason step by step, and put your final answer within \\boxed{}."
+        })
+    else:
+        messages.append({
+            "role": "system",
+            "content": "You are a helpful AI assistant."
+        })
+
+    # Add conversation history
     for user_msg, assistant_msg in history:
         if user_msg:
-            prompt += f"User: {user_msg}\n"
+            messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
-            prompt += f"Assistant: {assistant_msg}\n"
 
     # Add current message
-    prompt += f"User: {message}\nAssistant: "
-
-    # Generate response with streaming
-    streamer = pipe.tokenizer.decode
-
-    # Generate tokens
-    inputs = pipe.tokenizer(prompt, return_tensors="pt").to(pipe.model.device)
-
-    with torch.no_grad():
-        outputs = pipe.model.generate(
-            **inputs,
-            max_new_tokens=10000,
-            temperature=0.7,
-            do_sample=True,
-            pad_token_id=pipe.tokenizer.eos_token_id,
-        )
+    messages.append({"role": "assistant", "content": assistant_msg})
+    messages.append({"role": "user", "content": message})
 
-    # Decode the generated tokens, skipping the input tokens
-    generated_tokens = outputs[0][inputs['input_ids'].shape[-1]:]
+    # Generate response
+    outputs = pipe(
+        messages,
+        max_new_tokens=8192,
+        temperature=0.7,
+        do_sample=True,
+    )
 
-    # Stream the output token by token
+    # Extract and stream the generated text
+    full_response = outputs[0]["generated_text"][-1]["content"]
     response_text = ""
-    for i in range(len(generated_tokens)):
-        token = generated_tokens[i:i+1]
-        token_text = pipe.tokenizer.decode(token, skip_special_tokens=True)
-        response_text += token_text
+    for char in full_response:
+        response_text += char
         yield response_text
 
 # Create the chat interface
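
The system-prompt routing in the new respond() is a keyword heuristic, with Python keywords taking precedence over C++, and C++ over math. Restated as a hypothetical standalone helper (the function name and return labels are illustrative; the keyword lists and branch order are copied from the diff), which makes the matching behaviour easy to check:

# Hypothetical refactor of the commit's system-prompt routing into a
# testable helper; keyword lists and precedence mirror the diff exactly.
def detect_content_type(message: str) -> str:
    lowered = message.lower()
    if any(kw in lowered for kw in ["python", "def ", "function"]):
        return "python"
    if any(kw in lowered for kw in ["c++", "cpp", "#include", "cout"]):
        return "cpp"
    if any(kw in lowered for kw in ["compute", "calculate", "math", "+", "-", "*", "/"]):
        return "math"
    return "general"

assert detect_content_type("write a python function") == "python"
assert detect_content_type("what is 2 + 2?") == "math"
assert detect_content_type("well-known facts") == "math"   # the "-" keyword matches
assert detect_content_type("hello there") == "general"

As the third assertion shows, the single-character math keywords mean any message containing a hyphen, plus sign, or slash falls into the math branch unless a code keyword matches first.
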
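Note that the updated code does not stream in the token-by-token sense: pipe(...) returns only after generation finishes, and the loop then yields the completed text one character at a time, so the UI replays finished output. If incremental decoding were wanted instead, transformers provides TextIteratorStreamer; a sketch under that assumption, reusing the diff's sampling settings, with generation moved to a background thread:

# Sketch: true token-level streaming with TextIteratorStreamer, as an
# alternative to the commit's generate-then-replay loop (not part of the diff).
from threading import Thread
from transformers import TextIteratorStreamer

def respond_streaming(pipe, messages):
    # Decoded text chunks appear on this iterator as tokens are produced;
    # skip_prompt drops the echoed input.
    streamer = TextIteratorStreamer(
        pipe.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Apply the chat template by hand, since generate() is called directly.
    input_ids = pipe.tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(pipe.model.device)
    thread = Thread(
        target=pipe.model.generate,
        kwargs=dict(
            input_ids=input_ids,
            max_new_tokens=8192,
            temperature=0.7,
            do_sample=True,
            streamer=streamer,
        ),
    )
    thread.start()
    response_text = ""
    for chunk in streamer:       # blocks until the next tokens are decoded
        response_text += chunk
        yield response_text
    thread.join()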