Reapply "ai: Append reasoning tag."
commit 77e3d5f76d19fa7474b15d168425c13ddf0ad885
Updated with new code.
Needed for: https://huggingface.co/spaces/hadadrjt/api
- src/main/gradio.py +61 -27
src/main/gradio.py CHANGED
@@ -55,54 +55,88 @@ async def respond_async(multi, history, model_display, sess, custom_prompt, deep_search):
     if msg_input["text"]:
         inp += msg_input["text"]
 
-    # Append user input to chat history
-    history.append([inp,
+    # Append user input to chat history
+    history.append([inp, ""]) # placeholder
 
     # Yield updated history and disable input while AI is responding
     yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
-
-
-
+
+    # Create queue for streaming AI response chunks
+    queue = asyncio.Queue()
+
     async def background():
         """
-
-
-
+        This coroutine handles streaming responses from an AI model asynchronously.
+        It processes two types of streamed data separately: 'reasoning' chunks and 'content' chunks.
+        The function supports graceful cancellation if a stop event or cancel token is triggered in the session.
+
+        Reasoning text is accumulated until content streaming starts, after which reasoning is ignored.
+        Special tags <think> and </think> are managed to mark reasoning sections for UI display.
+        Content chunks are streamed and accumulated separately, with incremental UI updates.
+
+        When streaming ends, any open reasoning tags are closed properly.
+        Finally, the function signals completion by putting None into the queue and returns the full content response.
         """
-        reasoning = "" #
-        responses = "" #
-        content_started = False # Flag to indicate content streaming
-        ignore_reasoning = False # Flag to ignore reasoning after content starts
-
-
+        reasoning = "" # String to accumulate reasoning text chunks
+        responses = "" # String to accumulate content text chunks
+        content_started = False # Flag to indicate if content streaming has begun
+        ignore_reasoning = False # Flag to ignore reasoning after content starts streaming
+        think_opened = False # Flag to track if reasoning <think> tag has been sent
+
+        # Asynchronously iterate over streamed response chunks from the AI model
         async for typ, chunk in chat_with_model_async(history, inp, model_display, sess, custom_prompt, deep_search):
-            # Break if user requested stop or cancellation flagged
+            # Break the loop if user requested stop or cancellation is flagged
            if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
                break
-
+
            if typ == "reasoning":
-                # Append reasoning chunk unless ignoring reasoning after content
+                # Append reasoning chunk unless ignoring reasoning after content started
                if ignore_reasoning:
                    continue
-
-
+                # Handle opening <think> tag for reasoning
+                if chunk.strip() == "<think>":
+                    if not think_opened:
+                        think_opened = True # Mark that reasoning tag has been opened
+                    continue # Skip sending the tag itself to UI
+                if not think_opened:
+                    # If reasoning tag not yet opened, prepend it and mark as opened
+                    reasoning += "<think>\n" + chunk
+                    think_opened = True
+                else:
+                    # Append reasoning chunk normally
+                    reasoning += chunk
+                # Send current reasoning content to queue for UI update (without sending tag again)
                await queue.put(("reasoning", reasoning))
-
+
            elif typ == "content":
                if not content_started:
-                    # On first content chunk,
+                    # On first content chunk, mark content started and ignore further reasoning
                    content_started = True
                    ignore_reasoning = True
+                    if think_opened:
+                        # Close reasoning tag before sending content
+                        reasoning += "\n</think>\n\n"
+                        await queue.put(("reasoning", reasoning)) # Update UI with closed reasoning
+                    else:
+                        # No reasoning was sent, clear reasoning display in UI
+                        await queue.put(("reasoning", ""))
+                    # Start accumulating content and send initial content to UI replacing placeholder
                    responses = chunk
-                    await queue.put(("
-                    await queue.put(("replace", responses)) # Replace placeholder with content start
+                    await queue.put(("replace", responses))
                else:
-                    # Append subsequent content chunks and update UI
+                    # Append subsequent content chunks and update UI incrementally
                    responses += chunk
                    await queue.put(("append", responses))
-
-
-
+
+        # If stream ends without content, close reasoning tag if it was opened
+        if think_opened and not content_started:
+            reasoning += "\n</think>\n\n"
+            await queue.put(("reasoning", reasoning))
+
+        # Signal completion of streaming by putting None into the queue
+        await queue.put(None)
+        # Return the full accumulated content response
+        return responses
 
     bg_task = asyncio.create_task(background()) # Start background streaming task
     stop_task = asyncio.create_task(sess.stop_event.wait()) # Task to wait for stop event