ai: Enable API for Next-Gen!
And also coordinate all the necessary arrangements for it.

- README.md +24 -27
- app.py +1 -1
- assets/bin/ai +135 -0
- assets/bin/install.sh +33 -0
- src/core/server.py +97 -241
- src/core/transport/__init__.py +0 -0
- src/core/transport/aiohttp.py +151 -0
- src/core/transport/httpx.py +152 -0
- src/ui/interface.py +8 -3
- src/ui/reasoning.py +0 -75
- src/utils/instruction.py +55 -0
- src/utils/reasoning.py +140 -0
- src/utils/session_mapping.py +39 -28
- src/utils/time.py +49 -0
README.md
CHANGED
@@ -12,28 +12,29 @@ app_port: 7860
 pinned: true
 short_description: Just a Rather Very Intelligent System
 models:
-- hadadrjt/JARVIS
-
--
-- deepseek-ai/DeepSeek-
-- deepseek-ai/DeepSeek-R1
-- deepseek-ai/DeepSeek-R1-
-- deepseek-ai/DeepSeek-R1-Distill-
--
-- google/gemma-3-
-- google/gemma-3-
--
-- meta-llama/Llama-3.
-- meta-llama/Llama-3.
-- meta-llama/Llama-
-- meta-llama/Llama-4-
--
-- Qwen/Qwen2.5-VL-
-- Qwen/Qwen2.5-VL-
-- Qwen/
-- Qwen/
--
--
+- hadadrjt/JARVIS
+# Credits for several models previously used across multiple platforms
+- agentica-org/DeepCoder-14B-Preview
+- deepseek-ai/DeepSeek-V3-0324
+- deepseek-ai/DeepSeek-R1
+- deepseek-ai/DeepSeek-R1-0528
+- deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
+- deepseek-ai/DeepSeek-R1-Distill-Llama-70B
+- google/gemma-3-1b-it
+- google/gemma-3-4b-it
+- google/gemma-3-27b-it
+- meta-llama/Llama-3.1-8B-Instruct
+- meta-llama/Llama-3.2-3B-Instruct
+- meta-llama/Llama-3.3-70B-Instruct
+- meta-llama/Llama-4-Maverick-17B-128E-Instruct
+- meta-llama/Llama-4-Scout-17B-16E-Instruct
+- Qwen/Qwen2.5-VL-3B-Instruct
+- Qwen/Qwen2.5-VL-32B-Instruct
+- Qwen/Qwen2.5-VL-72B-Instruct
+- Qwen/QwQ-32B
+- Qwen/Qwen3-235B-A22B
+- mistralai/Devstral-Small-2505
+- google/gemma-3n-E4B-it-litert-preview
 ---
 
 ## Credits
@@ -44,8 +45,4 @@ Thanks are extended to [SearXNG](https://paulgo.io), [Baidu](https://www.baidu.c
 
 The latest version of Deep Search is entirely inspired by the [OpenWebUI](https://openwebui.com/t/cooksleep/infinite_search) tools script.
 
-Special appreciation is given to [Hugging Face](https://huggingface.co) for hosting this Space as the primary deployment platform.
-
-## API
-
-Efforts are underway to restore API and multi-platform support at the earliest opportunity.
+Special appreciation is given to [Hugging Face](https://huggingface.co) for hosting this Space as the primary deployment platform.
app.py
CHANGED
@@ -16,4 +16,4 @@ if __name__ == "__main__":
 
     # Call the 'launch' method on the 'app' object to start the user interface.
    # This typically opens the UI window or begins the event loop, making the application interactive.
-    app.queue(default_concurrency_limit=2).launch(
+    app.queue(default_concurrency_limit=2).launch(share=True, quiet=True, pwa=True)
assets/bin/ai
ADDED
@@ -0,0 +1,135 @@
#!/usr/bin/env python3
#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#

import sys # Provides access to command-line arguments and system-related functions for input handling
import re # Provides regular expression operations used here to parse and extract code blocks from text
from gradio_client import Client # Imports Client class to interact programmatically with a Gradio-hosted AI model endpoint
from rich.console import Console, Group # Imports Console for rich text output, Group to combine multiple renderables for display
from rich.markdown import Markdown # Imports Markdown renderer to format and display markdown text in the terminal
from rich.syntax import Syntax # Imports Syntax highlighter to render code blocks with language-specific coloring
from rich.live import Live # Imports Live to enable live-updating terminal output for streaming content display

console = Console() # Creates a Console instance for enhanced terminal output with colors and formatting
client = Client("https://hadadrjt-ai.hf.space/") # Initializes a Gradio client connected to the specified AI service URL

def layout(text):
    """
    Processes the input text to separate markdown content and code blocks, then formats them for terminal display.
    Code blocks are detected by triple backticks with language specifiers. The function returns a Group object
    combining Markdown and Syntax renderables for rich output.

    Args:
        text (str): The input string potentially containing markdown and fenced code blocks.

    Returns:
        Group: A rich Group object containing formatted markdown and syntax-highlighted code blocks.
    """
    if not isinstance(text, str):
        # Convert input to string if it is not already, to avoid errors during regex processing
        text = str(text)

    # Use regex to find all code blocks in the text matching the pattern:
    # Two newlines, triple backticks with an optional language identifier, a newline, code content, a newline, triple backticks, then three newlines
    # The pattern captures language and code separately for formatting
    code_blocks = list(re.finditer(r"\n\n```(\w+)?\n(.*?)\n```\n\n\n", text, re.DOTALL))

    segments = [] # List to hold markdown and syntax segments for rendering
    last_end = 0 # Tracks the end position of the last matched code block to slice text correctly

    for block in code_blocks:
        # Extract text before the current code block
        pre = text[last_end:block.start()]
        if pre.strip():
            # If pre-block text is not just whitespace, convert it to Markdown renderable
            segments.append(Markdown(pre.strip()))

        # Extract language and code content from the current code block
        lang, code = block.group(1) or "text", block.group(2).rstrip()

        # Append a Syntax renderable with the extracted code and language for syntax highlighting
        segments.append(Syntax(code, lang, theme="monokai", line_numbers=False, word_wrap=True))

        # Update last_end to the end of the current code block for next iteration
        last_end = block.end()

    # Append any remaining text after the last code block as Markdown
    tail = text[last_end:]
    if tail.strip():
        segments.append(Markdown(tail.strip()))

    # Return a Group combining all markdown and syntax segments for unified rendering
    return Group(*segments)

def main():
    """
    Main entry point of the script that handles user input, sends it to the AI model, and streams the response.
    The function supports command-line input or defaults to a greeting message. It streams the AI's response
    token-by-token, updating the terminal output live with proper markdown and code formatting.

    Workflow:
        1. Parse command-line arguments to form the user input message.
        2. Define parameters for the AI model including model precision, reasoning mode, and feature toggles.
        3. Submit the request to the remote AI service and receive a streaming response.
        4. Incrementally update the console output with streamed tokens, formatting markdown and code blocks dynamically.
    """

    # Extract command-line arguments excluding the script name
    args = sys.argv[1:]

    # Join arguments into a single string as user input; default to "Hi!" if no input provided
    user_input = " ".join(args) if args else "Hi!"

    # Define parameters for the AI model request
    params = dict(
        message=user_input, # The input message to send to the AI model
        model_label="Q8_K_XL", # Specifies the model precision or variant to use
        thinking=True, # Enables reasoning mode for more thoughtful responses
        image_gen=False, # Disables image generation as terminal cannot display images
        audio_gen=False, # Disables audio generation as terminal cannot play audio
        search_gen=True, # Enables deep search feature for enhanced response accuracy
        # Type /dp followed by the instructions to search the web
        api_name="/api" # API endpoint path to use on the Gradio client
    )

    # Submit the request to the AI model and start receiving a streaming response
    job = client.submit(**params)

    partial = "" # Stores the accumulated response text received so far

    # Use Live context manager to dynamically update the console output as new tokens arrive
    with Live(layout(partial), console=console) as live:
        for chunk in job:
            # Each chunk can be a list or a single item; extract the 'content' field if present
            if isinstance(chunk, list):
                # Iterate through list items to find a dictionary with a 'content' key
                for item in chunk:
                    if isinstance(item, dict) and 'content' in item:
                        new_response = item['content'] # Extract the new content token
                        break
                else:
                    # If no content found in the list, convert the entire chunk to a string
                    new_response = str(chunk)
            else:
                # If chunk is not a list, convert it directly to a string
                new_response = str(chunk)

            # Determine the new token by removing the already received part from the new response
            if new_response.startswith(partial):
                new_token = new_response[len(partial):]
            else:
                # If the new response does not start with partial, treat the entire response as the new token
                new_token = new_response

            # Update the accumulated partial response with the new token
            partial = new_response

            # Update the live display in the terminal with the newly formatted content
            live.update(layout(partial))

# Entry point check to ensure main() runs only if this script is executed directly
if __name__ == "__main__":
    main() # Call the main function to start the program
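For reference, the same Space endpoint this script talks to can also be called directly with gradio_client. The sketch below is illustrative only: it reuses the /api endpoint and the parameter names visible in the script above, and swaps the streaming submit() for a blocking predict() call.

# Minimal sketch of a one-shot (non-streaming) call to the same Gradio
# endpoint used by assets/bin/ai. Endpoint path and parameter names are
# copied from the script above; treat them as illustrative, not a stable
# public contract.
from gradio_client import Client

client = Client("https://hadadrjt-ai.hf.space/")
result = client.predict(
    message="Explain HTTP/2 in one paragraph.",  # User prompt
    model_label="Q8_K_XL",                       # Model precision/variant label
    thinking=True,                               # Enable reasoning mode
    image_gen=False,                             # No image generation in a plain API call
    audio_gen=False,                             # No audio generation either
    search_gen=False,                            # Deep Search off for this example
    api_name="/api"                              # Endpoint registered by the Space
)
print(result)  # The final aggregated response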
assets/bin/install.sh
ADDED
@@ -0,0 +1,33 @@
#!/bin/sh
#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#

echo "Installing required Python packages..."
pip install gradio_client rich --upgrade
echo "Installation complete."
echo ""
echo ""
echo "Downloading the J.A.R.V.I.S. script..."
wget https://huggingface.co/spaces/hadadrjt/ai/raw/main/assets/bin/ai
echo "Download complete."
echo ""
echo ""
echo "Setting executable permission..."
chmod a+x ai
echo "Permission set."
echo ""
echo ""
echo "Removing installer script..."
rm install.sh
echo "Done."
echo ""
echo ""
echo "To send a regular message:"
echo "./ai Your message here"
echo ""
echo "To use Deep Search mode:"
echo "./ai /dp Your message here"
echo ""
echo ""
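Assuming the installer is published alongside the client script at assets/bin/install.sh in this Space (an inferred path, based on the wget URL used above), it can be fetched and run with wget https://huggingface.co/spaces/hadadrjt/ai/raw/main/assets/bin/install.sh followed by sh install.sh.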
src/core/server.py
CHANGED
@@ -3,268 +3,124 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-import json #
-import uuid #
-from typing import List, Dict
-from
-from
-from src.utils.
-from src.utils.
-
-from src.
-import
-import
-import
-
+import json # Import json module to work with JSON objects for request and response handling
+import uuid # Import uuid module to generate unique identifiers if needed for tracking or sessions
+from typing import List, Dict # Import type hinting for function parameters to improve code clarity and checking
+from config import * # Import all configuration variables such as server lists and tokens from config files
+from src.utils.session_mapping import get_host # Import helper function to map session ID to appropriate server host
+from src.utils.ip_generator import generate_ip # Import utility function to generate random IP addresses for headers
+from src.utils.helper import mark # Import function to mark servers as failed for retry or logging purposes
+import asyncio # Import asyncio module to enable asynchronous programming constructs in the function
+import httpx # Needed for the httpx.HTTPStatusError handler below (added here; may also arrive via config's star import)
+import aiohttp # Needed for the aiohttp.ClientResponseError handler below (added here; may also arrive via config's star import)
+from src.utils.time import get_time # Import function to get current date and time in required format
+from src.utils.reasoning import reasoning_tag_open, reasoning_tag_close # Import functions to wrap reasoning text with tags
+from src.utils.instruction import set_instructions # Import function to generate system instructions based on mode and time
+from src.core.transport.httpx import httpx_transport # Import primary HTTP transport method using httpx for streaming
+from src.core.transport.aiohttp import aiohttp_transport # Import fallback HTTP transport method using aiohttp
+
+# Define the main asynchronous function to communicate with AI server and stream responses
 async def jarvis(
-    session_id: str, # Unique session
-    model: str, # AI model name
-    history: List[Dict[str, str]], # List of previous conversation messages
-    user_message: str, #
-    mode: str, # Mode string
-    files=None, # Optional files
-    temperature: float = 0.6, # Sampling temperature controlling randomness
-    top_k: int = 20, #
-    min_p: float = 0, # Minimum probability threshold for token
-    top_p: float = 0.95, #
-    repetition_penalty: float = 1, #
+    session_id: str, # Unique identifier for the user session to route requests correctly
+    model: str, # AI model name or identifier to specify which model to use for generation
+    history: List[Dict[str, str]], # List of previous conversation messages to maintain context
+    user_message: str, # The latest message input from the user to send to the AI model
+    mode: str, # Mode string controlling behavior such as enabling or disabling reasoning output
+    files=None, # Optional parameter for any files attached by the user to include in the request
+    temperature: float = 0.6, # Sampling temperature controlling randomness of AI responses
+    top_k: int = 20, # Limits token selection to top-k probable tokens for response generation
+    min_p: float = 0, # Minimum probability threshold for token sampling to filter unlikely tokens
+    top_p: float = 0.95, # Cumulative probability cutoff for nucleus sampling of tokens
+    repetition_penalty: float = 1, # Parameter to penalize repeated tokens to reduce repetition in output
 ):
     """
-    and yields incremental parts of the AI-generated response as they arrive. It integrates CSS styling into
-    the reasoning output only if the mode is not '/no_think', preserving the behavior where reasoning is streamed
-    first inside a styled HTML block, followed by the main content streamed normally.
-
-    The implementation uses both httpx (with HTTP/2 support) and aiohttp to ensure compatibility and robustness
-    in streaming responses.
-
-    Args:
-        session_id (str): Identifier for the user session to maintain consistent server assignment.
-        model (str): Name of the AI model to use for generating the response.
-        history (List[Dict[str, str]]): List of previous messages in the conversation.
-        user_message (str): The current message from the user to send to the AI model.
-        mode (str): Contextual instructions to guide the AI model's response style.
-        files (optional): Additional files or attachments to include with the user message.
-        temperature (float): Controls randomness in token generation.
-        top_k (int): Limits token selection to top_k probable tokens.
-        min_p (float): Minimum probability threshold for token selection.
-        top_p (float): Nucleus sampling cumulative probability threshold.
-        repetition_penalty (float): Factor to reduce token repetition.
-
-    Reasoning is wrapped in a styled HTML details block and streamed incrementally only if mode is not '/no_think'.
-    After reasoning finishes, the main content is streamed normally.
-
-    If the server returns a specific error code indicating it is busy, it retries with another server.
-    If all servers are busy or fail, it yields a message indicating the server is busy.
-    """
-    tried = set() # Set to track servers already tried to avoid repeated retries
-
-
-    error = setup["error"] # HTTP error code integer which triggers retry
-    tried.add(server) # Mark this server as tried to prevent retrying immediately
-
-    date =
-
-    instructions =
-
-    messages = history.copy()
-
-    # Insert system
-    messages.insert(0, {"role": "system", "content": instructions})
-
-    msg = {"role": "user", "content": user_message}
-    if files:
-        msg["files"] = files
-    messages.append(msg) # Append user message to the
-
-    # Prepare HTTP headers
-    headers = {
-        "Authorization": f"Bearer {token}", # Bearer token for
-        "Content-Type": "application/json", #
-        "X-Forwarded-For": generate_ip(), #
-    }
-
-    payload = {
-        "model": model,
-        "messages": messages,
-        "stream": True, # Enable streaming response
-        "temperature": temperature,
-        "top_k": top_k,
-        "min_p": min_p,
-        "top_p": top_p,
-        "repetition_penalty": repetition_penalty,
-    }
-
-    reasoning = "" # String accumulator for reasoning text from the AI
-    reasoning_check = None # Flag to detect presence of reasoning in response; None means not checked yet
-    reasoning_done = False # Flag marking reasoning completion
-    content = "" # String accumulator for main content text from the AI
-
-    try:
-
-                    continue
-                try:
-                    # Parse JSON data after "data:" prefix which contains incremental response delta
-                    data = json.loads(chunk[5:])
-                    # Extract incremental delta message from first choice in response
-                    choice = data["choices"][0]["delta"]
-
-                    # On first delta received, detect if 'reasoning' field is present and non-empty
-                    if reasoning_check is None:
-                        # Initialize reasoning_check to empty string if reasoning exists and is non-empty, else None
-                        reasoning_check = "" if ("reasoning" in choice and choice["reasoning"]) else None
-
-                    # If reasoning is present and mode is not '/no_think' and reasoning not done yet
-                    if (
-                        reasoning_check == "" # Reasoning detected in response
-                        and mode != "/no_think" # Mode allows reasoning output
-                        and not reasoning_done # Reasoning phase not finished yet
-                        and "reasoning" in choice # Current delta includes reasoning part
-                        and choice["reasoning"] # Reasoning content is not empty
-                    ):
-                        reasoning += choice["reasoning"] # Append incremental reasoning text
-                        # Yield reasoning wrapped in styled HTML block with details expanded
-                        yield styles(reasoning=reasoning, content="", expanded=True)
-                        continue # Continue streaming reasoning increments without processing content yet
-
-                    # When reasoning ends and content starts, mark reasoning done, yield empty string, then content
-                    if (
-                        reasoning_check == "" # Reasoning was detected previously
-                        and mode != "/no_think" # Mode allows reasoning output
-                        and not reasoning_done # Reasoning phase not finished yet
-                        and "content" in choice # Current delta includes content part
-                        and choice["content"] # Content is not empty
-                    ):
-                        reasoning_done = True # Mark reasoning phase complete
-                        yield "" # Yield empty string to signal end of reasoning block to the consumer
-                        content += choice["content"] # Start accumulating content text
-                        yield content # Yield first part of content to the consumer
-                        continue # Continue streaming content increments
-
-                    # If no reasoning present or reasoning done, accumulate content and yield incrementally
-                    if (
-                        (reasoning_check is None or reasoning_done or mode == "/no_think") # No reasoning or reasoning finished or mode disables reasoning
-                        and "content" in choice # Current delta includes content part
-                        and choice["content"] # Content is not empty
-                    ):
-                        content += choice["content"] # Append incremental content text
-                        yield content # Yield updated content string to the consumer
-                except Exception:
-                    # Ignore exceptions during JSON parsing or key access and continue streaming
-                    continue
-        return # Exit function after successful streaming completion
-
-    except httpx.HTTPStatusError as e:
-        # If server returns specific error code indicating busy, retry with another server
-        if e.response.status_code == error:
-            # Continue to next iteration to try a different server
-            continue
-        else:
-            mark(server)
-    except Exception:
-        # For other exceptions (network errors, timeouts), mark server as busy/unavailable
-        mark(server)
-
-    # If httpx fails or server is busy, fallback to aiohttp for robustness and compatibility
-    try:
-        # Create aiohttp client session with no timeout for streaming
-        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=None)) as session:
-            # Open async streaming POST request to Jarvis server endpoint with headers and JSON payload
-            async with session.post(host, headers=headers, json=payload) as resp:
-                # Raise for status to catch HTTP errors
-                resp.raise_for_status()
-                # Iterate asynchronously over each line of streaming response as it arrives
-                async for line_bytes in resp.content:
-                    # Decode bytes to string and strip whitespace
-                    line = line_bytes.decode("utf-8").strip()
-                    # Skip lines that do not start with "data:" prefix as per SSE format
-                    if not line.startswith("data:"):
-                        continue
-                    try:
-                        # Parse JSON data after "data:" prefix which contains incremental response delta
-                        data = json.loads(line[5:])
-                        # Extract incremental delta message from first choice in response
-                        choice = data["choices"][0]["delta"]
-
-                        # On first delta received, detect if 'reasoning' field is present and non-empty
-                        if reasoning_check is None:
-                            reasoning_check = "" if ("reasoning" in choice and choice["reasoning"]) else None
-
-
-                            and mode != "/no_think"
-                            and not reasoning_done
-                            and "reasoning" in choice
-                            and choice["reasoning"]
-                        ):
-                            reasoning += choice["reasoning"]
-                            yield styles(reasoning=reasoning, content="", expanded=True)
-                            continue
-
-
-                            content += choice["content"]
-                            yield content
-                            continue
-
-                        # If no reasoning present or reasoning done, accumulate content and yield incrementally
-                        if (
-                            (reasoning_check is None or reasoning_done or mode == "/no_think")
-                            and "content" in choice
-                            and choice["content"]
-                        ):
-                            content += choice["content"]
-                            yield content
-                    except Exception:
-                        # Ignore exceptions during JSON parsing or key access and continue streaming
-                        continue
-            return # Exit function after successful streaming completion with aiohttp
-
-    except aiohttp.ClientResponseError as e:
-        # If server returns specific error code indicating busy, retry with another server
-        if e.status == error:
-            continue # Try next available server
-        else:
-            mark(server) # Mark server as
-
-    # If all servers tried and
-    yield "The server is currently busy. Please wait a moment or try again later
-    return # End
+    Stream AI response from multiple configured servers using asynchronous HTTP requests.
+    Yields chunks of response that include reasoning and content parts as they arrive.
+    """
+
+    # Initialize a set to keep track of servers that have already been attempted
+    tried = set() # Prevents retrying the same server multiple times to avoid redundant requests
+
+    # Loop until a server successfully returns a response or all servers have been exhausted
+    while len(tried) < len(auth): # Continue trying servers until all configured servers are tried
+
+        # Retrieve server configuration details mapped to the current session
+        setup = get_host(session_id) # Get server host, token, and error codes for the session
+        server = setup["jarvis"] # Extract server name identifier for logging and marking
+        host = setup["endpoint"] # Extract server endpoint URL for sending requests
+        token = setup["token"] # Extract authentication token for authorized access
+        error = setup["error"] # Extract HTTP status code that indicates retryable error
+        tried.add(server) # Add current server to tried set to avoid retrying it again
+
+        # Get the current date and time for system instruction
+        date = get_time() # Retrieve current timestamp to include in system instructions
+
+        # Generate system instructions
+        instructions = set_instructions(mode, date) # Create system instructions guiding AI behavior
+
+        # Make a shallow copy of the conversation history to avoid mutating original list
+        messages = history.copy() # Duplicate previous messages to safely modify for this request
+
+        # Insert the system instruction message at the beginning of the message list
+        messages.insert(0, {"role": "system", "content": instructions}) # Add system instructions as first message
+
+        # Construct the user message dictionary with role and content
+        msg = {"role": "user", "content": user_message} # Prepare user's latest input for the request
+        if files: # Check if any files are attached to include in the message payload
+            msg["files"] = files # Attach files to the user message to send alongside text input
+        messages.append(msg) # Append the user message (with optional files) to the message history
+
+        # Prepare HTTP headers including authorization and content type for the request
+        headers = {
+            "Authorization": f"Bearer {token}", # Bearer token for authenticating with the AI server
+            "Content-Type": "application/json", # Specify that the request body is JSON formatted
+            "X-Forwarded-For": generate_ip(), # Randomly generated IP address to simulate client origin
+        }
+
+        # Build the JSON payload containing model, messages, and generation parameters
+        payload = {
+            "model": model, # Specify which AI model to use for generating responses
+            "messages": messages, # Provide the full message history including system and user inputs
+            "stream": True, # Enable streaming mode to receive partial response chunks progressively
+            "temperature": temperature, # Control randomness in token sampling for response diversity
+            "top_k": top_k, # Restrict token selection to top-k most probable tokens
+            "min_p": min_p, # Set minimum probability threshold to filter out unlikely tokens
+            "top_p": top_p, # Use nucleus sampling with cumulative probability cutoff
+            "repetition_penalty": repetition_penalty, # Penalize repeated tokens to reduce redundancy
+        }
+
+        # Attempt to stream the response using the primary HTTP transport method (httpx)
+        try:
+            async for chunk in httpx_transport(host, headers, payload, mode): # Stream response chunks asynchronously
+                yield chunk # Yield each chunk to the caller as it arrives for real-time processing
+            return # Exit the function if streaming completes successfully without errors
+
+        # Handle HTTP errors with status codes that indicate retryable failures
+        except httpx.HTTPStatusError as e: # Catch HTTP errors specific to httpx transport
+            if e.response.status_code == error: # If error code matches retryable error, try next server
+                continue # Skip current server and proceed to next iteration to retry
+            else:
+                mark(server) # Mark the current server as failed for non-retryable errors
+
+        # Handle any other unexpected exceptions during httpx transport
+        except Exception: # Catch all other exceptions to prevent crashing
+            mark(server) # Mark server as failed due to unexpected error
+
+        # If the primary transport fails, attempt to stream response using fallback transport (aiohttp)
+        try:
+            async for chunk in aiohttp_transport(host, headers, payload, mode): # Use fallback streaming method
+                yield chunk # Yield streamed chunks to caller as they arrive
+            return # Exit if fallback transport succeeds
+
+        # Handle aiohttp-specific response errors with retryable status codes
+        except aiohttp.ClientResponseError as e: # Catch HTTP response errors from aiohttp transport
+            if e.status == error: # Retry on matching error code by trying next server
+                continue # Continue to next server attempt
+            else:
+                mark(server) # Mark server as failed for non-retryable errors
+
+        # Handle any other exceptions during aiohttp transport
+        except Exception: # Catch generic exceptions to avoid crashing
+            mark(server) # Mark fallback server as failed
+
+    # If all servers have been tried and failed, yield a user-friendly error message
+    yield "The server is currently busy. Please wait a moment or try again later" # Inform user of service unavailability
+    return # End the function after exhausting all servers
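For orientation, here is a minimal sketch of how a caller might drive this generator; the session id, model name, and mode value are illustrative assumptions, not values taken from this repo's UI code.

# Hypothetical stand-alone consumer of the jarvis() async generator,
# for illustration only; the real caller is the Gradio UI layer.
import asyncio

from src.core.server import jarvis

async def demo():
    async for chunk in jarvis(
        session_id="demo-session",   # Any stable per-user identifier
        model="hadadrjt/JARVIS",     # Model name passed through to the backend
        history=[],                  # No earlier conversation turns
        user_message="Hello!",       # Current user input
        mode="",                     # Empty mode string; "/no_think" would disable reasoning
    ):
        print(chunk)  # Each yield is the accumulated reasoning or content so far

asyncio.run(demo())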
src/core/transport/__init__.py
ADDED
File without changes
src/core/transport/aiohttp.py
ADDED
@@ -0,0 +1,151 @@
#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#

import json # Import json module to parse JSON formatted strings from server response lines
import aiohttp # Import aiohttp library to perform asynchronous HTTP requests and handle streaming responses
# Import helper functions to add opening and closing reasoning tags around reasoning text
from src.utils.reasoning import reasoning_tag_open, reasoning_tag_close # Functions to wrap reasoning with tags

# Define an asynchronous function to send a POST request and stream the response from the server
async def aiohttp_transport(host: str, headers: dict, payload: dict, mode: str):
    """
    This asynchronous function establishes a streaming HTTP POST connection to the specified server endpoint
    using the aiohttp library. It sends a JSON payload containing the request parameters and headers, and
    processes the server's streamed response line by line in real time.

    The function is designed to handle responses that include two types of data chunks: reasoning text and
    content text. Reasoning text represents intermediate thought processes or explanations generated by the AI,
    while content text represents the final output or answer.

    The function maintains several internal state variables to manage the streaming process:

    - 'reasoning' accumulates the reasoning text segments as they arrive incrementally from the server.
    - 'reasoning_tag' is a boolean flag that ensures the opening reasoning tag (<think>) is inserted only once.
    - 'reasoning_check' is used to detect if the reasoning field is present in the initial streamed data chunk,
      which determines whether reasoning processing should occur.
    - 'reasoning_done' indicates when the reasoning phase has completed and the function should switch to
      accumulating content text.
    - 'content' accumulates the main content text after reasoning finishes.

    The function reads the response stream asynchronously, decoding each line from bytes to UTF-8 strings,
    and filters out any lines that do not start with the expected "data:" prefix. For valid data lines, it
    parses the JSON payload to extract incremental updates contained within the 'delta' field of the first
    choice in the response.

    Upon detecting reasoning text in the delta, and if the current mode allows reasoning output (i.e., mode is
    not "/no_think"), the function inserts an opening <think> tag once and appends subsequent reasoning chunks,
    carefully removing any duplicate tags to maintain clean formatting. It yields these reasoning segments
    progressively to the caller, enabling real-time display of the AI's intermediate thoughts.

    When the response transitions from reasoning to content (indicated by the presence of 'content' in the delta),
    the function closes the reasoning block with a closing </think> tag if it was opened, yields the final reasoning
    block, and then begins accumulating and yielding content chunks. An empty string is yielded as a separator
    between reasoning and content for clarity.

    If reasoning is absent, completed, or disabled by mode, the function directly accumulates and yields content
    chunks as they arrive.

    The function includes robust error handling to gracefully skip over any malformed JSON chunks or transient
    connection issues without interrupting the streaming process. This ensures continuous and reliable streaming
    of AI responses even in the face of occasional data irregularities.

    Overall, this function provides a comprehensive and efficient mechanism to stream, parse, and yield AI-generated
    reasoning and content in real time, supporting interactive and dynamic user experiences.
    """

    # Initialize an empty string to accumulate streamed reasoning text segments from the response
    reasoning = "" # This will hold the reasoning text as it is received incrementally

    # Boolean flag to track if the opening <think> tag has been inserted to avoid duplicates
    reasoning_tag = False # Ensures the reasoning opening tag is added only once

    # Variable to check presence of reasoning field in the first chunk of streamed data
    reasoning_check = None # Used to determine if reasoning should be processed for this response

    # Flag to indicate that reasoning section has finished and content streaming should start
    reasoning_done = False # Marks when reasoning is complete and content output begins

    # Initialize an empty string to accumulate the main content text from the response
    content = "" # Will hold the actual content output after reasoning is finished

    # Create an aiohttp client session with no timeout to allow indefinite streaming
    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=None)) as session:
        # Send a POST request to the given host with specified headers and JSON payload
        async with session.post(host, headers=headers, json=payload) as resp:
            resp.raise_for_status() # Raise an exception if HTTP response status is not successful (2xx)

            # Iterate asynchronously over each line of bytes in the streamed response content
            async for line_bytes in resp.content:
                line = line_bytes.decode("utf-8").strip() # Decode bytes to UTF-8 string and strip whitespace

                # Skip processing for lines that do not start with the expected "data:" prefix
                if not line.startswith("data:"):
                    continue # Ignore lines without data prefix and continue to next streamed line

                try:
                    # Parse the JSON object from the line after removing the "data:" prefix
                    data = json.loads(line[5:]) # Convert JSON string to Python dictionary

                    # Extract the 'delta' dictionary which contains incremental update fields
                    choice = data["choices"][0]["delta"] # Access the partial update from the streamed response

                    # Perform a one-time check on the first chunk to detect if reasoning field exists and is non-empty
                    if reasoning_check is None: # Only check once on the initial chunk received
                        # Set reasoning_check to empty string if reasoning key exists and has content, else None
                        reasoning_check = "" if ("reasoning" in choice and choice["reasoning"]) else None

                    # If reasoning is present, mode allows thinking, reasoning not done, and reasoning text exists
                    if (
                        reasoning_check == "" # Reasoning field detected in first chunk
                        and mode != "/no_think" # Mode does not disable reasoning output
                        and not reasoning_done # Reasoning section is still in progress
                        and "reasoning" in choice # Current chunk contains reasoning text
                        and choice["reasoning"] # Reasoning text is non-empty
                    ):
                        # Insert opening reasoning tag once and append the first reasoning chunk
                        if not reasoning_tag: # Only add opening tag once at the start of reasoning
                            reasoning_tag = True # Mark that opening tag has been inserted
                            reasoning = reasoning_tag_open(reasoning) # Add opening <think> tag to reasoning string
                            reasoning += choice["reasoning"] # Append initial reasoning text chunk
                        else:
                            # Remove any duplicate opening tags and append subsequent reasoning chunks
                            reasoning_content = choice["reasoning"].replace("<think>", "") # Clean redundant tags
                            reasoning += reasoning_content # Append next reasoning segment to accumulated text

                        yield reasoning # Yield the intermediate reasoning text chunk to the caller
                        continue # Continue to next streamed line without further processing

                    # If reasoning is done and content starts arriving, finalize reasoning output
                    if (
                        reasoning_check == "" # Reasoning was detected initially
                        and mode != "/no_think" # Mode allows reasoning output
                        and not reasoning_done # Reasoning not yet marked as done
                        and "content" in choice # Current chunk contains content field
                        and choice["content"] # Content text is non-empty
                    ):
                        reasoning_done = True # Mark reasoning section as complete

                        # If reasoning tag was opened, close it properly before yielding final reasoning block
                        if reasoning_tag: # Only close tag if it was previously opened
                            reasoning = reasoning_tag_close(reasoning) # Append closing </think> tag
                            yield reasoning # Yield the complete reasoning text block

                        yield "" # Yield an empty string as a separator between reasoning and content
                        content += choice["content"] # Start accumulating content text from this chunk
                        yield content # Yield the first chunk of content text to the caller
                        continue # Proceed to next line in the stream

                    # Handle cases where reasoning is absent, finished, or mode disables reasoning, but content is present
                    if (
                        (reasoning_check is None or reasoning_done or mode == "/no_think") # No reasoning or reasoning done or disabled mode
                        and "content" in choice # Current chunk contains content field
                        and choice["content"] # Content text is non-empty
                    ):
                        content += choice["content"] # Append the content chunk to accumulated content string
                        yield content # Yield the updated content string so far

                # Catch any exceptions from JSON parsing errors or connection issues to prevent stream break
                except Exception:
                    continue # Ignore malformed chunks or transient errors and continue processing next lines
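The tag helpers imported above live in src/utils/reasoning.py (added in this commit but not shown in this listing). Judging only from their call sites here, a minimal sketch of their behavior might look like the following; this is an assumption, not the actual implementation, which likely does more (e.g., deduplication or formatting).

# Assumed behavior of the reasoning tag helpers, inferred from how the
# transport modules call them; the real src/utils/reasoning.py is not
# shown in this diff.
def reasoning_tag_open(reasoning: str) -> str:
    # Prefix the accumulated reasoning with an opening <think> tag
    return "<think>" + reasoning

def reasoning_tag_close(reasoning: str) -> str:
    # Terminate the reasoning block with a closing </think> tag
    return reasoning + "</think>"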
src/core/transport/httpx.py
ADDED
@@ -0,0 +1,152 @@
#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#

import json # Import json module to decode JSON formatted strings from server responses
import httpx # Import httpx library to perform asynchronous HTTP requests with HTTP/2 support
# Import functions to add opening and closing tags around reasoning text for proper formatting
from src.utils.reasoning import reasoning_tag_open, reasoning_tag_close # Functions to wrap reasoning with tags

# Define asynchronous function to send a POST request and stream the response from the server
async def httpx_transport(host: str, headers: dict, payload: dict, mode: str):
    """
    This asynchronous function establishes a streaming POST request to the specified server endpoint using the httpx library with HTTP/2 support.
    It is designed to handle incremental server responses that include both reasoning and content parts, which are streamed as separate chunks.
    The function processes each line of the streamed response, parsing JSON data prefixed by "data:", and yields partial outputs to the caller in real time.

    The function maintains internal state to manage the reasoning text separately from the main content. It detects whether the response includes a reasoning section
    by inspecting the first chunk containing the 'reasoning' field. If reasoning is present and the mode allows it (i.e., mode is not "/no_think"), it wraps the reasoning text
    within custom tags (<think> ... </think>) to clearly demarcate this part of the output. The opening tag is inserted once at the start of reasoning, and subsequent chunks
    append reasoning text after cleansing redundant tags.

    Once the reasoning section is complete and the content part begins, the function closes the reasoning tags properly before yielding the final reasoning block. It then yields
    an empty string as a separator, followed by the streamed content chunks. If reasoning is absent or disabled, the function directly accumulates and yields content chunks.

    The function is robust against malformed data or transient connection issues, gracefully skipping any problematic chunks without interrupting the stream. It reads each line
    as a UTF-8 decoded string, strips whitespace, and only processes lines starting with the "data:" prefix to ensure valid data handling.

    Parameters:
    - host (str): The URL of the server endpoint to which the POST request is sent.
    - headers (dict): HTTP headers to include in the request, such as authorization and content type.
    - payload (dict): The JSON payload containing the request data, including model, messages, and generation parameters.
    - mode (str): A string controlling behavior such as enabling or disabling reasoning output (e.g., "/no_think" disables reasoning).

    Yields:
    - str: Partial chunks of reasoning or content as they are received from the server, allowing real-time streaming output.

    Workflow:
    1. Initializes empty strings and flags to track reasoning text, content text, and reasoning state.
    2. Opens an asynchronous HTTP client session with HTTP/2 enabled and no timeout to allow indefinite streaming.
    3. Sends a POST request to the specified host with provided headers and JSON payload, initiating a streaming response.
    4. Iterates asynchronously over each line of the streamed response.
       - Skips any lines that do not start with the "data:" prefix to filter valid data chunks.
       - Parses the JSON content after the "data:" prefix into a Python dictionary.
       - Extracts the 'delta' field from the first choice in the response, which contains incremental updates.
    5. On the first chunk, checks if the 'reasoning' field is present and non-empty to determine if reasoning should be processed.
    6. If reasoning is present and allowed by mode, and reasoning is not yet complete:
       - Inserts the opening <think> tag once.
       - Appends reasoning text chunks, removing redundant opening tags if necessary.
       - Yields the accumulated reasoning text for real-time consumption.
    7. When reasoning ends and content begins:
       - Marks reasoning as done.
       - Closes the reasoning tag properly if it was opened.
       - Yields the finalized reasoning block.
       - Yields an empty string as a separator.
       - Starts accumulating content text and yields the first content chunk.
    8. If reasoning is absent, finished, or disabled, accumulates and yields content chunks directly.
    9. Handles any exceptions during parsing or connection by skipping malformed chunks, ensuring the stream continues uninterrupted.

    This design allows clients to receive partial reasoning and content outputs as they are generated by the server, enabling interactive and responsive user experiences.
    """

    # Initialize an empty string to accumulate streamed reasoning text from the response
    reasoning = "" # Holds reasoning text segments as they are received incrementally

    # Boolean flag to track whether the opening <think> tag has been inserted to avoid duplicates
    reasoning_tag = False # Ensures the reasoning opening tag is added only once during streaming

    # Variable to check presence of reasoning field in the first chunk of streamed data
    reasoning_check = None # Used to determine if reasoning should be processed for this response

    # Flag to indicate that reasoning section has finished and content streaming should start
    reasoning_done = False # Marks when reasoning is complete and content output begins

    # Initialize an empty string to accumulate the main content text from the response
    content = "" # Will hold the actual content output after reasoning is finished

    # Create an asynchronous HTTP client session with HTTP/2 enabled and no timeout to allow indefinite streaming
    async with httpx.AsyncClient(timeout=None, http2=True) as client: # Establish persistent HTTP/2 connection
        # Send a POST request to the given host with specified headers and JSON payload, and start streaming response
        async with client.stream("POST", host, headers=headers, json=payload) as response: # Initiate streaming POST request
            # Iterate asynchronously over each line of text in the streamed response content
            async for chunk in response.aiter_lines(): # Read response line by line as it arrives from the server
                # Skip processing for lines that do not start with the expected "data:" prefix
                if not chunk.strip().startswith("data:"): # Only process lines that contain data payloads
                    continue # Ignore non-data lines and continue to next streamed line

                try:
                    # Parse the JSON object from the line after removing the "data:" prefix
                    data = json.loads(chunk[5:]) # Convert JSON string to Python dictionary

                    # Extract the 'delta' dictionary which contains incremental update fields
                    choice = data["choices"][0]["delta"] # Access the partial update from the streamed response

                    # Perform a one-time check on the first chunk to detect if reasoning field exists and is non-empty
                    if reasoning_check is None: # Only check once on the initial chunk received
                        # Set reasoning_check to empty string if reasoning key exists and has content, else None
                        reasoning_check = "" if ("reasoning" in choice and choice["reasoning"]) else None

                    # If reasoning is present, mode allows thinking, reasoning not done, and reasoning text exists
                    if (
                        reasoning_check == "" # Reasoning field detected in first chunk
                        and mode != "/no_think" # Mode does not disable reasoning output
                        and not reasoning_done # Reasoning section is still in progress
                        and "reasoning" in choice # Current chunk contains reasoning text
                        and choice["reasoning"] # Reasoning text is non-empty
                    ):
                        # Insert opening reasoning tag once and append the first reasoning chunk
                        if not reasoning_tag: # Only add opening tag once at the start of reasoning
                            reasoning_tag = True # Mark that opening tag has been inserted
                            reasoning = reasoning_tag_open(reasoning) # Add opening <think> tag to reasoning string
                            reasoning += choice["reasoning"] # Append initial reasoning text chunk
                        else:
                            # Remove any duplicate opening tags and append subsequent reasoning chunks
                            reasoning_content = choice["reasoning"].replace("<think>", "") # Clean redundant tags
                            reasoning += reasoning_content # Append next reasoning segment to accumulated text

                        yield reasoning # Yield the intermediate reasoning text chunk to the caller
                        continue # Continue to next streamed line without further processing

                    # If reasoning is done and content starts arriving, finalize reasoning output
                    if (
                        reasoning_check == "" # Reasoning was detected initially
                        and mode != "/no_think" # Mode allows reasoning output
                        and not reasoning_done # Reasoning not yet marked as done
                        and "content" in choice # Current chunk contains content field
                        and choice["content"] # Content text is non-empty
                    ):
                        reasoning_done = True # Mark reasoning section as complete

                        # If reasoning tag was opened, close it properly before yielding final reasoning block
                        if reasoning_tag: # Only close tag if it was previously opened
                            reasoning = reasoning_tag_close(reasoning) # Append closing </think> tag
                            yield reasoning # Yield the complete reasoning text block

                        yield "" # Yield an empty string as a separator between reasoning and content
                        content += choice["content"] # Start accumulating content text from this chunk
                        yield content # Yield the first chunk of content text to the caller
                        continue # Proceed to next line in the stream

                    # Handle cases where reasoning is absent, finished, or mode disables reasoning, but content is present
                    if (
                        (reasoning_check is None or reasoning_done or mode == "/no_think") # No reasoning or reasoning done or disabled mode
                        and "content" in choice # Current chunk contains content field
                        and choice["content"] # Content text is non-empty
                    ):
                        content += choice["content"] # Append the content chunk to accumulated content string
                        yield content # Yield the updated content string so far

                # Catch any exceptions from JSON parsing errors or connection issues to prevent stream break
                except Exception: # Gracefully handle any error encountered during streaming or parsing
                    continue # Ignore malformed chunks or transient errors and continue processing next lines
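Both transports assume OpenAI-style server-sent events. A useful line in the stream looks roughly like the example below (the JSON values are illustrative), which is why both modules strip the data: prefix and then read choices[0].delta:

# Illustrative shape of one streamed SSE line as handled by both transports.
# During the thinking phase the delta carries "reasoning"; afterwards "content".
import json

line = 'data: {"choices": [{"delta": {"reasoning": "First, consider..."}}]}'
data = json.loads(line[5:])              # Drop the "data:" prefix and parse
delta = data["choices"][0]["delta"]      # Incremental update of the first choice
print(delta.get("reasoning") or delta.get("content"))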
src/ui/interface.py
CHANGED
@@ -102,7 +102,8 @@ def ui():
     reasoning.change(
         fn=update_parameters,  # Function to call on checkbox state change
         inputs=[reasoning],  # Input is the reasoning checkbox's current value
-        outputs=[temperature, top_k, min_p, top_p, repetition_penalty]  # Update these sliders with new values
+        outputs=[temperature, top_k, min_p, top_p, repetition_penalty],  # Update these sliders with new values
+        api_name=False  # Disable API
     )

     # Initialize the parameter sliders with values corresponding to the default reasoning checkbox state
@@ -164,6 +165,8 @@ def ui():
             ["/image Create a cartoon-style image of a man."],
             ["What day is it today, what's the date, and what time is it?"],
             ['/audio Say "I am J.A.R.V.I.S.".'],
+            ["How can I run you in the terminal without having to download the model?"],
+            ["Do you have an OpenAI-compatible API for your model?"],
             ["Please generate a highly complex code snippet on any topic."],
             ["Explain about quantum computers."]
         ],
@@ -171,13 +174,15 @@ def ui():
         chatbot=gr.Chatbot(
             label="J.A.R.V.I.S.",  # Title label displayed above the chat window
             show_copy_button=True,  # Show a button allowing users to copy chat messages
-            scale=1  # Scale factor for the chatbot UI size
+            scale=1,  # Scale factor for the chatbot UI size
+            allow_tags=["think"]  # Reasoning tag
         ),
         multimodal=False,  # Disable support for multimodal inputs such as images or audio files
         fill_height=True,  # Duplicate from Blocks to Chat Interface
         fill_width=True,  # Duplicate from Blocks to Chat Interface
         head=meta_tags,  # Duplicate from Blocks to Chat Interface
-        show_progress="full"  # Progress animation
+        show_progress="full",  # Progress animation
+        api_name="api"  # API endpoint
     )
     # Return the complete Gradio app object for launching or embedding
     return app
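
With api_name="api" now set on the chat component, the Space exposes a named endpoint that gradio_client can call, which is presumably what the two new example prompts hint at. A minimal sketch, assuming the Space ID "hadadrjt/ai" and that the endpoint accepts the chat message as its first argument; both details are deployment assumptions not visible in this diff:

from gradio_client import Client  # pip install gradio_client

client = Client("hadadrjt/ai")  # assumed Space ID
result = client.predict(
    "Explain about quantum computers.",  # chat message to send
    api_name="/api"  # endpoint name introduced in this commit
)
print(result)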
src/ui/reasoning.py
DELETED
@@ -1,75 +0,0 @@
-#
-# SPDX-FileCopyrightText: Hadad <[email protected]>
-# SPDX-License-Identifier: Apache-2.0
-#
-
-def styles(reasoning: str, content: str, expanded: bool = False) -> str:
-    """
-    Generate a clean, interactive HTML <details> block that displays reasoning text inside a collapsible container
-    with subtle styling and enhanced user experience, without applying any custom colors or shadows.
-
-    This function creates a collapsible section using HTML and inline CSS that focuses on simplicity and readability.
-    It avoids setting any custom text or background colors and does not include any box shadow effects,
-    allowing the block to inherit colors and styles from its surrounding environment.
-    The summary header includes a brain emoji represented by its HTML numeric character reference to symbolize reasoning.
-    The collapsible block can be rendered initially expanded or collapsed based on the 'expanded' parameter.
-    The 'content' parameter is unused but kept for compatibility with similar function signatures.
-
-    Args:
-        reasoning (str): The explanation or reasoning text to be displayed inside the collapsible block.
-            This text is wrapped in a styled <div> for clear presentation.
-        content (str): An unused parameter retained for compatibility with other functions sharing this signature.
-        expanded (bool): If True, the collapsible block is initially open; if False, it starts closed.
-
-    Returns:
-        str: A complete HTML snippet string containing a <details> element with inline CSS that styles it as
-            a simple, user-friendly collapsible container. The styling includes padding, rounded corners,
-            smooth transitions on the summary header's text color, and readable font sizing without any color overrides or shadows.
-    """
-    # Determine whether to include the 'open' attribute in the <details> tag.
-    # If 'expanded' is True, the block will be rendered open by default in the browser.
-    open_attr = "open" if expanded else ""
-
-    # Define the brain emoji using its HTML numeric character reference to ensure consistent display
-    # across different browsers and platforms, avoiding potential encoding issues.
-    emoji = "🧠"  # Unicode code point U+1F9E0 representing the 🧠 emoji
-
-    # Construct and return the full HTML string for the collapsible block.
-    # The <details> element acts as a toggleable container with padding and rounded corners for a modern look.
-    # The inline styles avoid setting explicit colors or shadows, allowing the block to inherit styles from its context.
-    # The <summary> element serves as the clickable header, featuring the brain emoji and the label "Reasoning".
-    # It includes styling for font weight, size, cursor, and smooth color transitions on hover.
-    # The hover effect is implemented using inline JavaScript event handlers that maintain the inherited color,
-    # ensuring no color changes occur but allowing for potential future customization.
-    # The reasoning text is wrapped inside a <div> with spacing and a subtle top border to visually separate it from the summary.
-    # Typography settings improve readability with increased line height and slight letter spacing.
-    # The 'content' parameter is intentionally unused but present to maintain compatibility with similar function signatures.
-    return f"""
-    <details {open_attr} style="
-        padding: 16px; /* Inner spacing for comfortable content layout */
-        border-radius: 12px; /* Rounded corners for a smooth, friendly appearance */
-        margin: 12px 0; /* Vertical spacing to separate from adjacent elements */
-        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; /* Modern, readable font stack */
-        transition: box-shadow 0.3s ease-in-out; /* Transition effect retained though no shadow is applied */
-    ">
-        <summary style="
-            font-weight: 700; /* Bold text to highlight the summary header */
-            font-size: 14px !important; /* Slightly larger font size for emphasis */
-            cursor: pointer; /* Cursor changes to pointer to indicate interactivity */
-            user-select: none; /* Prevents text selection on click for cleaner UX */
-            transition: color 0.25s ease-in-out; /* Smooth transition for color changes on hover */
-        " onmouseover="this.style.color='inherit';" onmouseout="this.style.color='inherit';">
-            {emoji} Reasoning
-        </summary>
-        <div style="
-            margin-top: 12px; /* Space separating the summary from the content */
-            padding-top: 8px; /* Additional padding for comfortable reading */
-            border-top: 1.5px solid; /* Subtle top border to visually separate content */
-            font-size: 11px !important; /* Slightly larger font size for better readability */
-            line-height: 1.7; /* Increased line height for easy text flow */
-            letter-spacing: 0.02em; /* Slight letter spacing to enhance legibility */
-        ">
-            {reasoning}
-        </div>
-    </details>
-    """
src/utils/instruction.py
ADDED
@@ -0,0 +1,55 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from config import restrictions  # Load predefined restriction settings for instruction building
+
+# Define a function to set system instructions
+def set_instructions(mode: str, date: str) -> str:
+    """
+    This function constructs a comprehensive system instruction string that integrates several key components needed
+    to guide the behavior of an AI model or system during interaction. It takes two inputs: 'mode' and 'date', and
+    returns a single formatted string that combines these inputs with a predefined set of restrictions loaded from
+    the configuration.
+
+    The purpose of this instruction string is to provide contextual and operational directives to the AI system.
+    The 'mode' parameter typically represents the current operational mode or state, which may influence how the AI
+    processes inputs or generates outputs. This could include modes such as normal operation, restricted mode, or
+    specialized behavior modes.
+
+    The 'restrictions' component, imported from the configuration, contains predefined rules, limitations, or guidelines
+    that the AI should adhere to during its operation. These restrictions might include content filters, ethical
+    guidelines, or other constraints necessary to ensure safe and appropriate AI behavior.
+
+    The 'date' parameter represents the current date or timestamp, providing temporal context that may be relevant
+    for time-sensitive instructions or for logging and auditing purposes.
+
+    The function formats these three components into a single string with clear separation using multiple newline
+    characters. This spacing improves readability and ensures that each section is distinctly identifiable when the
+    instruction string is parsed or displayed. The resulting string looks like this:
+
+    <mode>
+
+    <restrictions>
+
+    Today: <date>
+
+    This structured format allows downstream systems or models to easily extract and interpret each part of the
+    instruction, facilitating consistent and context-aware AI responses.
+
+    Parameters:
+    - mode (str): A string indicating the current operational mode or context for the AI system.
+    - date (str): A string representing the current date or timestamp to provide temporal context.
+
+    Returns:
+    - str: A formatted instruction string combining the mode, restrictions, and date sections with spacing.
+
+    Usage:
+    This function is typically called before sending prompts or requests to the AI model to ensure that all necessary
+    contextual information and operational constraints are included in the system instructions.
+    """
+
+    # Combine mode, restrictions, and date into a formatted instruction block
+    return f"{mode}\n\n\n{restrictions}\n\n\nToday: {date}\n\n\n"
+    # Return the composed string with spacing between sections
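
A short usage sketch tying this helper to the rest of the commit: the date argument pairs naturally with get_time() from src/utils/time.py (added below), and the "/no_think" mode string appears in the server hunk above; treat the exact call site as illustrative rather than the actual wiring:

from src.utils.instruction import set_instructions
from src.utils.time import get_time

system_instructions = set_instructions("/no_think", get_time())
# The result interleaves the mode, the configured restrictions,
# and the current date, separated by blank lines.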
src/utils/reasoning.py
ADDED
@@ -0,0 +1,140 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Define function to keep only the first <think> tag at the beginning of the text
+def reasoning_tag_start(text: str) -> str:
+    """
+    This function ensures that the reasoning text contains exactly one opening <think> tag at the very beginning.
+    It is common for streamed or concatenated reasoning texts to accumulate multiple <think> tags due to incremental
+    appends or repeated insertions. This function cleans the text by removing all occurrences of the <think> tag
+    throughout the entire string, then checks if the original text started with a <think> tag. If it did, it reinserts
+    a single <think> tag at the start to preserve the intended opening marker.
+
+    The purpose of this function is to normalize the reasoning text so that it has a clean, unambiguous opening tag,
+    which is critical for consistent parsing, rendering, or further processing downstream. By preventing multiple
+    opening tags, it avoids confusion and formatting errors in the final output.
+
+    Steps:
+    1. Remove all <think> tags from the entire text to eliminate duplicates.
+    2. Check if the original text began with <think>.
+    3. If yes, prepend a single <think> tag to the cleaned text.
+    4. If no, return the cleaned text without any opening tag.
+
+    Parameters:
+    - text (str): The reasoning text which may contain multiple or misplaced <think> tags.
+
+    Returns:
+    - str: The reasoning text normalized to have at most one <think> tag at the start.
+    """
+
+    # Remove all <think> tags from the text
+    reasoning_mode = text.replace("<think>", "")  # Strip all <think> tags throughout the text
+    # Check if the original text started with <think> and reinsert one if so
+    if text.startswith("<think>"):  # Reinsert a single <think> tag at the beginning
+        return "<think>" + reasoning_mode  # Return the cleaned text with one <think> tag at the start
+    else:
+        return reasoning_mode  # Return the cleaned text without any <think> tags
+
+# Define function to keep only the last </think> tag at the end of the text
+def reasoning_tag_stop(text: str) -> str:
+    """
+    This function ensures that the reasoning text contains exactly one closing </think> tag at the very end.
+    Similar to the opening tag, streamed or concatenated reasoning texts might accumulate multiple closing </think> tags,
+    which can cause parsing or display issues. This function removes all closing </think> tags from the text and then
+    checks if the original text ended with a closing tag. If it did, it appends a single closing </think> tag at the end,
+    preserving the intended closing marker.
+
+    This normalization is important to maintain a clean and consistent structure in the reasoning text, ensuring that
+    the closing tag is unambiguous and properly positioned for downstream consumers or renderers.
+
+    Steps:
+    1. Remove all </think> tags from the entire text to eliminate duplicates.
+    2. Check if the original text ended with </think>.
+    3. If yes, append a single </think> tag to the cleaned text.
+    4. If no, return the cleaned text without any closing tag.
+
+    Parameters:
+    - text (str): The reasoning text which may contain multiple or misplaced </think> tags.
+
+    Returns:
+    - str: The reasoning text normalized to have at most one </think> tag at the end.
+    """
+
+    # Remove all </think> tags from the text
+    reasoning_mode = text.replace("</think>", "")  # Strip all </think> tags throughout the text
+    # Check if the original text ended with </think> and reinsert one if so
+    if text.endswith("</think>"):  # Reinsert a single </think> tag at the end
+        return reasoning_mode + "</think>"  # Return the cleaned text with one </think> tag at the end
+    else:
+        return reasoning_mode  # Return the cleaned text without any </think> tags
+
+# Define function to ensure text starts with exactly one <think> tag
+def reasoning_tag_open(text: str) -> str:
+    """
+    This function guarantees that the reasoning text starts with exactly one opening <think> tag.
+    It first strips any leading whitespace to accurately detect whether the tag is already present.
+    If the tag is missing, it inserts a <think> tag followed by a newline at the very beginning of the text.
+    If the tag is present, it calls reasoning_tag_start to remove any duplicate tags and ensure only one opening tag remains.
+
+    This function is essential for preparing reasoning text before streaming or output, as it enforces a consistent
+    and clean opening tag structure. The newline after the tag improves readability and formatting when displayed.
+
+    Steps:
+    1. Strip leading whitespace from the text.
+    2. Check if the text starts with <think>.
+    3. If not, prepend "<think>\n" to the text.
+    4. If yes, clean duplicates using reasoning_tag_start.
+    5. Return the normalized text.
+
+    Parameters:
+    - text (str): The reasoning text to be normalized.
+
+    Returns:
+    - str: The reasoning text with exactly one <think> tag at the start.
+    """
+
+    # Remove leading whitespace for accurate tag checking
+    stripped = text.lstrip()  # Eliminate spaces or newlines from the start
+    # If tag is missing, insert it, else clean up any duplicates
+    if not stripped.startswith("<think>"):  # Check if <think> tag is absent at the beginning
+        text = "<think>\n" + text  # Add <think> tag followed by a newline at the start
+    else:
+        text = reasoning_tag_start(text)  # Remove duplicates if the tag is already present
+    return text  # Return text with one valid <think> tag at the start
+
+# Define function to ensure text ends with exactly one </think> tag
+def reasoning_tag_close(text: str) -> str:
+    """
+    This function guarantees that the reasoning text ends with exactly one closing </think> tag.
+    It first strips any trailing whitespace to accurately detect whether the tag is already present.
+    If the tag is missing, it appends a newline, the closing </think> tag, and two additional newlines to the end of the text.
+    If the tag is present, it calls reasoning_tag_stop to remove any duplicate closing tags and ensure only one remains.
+
+    This function is crucial for finalizing reasoning text before output or further processing, ensuring the closing tag
+    is properly placed and that the text formatting remains clean and readable. The added newlines after the closing tag
+    provide spacing for separation from subsequent content.
+
+    Steps:
+    1. Strip trailing whitespace from the text.
+    2. Check if the text ends with </think>.
+    3. If not, append "\n</think>\n\n" to the text.
+    4. If yes, clean duplicates using reasoning_tag_stop.
+    5. Return the normalized text.
+
+    Parameters:
+    - text (str): The reasoning text to be normalized.
+
+    Returns:
+    - str: The reasoning text with exactly one </think> tag at the end.
+    """
+
+    # Remove trailing whitespace for accurate tag checking
+    stripped = text.rstrip()  # Eliminate spaces or newlines from the end
+    # If tag is missing, append it, else clean up any duplicates
+    if not stripped.endswith("</think>"):  # Check if </think> tag is absent at the end
+        text = text.rstrip() + "\n</think>\n\n"  # Append </think> tag with spacing
+    else:
+        text = reasoning_tag_stop(text)  # Remove duplicates if the tag is already present
+    return text  # Return text with one valid </think> tag at the end
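
Since all four helpers are defined in full above, their behavior can be pinned down with a small, self-contained check; this snippet only exercises the code shown and introduces nothing new:

from src.utils.reasoning import reasoning_tag_open, reasoning_tag_close

text = "<think><think>Weighing the options..."
text = reasoning_tag_open(text)   # collapses the duplicate tags into one leading <think>
text = reasoning_tag_close(text)  # appends "\n</think>\n\n" because no closing tag exists yet
assert text.startswith("<think>")
assert text.rstrip().endswith("</think>")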
src/utils/session_mapping.py
CHANGED
@@ -9,10 +9,11 @@ from typing import Dict, List # Import type hints for dictionaries and lists (n
 from config import auth  # Import authentication configuration, likely a list of host dictionaries with credentials
 from src.utils.helper import busy, mark  # Import 'busy' dictionary and 'mark' function to track and update host busy status

-#
-mapping = {}
+# Initialize a global dictionary to map session IDs to assigned hosts
+mapping = {}  # Store session_id to host assignment mapping to maintain consistent host allocation per session

-def get_host(session_id: str):
+# Define a function to get an available host for a given session, optionally excluding certain hosts
+def get_host(session_id: str, exclude_hosts: List[str] = None) -> dict:
     """
     Retrieve or assign a host for the given session ID.

@@ -26,38 +27,48 @@ def get_host(session_id: str):
     Exception: If no available hosts are found to assign.

     Explanation:
-
-
-
-
-    marks the selected host as busy for one hour, and returns the selected host.
+    Retrieve an available host for the specified session ID, ensuring excluded hosts are not assigned.
+    This function maintains a mapping of session IDs to hosts to provide consistent host assignment.
+    It filters out busy hosts and those explicitly excluded, then randomly selects an available host.
+    If no hosts are available, it raises an exception.
     """
+
+    # If no list of hosts to exclude is provided, initialize it as an empty list
+    if exclude_hosts is None:  # Check if exclude_hosts parameter was omitted or set to None
+        exclude_hosts = []  # Initialize exclude_hosts to an empty list to avoid errors during filtering
+
     # Check if the session ID already has an assigned host in the mapping dictionary
-    if session_id in mapping:
-        #
-        return
+    if session_id in mapping:  # Verify if a host was previously assigned to this session
+        assigned_host = mapping[session_id]  # Retrieve the assigned host dictionary for this session
+        # If the assigned host is not in the list of hosts to exclude, return it immediately
+        if assigned_host["jarvis"] not in exclude_hosts:  # Ensure assigned host is allowed for this request
+            return assigned_host  # Return the cached host assignment for session consistency
+        else:
+            # If the assigned host is excluded, remove the mapping to allow reassignment
+            del mapping[session_id]  # Delete the existing session-host mapping to find a new host

-    # Get the current UTC time to compare against busy timestamps
-    now = datetime.utcnow()
+    # Get the current UTC time to compare against host busy status timestamps
+    now = datetime.utcnow()  # Capture current time to filter out hosts that are still busy

-    #
-
-        h for h in auth
-        if h["jarvis"] not in busy or busy[h["jarvis"]] <= now
+    # Create a list of hosts that are not currently busy and not in the exclude list
+    available_hosts = [
+        h for h in auth  # Iterate over all hosts defined in the authentication configuration
+        if h["jarvis"] not in busy or busy[h["jarvis"]] <= now  # Include hosts not busy or whose busy time has expired
+        if h["jarvis"] not in exclude_hosts  # Exclude hosts specified in the exclude_hosts list
     ]

-    # If no hosts are available after filtering, raise an exception to indicate
-    if not
-        raise Exception("No available hosts to assign.")
+    # If no hosts are available after filtering, raise an exception to indicate resource exhaustion
+    if not available_hosts:  # Check if the filtered list of hosts is empty
+        raise Exception("No available hosts to assign.")  # Inform caller that no hosts can be assigned currently

-    # Randomly select one host from the list of available hosts
-    selected = random.choice(
+    # Randomly select one host from the list of available hosts to distribute load evenly
+    selected = random.choice(available_hosts)  # Choose a host at random to avoid bias in host selection

-    #
-    mapping[session_id] = selected
+    # Store the selected host in the mapping dictionary for future requests with the same session ID
+    mapping[session_id] = selected  # Cache the selected host to maintain session affinity

-    # Mark the selected host as busy
-    mark(selected["jarvis"])
+    # Mark the selected host as busy using the helper function to update its busy status
+    mark(selected["jarvis"])  # Update the busy dictionary to indicate this host is now in use

-    # Return the selected host dictionary
-    return selected
+    # Return the selected host dictionary to the caller for use in processing the session
+    return selected  # Provide the caller with the assigned host details
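
The new exclude_hosts parameter enables a retry-with-failover pattern: when a request through the assigned host fails, ask for another host while excluding the one that failed. A sketch of that pattern under stated assumptions; send_request is a hypothetical stand-in for the transport layer, not a function from this repository:

from src.utils.session_mapping import get_host

def request_with_failover(session_id: str, payload: dict, attempts: int = 3):
    failed = []  # "jarvis" identifiers of hosts that have already failed
    for _ in range(attempts):
        host = get_host(session_id, exclude_hosts=failed)
        try:
            return send_request(host, payload)  # hypothetical transport call
        except Exception:
            failed.append(host["jarvis"])  # exclude this host on the next attempt
    raise Exception("All attempted hosts failed for this session.")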
src/utils/time.py
ADDED
@@ -0,0 +1,49 @@
+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from datetime import datetime  # Import datetime module to work with date and time
+
+# Define a function to get the current date and time in a specific format
+def get_time() -> str:
+    """
+    This function retrieves the current local date and time and returns it as a human-readable formatted string.
+    It leverages Python's built-in datetime module to obtain the precise moment at which the function is called,
+    ensuring that the timestamp reflects the current system time accurately.
+
+    The formatting applied to the datetime object is designed to produce a clear and comprehensive representation
+    of the date and time, suitable for display in user interfaces, logging, or as contextual information within
+    system instructions or AI prompts.
+
+    Specifically, the format string used in strftime produces the following components in order:
+
+    - %A: Full weekday name (e.g., Monday, Tuesday) to indicate the day of the week explicitly.
+    - %B: Full month name (e.g., January, February) providing the month in a readable form.
+    - %d: Day of the month as a zero-padded decimal number (01 to 31), giving the exact calendar day.
+    - %Y: Four-digit year (e.g., 2025), specifying the calendar year.
+    - %I: Hour (12-hour clock) as a zero-padded decimal number (01 to 12), for conventional time representation.
+    - %M: Minute as a zero-padded decimal number (00 to 59), showing the exact minute.
+    - %p: Locale's AM or PM designation, clarifying morning or afternoon/evening time.
+    - %Z: Time zone name or abbreviation, providing the timezone context of the timestamp.
+
+    By combining these elements, the returned string might look like:
+    "Sunday, June 29, 2025, 08:11 PM WIB"
+
+    This detailed timestamp format is particularly useful in contexts where precise temporal information is necessary,
+    such as generating system instructions that depend on the current date and time, logging events with timestamps,
+    or displaying current time information to users in a clear and localized manner.
+
+    Returns:
+    - str: A string representing the current date and time formatted with weekday, month, day, year, 12-hour time,
+        AM/PM marker, and timezone abbreviation.
+
+    Usage:
+    This function can be called whenever the current timestamp is needed in a standardized human-readable format,
+    especially before sending instructions or prompts to AI systems that may require temporal context.
+    """
+
+    # Get the current date and time and format it using strftime
+    return datetime.now().strftime("%A, %B %d, %Y, %I:%M %p %Z")
+    # Format as full weekday name, month name, day, year,
+    # 12-hour time, AM/PM, and timezone
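
One caveat: datetime.now() returns a naive object, so the %Z directive typically renders as an empty string, and the timezone shown in the docstring example will be absent. A sketch of a timezone-aware variant, should that ever matter:

from datetime import datetime

def get_time_aware() -> str:
    # astimezone() attaches the system's local timezone to the naive "now",
    # letting %Z render an abbreviation such as "UTC" or "WIB".
    return datetime.now().astimezone().strftime("%A, %B %d, %Y, %I:%M %p %Z")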