Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed on Jan 4

Commit

8696822

verified ·

1 Parent(s): f523c40

Update app.py

Browse files

Files changed (1) hide show

app.py +203 -73

app.py CHANGED Viewed

@@ -2,6 +2,10 @@ import gradio as gr
 from openai import OpenAI
 import os
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
@@ -13,6 +17,28 @@ client = OpenAI(
 )
 print("OpenAI client initialized.")
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -22,34 +48,32 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model
 ):
     """
     This function handles the chatbot response. It takes in:
     - message: the user's new message
     - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
     - system_message: the system prompt
-    - max_tokens: the maximum number of tokens to generate in the response
-    - temperature: sampling temperature
-    - top_p: top-p (nucleus) sampling
-    - frequency_penalty: penalize repeated tokens in the output
-    - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - custom_model: the user-provided custom model name (if any)
     """
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
     # Construct the messages array required by the API
-    messages = [{"role": "system", "content": system_message}]
     # Add conversation history to the context
     for val in history:
@@ -65,19 +89,27 @@ def respond(
     # Append the latest user message
     messages.append({"role": "user", "content": message})
-    # Determine which model to use: either custom_model or a default
-    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
-    # Start with an empty string to build the response as tokens stream in
     response = ""
     print("Sending request to OpenAI API.")
     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,              # Use either the user-provided custom model or default
         max_tokens=max_tokens,
-        stream=True,                     # Stream the response
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
@@ -86,70 +118,168 @@ def respond(
     ):
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
-        print(f"Received token: {token_text}")
         response += token_text
         # Yield the partial response to Gradio so it can display in real-time
         yield response
     print("Completed response generation.")
-# Create a Chatbot component with a specified height
-chatbot = gr.Chatbot(height=600)
-print("Chatbot interface created.")
-# Create the Gradio ChatInterface
-# We add two new sliders for Frequency Penalty, Seed, and now a new "Custom Model" text box.
-demo = gr.ChatInterface(
-    fn=respond,
-    additional_inputs=[
-        gr.Textbox(value="", label="System message"),
-        gr.Slider(
-            minimum=1,
-            maximum=4096,
-            value=512,
-            step=1,
-            label="Max new tokens"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=4.0,
-            value=0.7,
-            step=0.1,
-            label="Temperature"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-P"
-        ),
-        gr.Slider(
-            minimum=-2.0,
-            maximum=2.0,
-            value=0.0,
-            step=0.1,
-            label="Frequency Penalty"
-        ),
-        gr.Slider(
-            minimum=-1,
-            maximum=65535,  # Arbitrary upper limit for demonstration
-            value=-1,
-            step=1,
-            label="Seed (-1 for random)"
-        ),
-        gr.Textbox(
-            value="",
-            label="Custom Model",
-            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
-        ),
-    ],
-    fill_height=True,
-    chatbot=chatbot,
-    theme="Nymbo/Nymbo_Theme",
-)
-print("Gradio interface initialized.")
 if __name__ == "__main__":
-    print("Launching the demo application.")
-    demo.launch()

 from openai import OpenAI
 import os
+# =============================
+#     GLOBAL SETUP / CLIENT
+# =============================
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
 )
 print("OpenAI client initialized.")
+# =============================
+#     MODEL CONFIG / LOGIC
+# =============================
+# Sample placeholder list of "featured" models for demonstration
+featured_models_list = [
+    "meta-llama/Llama-2-13B-chat-hf",
+    "bigscience/bloom",
+    "microsoft/DialoGPT-large",
+    "OpenAssistant/oasst-sft-1-pythia-12b",
+    "tiiuae/falcon-7b-instruct",
+    "meta-llama/Llama-3.3-70B-Instruct"
+]
+def filter_featured_models(search_term: str):
+    """
+    Returns a list of models that contain the search term (case-insensitive).
+    """
+    filtered = [m for m in featured_models_list if search_term.lower() in m.lower()]
+    return gr.update(choices=filtered)
 def respond(
     message,
     history: list[tuple[str, str]],
     top_p,
     frequency_penalty,
     seed,
+    custom_model,
+    selected_featured_model
 ):
     """
     This function handles the chatbot response. It takes in:
     - message: the user's new message
     - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
     - system_message: the system prompt
+    - max_tokens, temperature, top_p, frequency_penalty, seed: generation params
+    - custom_model: user-provided custom model path/name
+    - selected_featured_model: model chosen from the featured radio list
     """
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
+    print(f"Selected featured model: {selected_featured_model}")
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
     # Construct the messages array required by the API
+    messages = [{"role": "system", "content": system_message}] if system_message.strip() else []
     # Add conversation history to the context
     for val in history:
     # Append the latest user message
     messages.append({"role": "user", "content": message})
+    # Determine which model to use:
+    # 1) If custom_model is non-empty, it overrides everything.
+    # 2) Otherwise, use the selected featured model from the radio button if available.
+    # 3) If both are empty, fall back to the default.
+    model_to_use = "meta-llama/Llama-3.3-70B-Instruct"  # Default
+    if custom_model.strip() != "":
+        model_to_use = custom_model.strip()
+    elif selected_featured_model.strip() != "":
+        model_to_use = selected_featured_model.strip()
     print(f"Model selected for inference: {model_to_use}")
+    # Start building the streaming response
     response = ""
     print("Sending request to OpenAI API.")
     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
+        model=model_to_use,
         max_tokens=max_tokens,
+        stream=True,  # Stream the response
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
     ):
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
+        print(f"Received token: {token_text}", flush=True)
         response += token_text
         # Yield the partial response to Gradio so it can display in real-time
         yield response
     print("Completed response generation.")
+# =============================
+#         MAIN UI
+# =============================
+def build_app():
+    """
+    Build the Gradio Blocks interface containing:
+      - A Chat tab (ChatInterface)
+      - A Featured Models tab
+      - An Information tab
+    """
+    with gr.Blocks(theme="Nymbo/Nymbo_Theme") as main_interface:
+        # Define a gr.State to hold the user's chosen featured model
+        selected_featured_model_state = gr.State("")
+        with gr.Tab("Chat Interface"):
+            gr.Markdown("## Serverless-TextGen-Hub")
+            # Here we embed the ChatInterface for streaming conversation
+            # A hidden "Selected Featured Model" textbox is added as an extra input,
+            # so the user can't edit it directly, but its value still flows into respond().
+            demo = gr.ChatInterface(
+                fn=respond,
+                additional_inputs=[
+                    gr.Textbox(value="", label="System message", lines=2),
+                    gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
+                    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+                    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
+                    gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty"),
+                    gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)"),
+                    gr.Textbox(value="", label="Custom Model", info="(Optional) Provide a custom HF model path"),
+                    gr.Textbox(value="", label="Selected Featured Model (from tab)", visible=False),
+                ],
+                fill_height=True,
+                chatbot=gr.Chatbot(height=600),
+                theme="Nymbo/Nymbo_Theme",
+            )
+            # We want to connect the selected_featured_model_state to that hidden text box
+            def set_featured_model_in_chatbox(val):
+                return val
+            # Whenever the selected_featured_model_state changes, update the hidden field in the ChatInterface
+            selected_featured_model_state.change(
+                fn=set_featured_model_in_chatbox,
+                inputs=selected_featured_model_state,
+                outputs=demo.additional_inputs[-1],  # The last additional input is the "Selected Featured Model"
+            )
+        # ==========================
+        #   Featured Models Tab
+        # ==========================
+        with gr.Tab("Featured Models"):
+            gr.Markdown("### Choose from our Featured Models")
+            # A text box for searching/filtering
+            model_search = gr.Textbox(
+                label="Filter Models",
+                placeholder="Search for a featured model..."
+            )
+            # A radio component listing the featured models (default to first)
+            model_radio = gr.Radio(
+                choices=featured_models_list,
+                label="Select a model below",
+                value=featured_models_list[0],
+                interactive=True
+            )
+            # Define how to update the radio choices when the search box changes
+            model_search.change(
+                fn=filter_featured_models,
+                inputs=model_search,
+                outputs=model_radio
+            )
+            # Button to confirm the selection
+            def select_featured_model(radio_val):
+                """
+                Updates the hidden state with the user-chosen featured model.
+                """
+                return radio_val
+            choose_btn = gr.Button("Use this Featured Model", variant="primary")
+            choose_btn.click(
+                fn=select_featured_model,
+                inputs=model_radio,
+                outputs=selected_featured_model_state
+            )
+            gr.Markdown(
+                """
+                **Tip**: If you type a Custom Model in the "Chat Interface" tab, it overrides the
+                featured model you selected here.
+                """
+            )
+        # ==========================
+        #   Information Tab
+        # ==========================
+        with gr.Tab("Information"):
+            gr.Markdown("## Learn More About These Models and Parameters")
+            with gr.Accordion("Featured Models (Table)", open=False):
+                gr.HTML(
+                    """
+                    <p>Below is a small sample table showing some featured models.</p>
+                    <table style="width:100%; text-align:center; margin:auto;">
+                        <tr>
+                            <th>Model Name</th>
+                            <th>Type</th>
+                            <th>Notes</th>
+                        </tr>
+                        <tr>
+                            <td>meta-llama/Llama-2-13B-chat-hf</td>
+                            <td>Chat</td>
+                            <td>Good for multi-turn dialogue.</td>
+                        </tr>
+                        <tr>
+                            <td>bigscience/bloom</td>
+                            <td>Language Model</td>
+                            <td>Large multilingual model.</td>
+                        </tr>
+                        <tr>
+                            <td>microsoft/DialoGPT-large</td>
+                            <td>Chat</td>
+                            <td>Well-known smaller chat model.</td>
+                        </tr>
+                    </table>
+                    """
+                )
+            with gr.Accordion("Parameters Overview", open=False):
+                gr.Markdown(
+                    """
+                    ### Explanation of Key Parameters
+                    - **System Message**: Provides context or initial instructions to the model.
+                    - **Max Tokens**: The maximum number of tokens (roughly pieces of words) in the generated response.
+                    - **Temperature**: Higher values produce more random/creative outputs, while lower values make the output more focused and deterministic.
+                    - **Top-P**: Controls nucleus sampling. The model samples only from the smallest set of most likely tokens whose cumulative probability reaches this value.
+                    - **Frequency Penalty**: Penalizes repeated tokens. Positive values (like 1.0) reduce repetition in the output. Negative values can increase repetition.
+                    - **Seed**: Determines reproducibility. Set it to a fixed integer for consistent results; `-1` is random each time.
+                    - **Custom Model**: Overrides the featured model. Provide the Hugging Face model path (e.g., `tiiuae/falcon-7b-instruct`) of the model you want to use.
+                    Use these settings to guide how the model generates text. If in doubt, stick to defaults and experiment in small increments.
+                    """
+                )
+    return main_interface
+# If run as a standalone script, just launch.
 if __name__ == "__main__":
+    print("Building and launching the Serverless-TextGen-Hub interface...")
+    ui = build_app()
+    ui.launch()