Update app.py
app.py CHANGED
@@ -2,26 +2,11 @@ import gradio as gr
 from openai import OpenAI
 import os
 
-# -------------------
-# SERVERLESS-TEXTGEN-HUB
-# -------------------
-#
-# This version has been updated to include an "Information" tab above the Chat tab.
-# The Information tab has two accordions:
-# - "Featured Models" which displays a simple table
-# - "Parameters Overview" which contains markdown describing the settings
-#
-# The Chat tab contains the existing chatbot UI.
-
-# -------------------
-# SETUP AND CONFIG
-# -------------------
-
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
 
-# Initialize the OpenAI
+# Initialize the OpenAI client with the Hugging Face Inference API endpoint
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
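Aside: the hunk above wires the standard openai client to Hugging Face's serverless Inference API. A minimal standalone sketch of the same pattern, outside the diff (the model name and prompt are illustrative, and HF_TOKEN must be set in your environment):

import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",  # HF serverless endpoint, as in the diff
    api_key=os.getenv("HF_TOKEN"),  # assumes a valid Hugging Face token in the environment
)

# Stream a short completion token by token
stream = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",  # illustrative model choice
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=64,
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:  # the final chunk's delta can be None
        print(delta, end="", flush=True)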
@@ -63,19 +48,19 @@ def respond(
     if seed == -1:
         seed = None
 
-    # Construct the messages array required by the
+    # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
     print("Initial messages array constructed.")
 
     # Add conversation history to the context
     for val in history:
         user_part = val[0]  # Extract user message from the tuple
-        assistant_part = val[1]  # Extract assistant message
+        assistant_part = val[1]  # Extract assistant message from the tuple
         if user_part:
-            messages.append({"role": "user", "content": user_part})
+            messages.append({"role": "user", "content": user_part})  # Append user message
             print(f"Added user message to context: {user_part}")
         if assistant_part:
-            messages.append({"role": "assistant", "content": assistant_part})
+            messages.append({"role": "assistant", "content": assistant_part})  # Append assistant message
             print(f"Added assistant message to context: {assistant_part}")
 
     # Append the latest user message
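Aside: the loop in the hunk above converts Gradio's tuple-style history into OpenAI-style messages. A worked example of the array it produces, assuming a hypothetical one-turn history and follow-up question:

system_message = "You are a helpful assistant."
history = [("What is Gradio?", "Gradio is a Python library for building ML demos.")]

messages = [{"role": "system", "content": system_message}]
for user_part, assistant_part in history:
    if user_part:
        messages.append({"role": "user", "content": user_part})
    if assistant_part:
        messages.append({"role": "assistant", "content": assistant_part})
messages.append({"role": "user", "content": "Who maintains it?"})  # the new turn

# messages is now: system -> user -> assistant -> user, ready for chat.completions.create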
@@ -86,262 +71,312 @@ def respond(
     model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
 
-    # Start with an empty string to build the
-
-    print("Sending request to
+    # Start with an empty string to build the response as tokens stream in
+    response = ""
+    print("Sending request to OpenAI API.")
 
-    # Make the streaming request to the HF Inference API
+    # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-        frequency_penalty=frequency_penalty,
-        seed=seed,
-        messages=messages,
+        model=model_to_use,                   # Use either the user-provided or default model
+        max_tokens=max_tokens,                # Maximum tokens for the response
+        stream=True,                          # Enable streaming responses
+        temperature=temperature,              # Adjust randomness in response
+        top_p=top_p,                          # Control diversity in response generation
+        frequency_penalty=frequency_penalty,  # Penalize repeated phrases
+        seed=seed,                            # Set random seed for reproducibility
+        messages=messages,                    # Contextual conversation messages
     ):
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
-
+        response += token_text
         # Yield the partial response to Gradio so it can display in real-time
-        yield
+        yield response
 
     print("Completed response generation.")
 
-#
-#
-#
+# -------------------------
+# GRADIO UI CONFIGURATION
+# -------------------------
 
-#
-
-
+# Create a Chatbot component with a specified height
+chatbot = gr.Chatbot(height=600)  # Define the height of the chatbot interface
+print("Chatbot interface created.")
 
-
-
-# -----------------
-# TAB: INFORMATION
-# -----------------
-with gr.Tab("Information"):
-    # You can add instructions, disclaimers, or helpful text here
-    gr.Markdown("## Welcome to Serverless-TextGen-Hub - Information")
+# Create textboxes and sliders for system prompt, tokens, and other parameters
+system_message_box = gr.Textbox(value="", label="System message")  # Input box for system message
 
-
-
-
-
-
+max_tokens_slider = gr.Slider(
+    minimum=1,      # Minimum allowable tokens
+    maximum=4096,   # Maximum allowable tokens
+    value=512,      # Default value
+    step=1,         # Increment step size
+    label="Max new tokens"  # Slider label
+)
+temperature_slider = gr.Slider(
+    minimum=0.1,    # Minimum temperature
+    maximum=4.0,    # Maximum temperature
+    value=0.7,      # Default value
+    step=0.1,       # Increment step size
+    label="Temperature"  # Slider label
+)
+top_p_slider = gr.Slider(
+    minimum=0.1,    # Minimum top-p value
+    maximum=1.0,    # Maximum top-p value
+    value=0.95,     # Default value
+    step=0.05,      # Increment step size
+    label="Top-P"   # Slider label
+)
+frequency_penalty_slider = gr.Slider(
+    minimum=-2.0,   # Minimum penalty
+    maximum=2.0,    # Maximum penalty
+    value=0.0,      # Default value
+    step=0.1,       # Increment step size
+    label="Frequency Penalty"  # Slider label
+)
+seed_slider = gr.Slider(
+    minimum=-1,     # -1 for random seed
+    maximum=65535,  # Maximum seed value
+    value=-1,       # Default value
+    step=1,         # Increment step size
+    label="Seed (-1 for random)"  # Slider label
+)
+
+# The custom_model_box is what the respond function sees as "custom_model"
+custom_model_box = gr.Textbox(
+    value="",  # Default value
+    label="Custom Model",  # Label for the textbox
+    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model."  # Additional info
+)
+
+# Define a function that updates the custom model box when a featured model is selected
+def set_custom_model_from_radio(selected):
+    """
+    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
+    We will update the Custom Model text box with that selection automatically.
+    """
+    print(f"Featured model selected: {selected}")  # Log selected model
+    return selected
+
+# Create the main ChatInterface object
+demo = gr.ChatInterface(
+    fn=respond,  # The function to handle responses
+    additional_inputs=[
+        system_message_box,  # System message input
+        max_tokens_slider,  # Max tokens slider
+        temperature_slider,  # Temperature slider
+        top_p_slider,  # Top-P slider
+        frequency_penalty_slider,  # Frequency penalty slider
+        seed_slider,  # Seed slider
+        custom_model_box  # Custom model input
+    ],
+    fill_height=True,  # Allow the chatbot to fill the container height
+    chatbot=chatbot,  # Chatbot UI component
+    theme="Nymbo/Nymbo_Theme",  # Theme for the interface
+)
+
+print("ChatInterface object created.")
+
+# -----------
+# ADDING THE "FEATURED MODELS" ACCORDION
+# -----------
+with demo:
+    with gr.Accordion("Featured Models", open=False):  # Collapsible section for featured models
+        model_search_box = gr.Textbox(
+            label="Filter Models",  # Label for the search box
+            placeholder="Search for a featured model...",  # Placeholder text
+            lines=1  # Single-line input
+        )
+        print("Model search box created.")
+
+        # Sample list of popular text models
+        models_list = [
+            "meta-llama/Llama-3.3-70B-Instruct",
+            "meta-llama/Llama-3.2-3B-Instruct",
+            "meta-llama/Llama-3.2-1B-Instruct",
+            "meta-llama/Llama-3.1-8B-Instruct",
+            "NousResearch/Hermes-3-Llama-3.1-8B",
+            "google/gemma-2-27b-it",
+            "google/gemma-2-9b-it",
+            "google/gemma-2-2b-it",
+            "mistralai/Mistral-Nemo-Instruct-2407",
+            "mistralai/Mixtral-8x7B-Instruct-v0.1",
+            "mistralai/Mistral-7B-Instruct-v0.3",
+            "Qwen/Qwen2.5-72B-Instruct",
+            "Qwen/QwQ-32B-Preview",
+            "PowerInfer/SmallThinker-3B-Preview",
+            "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+            "microsoft/Phi-3.5-mini-instruct",
+        ]
+        print("Models list initialized.")
+
+        featured_model_radio = gr.Radio(
+            label="Select a model below",  # Label for the radio buttons
+            choices=models_list,  # List of available models
+            value="meta-llama/Llama-3.3-70B-Instruct",  # Default selection
+            interactive=True  # Allow user interaction
+        )
+        print("Featured models radio button created.")
+
+        # Filter function for the radio button list
+        def filter_models(search_term):
+            print(f"Filtering models with search term: {search_term}")  # Log the search term
+            filtered = [m for m in models_list if search_term.lower() in m.lower()]  # Filter models by search term
+            print(f"Filtered models: {filtered}")  # Log filtered models
+            return gr.update(choices=filtered)
+
+        # Update the radio list when the search box value changes
+        model_search_box.change(
+            fn=filter_models,  # Function to filter models
+            inputs=model_search_box,  # Input: search box value
+            outputs=featured_model_radio  # Output: update radio button list
+        )
+        print("Model search box change event linked.")
+
+        # Update the custom model textbox when a featured model is selected
+        featured_model_radio.change(
+            fn=set_custom_model_from_radio,  # Function to set custom model
+            inputs=featured_model_radio,  # Input: selected model
+            outputs=custom_model_box  # Output: update custom model textbox
+        )
+        print("Featured model radio button change event linked.")
+
+    # -----------
+    # ADDING THE "INFORMATION" TAB
+    # -----------
+    with gr.Tab("Information"):
+        with gr.Row():
+            # Accordion for Featured Models
+            with gr.Accordion("Featured Models", open=False):
+                gr.HTML(
+                    """
 <table style="width:100%; text-align:center; margin:auto;">
 <tr>
 <th>Model Name</th>
-<th>
+<th>Typography</th>
 <th>Notes</th>
 </tr>
 <tr>
 <td>meta-llama/Llama-3.3-70B-Instruct</td>
 <td>✅</td>
-<td
+<td></td>
 </tr>
 <tr>
 <td>meta-llama/Llama-3.2-3B-Instruct</td>
 <td>✅</td>
-<td
+<td></td>
 </tr>
 <tr>
-<td>
+<td>meta-llama/Llama-3.2-1B-Instruct</td>
+<td>✅</td>
+<td></td>
+</tr>
+<tr>
+<td>meta-llama/Llama-3.1-8B-Instruct</td>
+<td>✅</td>
+<td></td>
+</tr>
+<tr>
+<td>NousResearch/Hermes-3-Llama-3.1-8B</td>
 <td>✅</td>
-<td
+<td></td>
+</tr>
+<tr>
+<td>google/gemma-2-27b-it</td>
+<td>✅</td>
+<td></td>
+</tr>
+<tr>
+<td>google/gemma-2-9b-it</td>
+<td>✅</td>
+<td></td>
+</tr>
+<tr>
+<td>google/gemma-2-2b-it</td>
+<td>✅</td>
+<td></td>
+</tr>
+<tr>
+<td>mistralai/Mistral-Nemo-Instruct-2407</td>
+<td>✅</td>
+<td></td>
+</tr>
+<tr>
+<td>mistralai/Mixtral-8x7B-Instruct-v0.1</td>
+<td>✅</td>
+<td></td>
+</tr>
+<tr>
+<td>mistralai/Mistral-7B-Instruct-v0.3</td>
+<td>✅</td>
+<td></td>
 </tr>
 <tr>
 <td>Qwen/Qwen2.5-72B-Instruct</td>
 <td>✅</td>
-<td
+<td></td>
+</tr>
+<tr>
+<td>Qwen/QwQ-32B-Preview</td>
+<td>✅</td>
+<td></td>
+</tr>
+<tr>
+<td>PowerInfer/SmallThinker-3B-Preview</td>
+<td>✅</td>
+<td></td>
+</tr>
+<tr>
+<td>HuggingFaceTB/SmolLM2-1.7B-Instruct</td>
+<td>✅</td>
+<td></td>
+</tr>
+<tr>
+<td>TinyLlama/TinyLlama-1.1B-Chat-v1.0</td>
+<td>✅</td>
+<td></td>
+</tr>
+<tr>
+<td>microsoft/Phi-3.5-mini-instruct</td>
+<td>✅</td>
+<td></td>
 </tr>
 </table>
 """
-
-
-    # Accordion for Parameters Overview
-    with gr.Accordion("Parameters Overview", open=False):
-        gr.Markdown(
-            """
-            **Here is a brief overview of the main parameters for text generation:**
-
-            - **Max Tokens**: The maximum number of tokens (think of these as word-pieces) the model will generate in its response.
-            - **Temperature**: Controls how "creative" or random the output is. Lower values = more deterministic, higher values = more varied.
-            - **Top-P**: Similar to temperature, but uses nucleus sampling. Top-P defines the probability mass of the tokens to sample from. For example, `top_p=0.9` means "use the top 90% probable tokens."
-            - **Frequency Penalty**: A higher penalty discourages repeated tokens, helping reduce repetitive answers.
-            - **Seed**: You can set a seed for deterministic results. `-1` means random each time.
-
-            **Featured Models** can also be selected. If you want to override the model, you may specify a custom Hugging Face model path in the "Custom Model" text box.
+                )
 
-
-
+            # Accordion for Parameters Overview
+            with gr.Accordion("Parameters Overview", open=False):
+                gr.Markdown(
 """
-
-
-# -----------
-# TAB: CHAT
-# -----------
-with gr.Tab("Chat"):
-    gr.Markdown("## Chat with the TextGen Model")
-
-    # Create a Chatbot component with a specified height
-    chatbot = gr.Chatbot(height=600)
-    print("Chatbot interface created.")
-
-    # Create textboxes and sliders for system prompt, tokens, and other parameters
-    system_message_box = gr.Textbox(
-        value="",
-        label="System message",
-        info="You can use this to provide instructions or context to the assistant. Leave empty if not needed."
-    )
+## System Message
+###### This box is for setting the initial context or instructions for the AI. It helps guide the AI on how to respond to your inputs.
 
-
-
-        maximum=4096,
-        value=512,
-        step=1,
-        label="Max new tokens",
-        info="Controls the maximum length of the output. Keep an eye on your usage!"
-    )
+## Max New Tokens
+###### This slider allows you to specify the maximum number of tokens (words or parts of words) the AI can generate in a single response. The default value is 512, and the maximum is 4096.
 
-
-
-        maximum=4.0,
-        value=0.7,
-        step=0.1,
-        label="Temperature",
-        info="Controls creativity. Higher values = more random replies, lower = more deterministic."
-    )
-
-    top_p_slider = gr.Slider(
-        minimum=0.1,
-        maximum=1.0,
-        value=0.95,
-        step=0.05,
-        label="Top-P",
-        info="Use nucleus sampling with probability mass cutoff. 1.0 includes all tokens."
-    )
+## Temperature
+###### Temperature controls the randomness of the AI's responses. A higher temperature makes the responses more creative and varied, while a lower temperature makes them more predictable and focused. The default value is 0.7.
 
-
-
-        maximum=2.0,
-        value=0.0,
-        step=0.1,
-        label="Frequency Penalty",
-        info="Penalize repeated tokens to avoid repetition in output."
-    )
+## Top-P (Nucleus Sampling)
+###### Top-P sampling is another way to control the diversity of the AI's responses. It ensures that the AI only considers the most likely tokens up to a cumulative probability of P. The default value is 0.95.
 
-
-
-        maximum=65535,
-        value=-1,
-        step=1,
-        label="Seed (-1 for random)",
-        info="Fixing a seed (0 to 65535) can make results reproducible. -1 picks a random seed each time."
-    )
+## Frequency Penalty
+###### This penalty discourages the AI from repeating the same tokens (words or phrases) in its responses. A higher penalty reduces repetition. The default value is 0.0.
 
-
-
-        value="",
-        label="Custom Model",
-        info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model."
-    )
+## Seed
+###### The seed is a number that ensures the reproducibility of the AI's responses. If you set a specific seed, the AI will generate the same response every time for the same input. If you set it to -1, the AI will generate a random seed each time.
 
-
-
-        print(f"Featured model selected: {selected}")
-        return selected
-
-    print("ChatInterface object created.")
-
-    # The main ChatInterface call
-    chat_interface = gr.ChatInterface(
-        fn=respond,  # The function to handle responses
-        additional_inputs=[
-            system_message_box,
-            max_tokens_slider,
-            temperature_slider,
-            top_p_slider,
-            frequency_penalty_slider,
-            seed_slider,
-            custom_model_box
-        ],
-        fill_height=True,  # Let the chatbot fill the container height
-        chatbot=chatbot,  # The Chatbot UI component
-        theme="Nymbo/Nymbo_Theme",
-    )
+## Custom Model
+###### You can specify a custom Hugging Face model path here. This will override any selected featured model. This is optional and allows you to use models not listed in the featured models.
 
-
-
-
-    # ADDING THE "FEATURED MODELS" ACCORDION (Same logic as before)
-    # -----------
-    with gr.Accordion("Featured Models", open=False):
-        model_search_box = gr.Textbox(
-            label="Filter Models",
-            placeholder="Search for a featured model...",
-            lines=1
-        )
-        print("Model search box created.")
-
-        # Sample list of popular text models
-        models_list = [
-            "meta-llama/Llama-3.3-70B-Instruct",
-            "meta-llama/Llama-3.2-3B-Instruct",
-            "meta-llama/Llama-3.2-1B-Instruct",
-            "meta-llama/Llama-3.1-8B-Instruct",
-            "NousResearch/Hermes-3-Llama-3.1-8B",
-            "google/gemma-2-27b-it",
-            "google/gemma-2-9b-it",
-            "google/gemma-2-2b-it",
-            "mistralai/Mistral-Nemo-Instruct-2407",
-            "mistralai/Mixtral-8x7B-Instruct-v0.1",
-            "mistralai/Mistral-7B-Instruct-v0.3",
-            "Qwen/Qwen2.5-72B-Instruct",
-            "Qwen/QwQ-32B-Preview",
-            "PowerInfer/SmallThinker-3B-Preview",
-            "HuggingFaceTB/SmolLM2-1.7B-Instruct",
-            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-            "microsoft/Phi-3.5-mini-instruct",
-        ]
-        print("Models list initialized.")
-
-        featured_model_radio = gr.Radio(
-            label="Select a model below",
-            choices=models_list,
-            value="meta-llama/Llama-3.3-70B-Instruct",
-            interactive=True
-        )
-        print("Featured models radio button created.")
-
-        def filter_models(search_term):
-            print(f"Filtering models with search term: {search_term}")
-            filtered = [m for m in models_list if search_term.lower() in m.lower()]
-            print(f"Filtered models: {filtered}")
-            return gr.update(choices=filtered)
-
-        model_search_box.change(
-            fn=filter_models,
-            inputs=model_search_box,
-            outputs=featured_model_radio
-        )
-        print("Model search box change event linked.")
-
-        featured_model_radio.change(
-            fn=set_custom_model_from_radio,
-            inputs=featured_model_radio,
-            outputs=custom_model_box
-        )
-        print("Featured model radio button change event linked.")
+### Remember, these settings are all about giving you control over the text generation process. Feel free to experiment and see what each one does. And if you're ever in doubt, the default settings are a great place to start. Happy creating!
+"""
+                )
 
 print("Gradio interface initialized.")
 
-# ------------------------
-# MAIN ENTRY POINT
-# ------------------------
 if __name__ == "__main__":
     print("Launching the demo application.")
     demo.launch()
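Aside: a design note on the new wiring. gr.ChatInterface calls fn with the message and history first, then each component in additional_inputs as an extra positional argument, in order. A minimal runnable sketch of that contract (the echo body is illustrative, not the Space's respond function):

import gradio as gr

def respond(message, history, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, custom_model):
    # A real handler would call the streaming client here; this sketch just echoes
    yield f"[{custom_model or 'default-model'}] {message}"

demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        gr.Slider(1, 4096, value=512, step=1, label="Max new tokens"),
        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P"),
        gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty"),
        gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 for random)"),
        gr.Textbox(value="", label="Custom Model"),
    ],
)

if __name__ == "__main__":
    demo.launch()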