Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed 25 days ago

Commit

e7683ca

verified ·

1 Parent(s): 6a2e496

Update app.py

Browse files

Files changed (1) hide show

app.py +224 -172

app.py CHANGED Viewed

@@ -13,25 +13,6 @@ client = OpenAI(
 )
 print("OpenAI client initialized.")
-# We'll define a list of placeholder featured models for demonstration.
-# In real usage, replace them with actual model names available on Hugging Face.
-models_list = [
-    "meta-llama/Llama-3.1-8B-Instruct",
-    "microsoft/Phi-3.5-mini-instruct",
-    "mistralai/Mistral-7B-Instruct-v0.3",
-    "Qwen/Qwen2.5-72B-Instruct"
-]
-def filter_featured_models(search_term):
-    """
-    Filters the 'models_list' based on text entered in the search box.
-    Returns a gr.update object that changes the choices available
-    in the 'featured_models_radio'.
-    """
-    filtered = [m for m in models_list if search_term.lower() in m.lower()]
-    return gr.update(choices=filtered)
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -42,7 +23,7 @@ def respond(
     frequency_penalty,
     seed,
     custom_model,
-    selected_model
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -54,8 +35,8 @@ def respond(
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - custom_model: a custom Hugging Face model name (if any)
-    - selected_model: a model name chosen from the featured models radio button
     """
     print(f"Received message: {message}")
@@ -64,12 +45,20 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
-    print(f"Selected featured model: {selected_model}")
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
     # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
@@ -87,171 +76,234 @@ def respond(
     # Append the latest user message
     messages.append({"role": "user", "content": message})
-    # Decide which model to use:
-    # 1) If the user provided a custom model, use it.
-    # 2) Else if they chose a featured model, use it.
-    # 3) Otherwise, fall back to a default model.
-    if custom_model.strip() != "":
-        model_to_use = custom_model.strip()
-    elif selected_model is not None and selected_model.strip() != "":
-        model_to_use = selected_model.strip()
-    else:
-        model_to_use = "meta-llama/Llama-3.3-70B-Instruct"  # Default fallback
-    print(f"Model selected for inference: {model_to_use}")
     # Start with an empty string to build the response as tokens stream in
     response = ""
     print("Sending request to OpenAI API.")
-    # Make the streaming request to the HF Inference API via openai-like client
-    for message_chunk in client.chat.completions.create(
-        model=model_to_use,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-        frequency_penalty=frequency_penalty,
-        seed=seed,
-        messages=messages,
-    ):
-        # Extract the token text from the response chunk
-        token_text = message_chunk.choices[0].delta.content
-        print(f"Received token: {token_text}")
-        response += token_text
-        # Yield the partial response to Gradio so it can display in real-time
-        yield response
     print("Completed response generation.")
-########################
-# GRADIO APP LAYOUT
-########################
-# We’ll build a custom Blocks layout so we can have:
-#  - A Featured Models accordion with a search box
-#  - Our ChatInterface to handle the conversation
-#  - Additional sliders and textboxes for settings (like the original code)
-########################
 with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    gr.Markdown("## Serverless Text Generation Hub")
     gr.Markdown(
-        "An all-in-one UI for chatting with text-generation models on Hugging Face's Inference API."
-    )
-    # We keep a Chatbot component for the conversation display
-    chatbot = gr.Chatbot(height=600, label="Chat Preview")
-    # Textbox for system message
-    system_message_box = gr.Textbox(
-        value="",
-        label="System Message",
-        placeholder="Enter a system prompt if you want (optional).",
-    )
-    # Slider for max_tokens
-    max_tokens_slider = gr.Slider(
-        minimum=1,
-        maximum=4096,
-        value=512,
-        step=1,
-        label="Max new tokens",
-    )
-    # Slider for temperature
-    temperature_slider = gr.Slider(
-        minimum=0.1,
-        maximum=4.0,
-        value=0.7,
-        step=0.1,
-        label="Temperature",
-    )
-    # Slider for top_p
-    top_p_slider = gr.Slider(
-        minimum=0.1,
-        maximum=1.0,
-        value=0.95,
-        step=0.05,
-        label="Top-P",
-    )
-    # Slider for frequency penalty
-    freq_penalty_slider = gr.Slider(
-        minimum=-2.0,
-        maximum=2.0,
-        value=0.0,
-        step=0.1,
-        label="Frequency Penalty",
-    )
-    # Slider for seed
-    seed_slider = gr.Slider(
-        minimum=-1,
-        maximum=65535,  # Arbitrary upper limit for demonstration
-        value=-1,
-        step=1,
-        label="Seed (-1 for random)",
-    )
-    # Custom Model textbox
-    custom_model_box = gr.Textbox(
-        value="",
-        label="Custom Model",
-        info="(Optional) Provide a custom Hugging Face model path. This will override the selected Featured Model if not empty."
     )
-    # Accordion for featured models
-    with gr.Accordion("Featured Models", open=False):
-        # Textbox for filtering the featured models
-        model_search_box = gr.Textbox(
-            label="Filter Models",
-            placeholder="Search for a featured model...",
-            lines=1,
         )
-        # Radio for selecting the desired model
-        featured_models_radio = gr.Radio(
-            label="Select a featured model below",
-            choices=models_list,  # Start with the entire list
-            value=None,           # No default
-            interactive=True
         )
-        # We connect the model_search_box to the filter function
-        model_search_box.change(
-            filter_featured_models,
-            inputs=model_search_box,
-            outputs=featured_models_radio
         )
-    # Now we create our ChatInterface
-    # We pass all the extra components as additional_inputs
-    interface = gr.ChatInterface(
-        fn=respond,
-        chatbot=chatbot,
-        additional_inputs=[
-            system_message_box,
-            max_tokens_slider,
-            temperature_slider,
-            top_p_slider,
-            freq_penalty_slider,
-            seed_slider,
-            custom_model_box,
-            featured_models_radio
         ],
-        theme="Nymbo/Nymbo_Theme",
-        title="Serverless TextGen Hub with Featured Models",
-        description=(
-            "Use the sliders and textboxes to control generation parameters. "
-            "Pick a model from 'Featured Models' or specify a custom model path."
-        ),
-        # Fill the screen height
-        fill_height=True
     )
-# If you want the script to be directly executable, launch the demo here:
-if __name__ == "__main__":
-    print("Launching the demo application...")
-    demo.launch()

 )
 print("OpenAI client initialized.")
 def respond(
     message,
     history: list[tuple[str, str]],
     frequency_penalty,
     seed,
     custom_model,
+    selected_featured_model
 ):
     """
     This function handles the chatbot response. It takes in:
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
+    - custom_model: the user-provided custom model name (if any)
+    - selected_featured_model: the model selected from featured models
     """
     print(f"Received message: {message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Custom model: {custom_model}")
+    print(f"Selected featured model: {selected_featured_model}")
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
+    # Determine which model to use: either custom_model or selected featured model
+    if custom_model.strip() != "":
+        model_to_use = custom_model.strip()
+        print(f"Using Custom Model: {model_to_use}")
+    else:
+        model_to_use = selected_featured_model
+        print(f"Using Featured Model: {model_to_use}")
     # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
     # Append the latest user message
     messages.append({"role": "user", "content": message})
     # Start with an empty string to build the response as tokens stream in
     response = ""
     print("Sending request to OpenAI API.")
+    try:
+        # Make the streaming request to the HF Inference API via openai-like client
+        for message_chunk in client.chat.completions.create(
+            model=model_to_use,              # Use either the user-provided custom model or selected featured model
+            max_tokens=max_tokens,
+            stream=True,                     # Stream the response
+            temperature=temperature,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            seed=seed,
+            messages=messages,
+        ):
+            # Extract the token text from the response chunk
+            token_text = message_chunk.choices[0].delta.content
+            print(f"Received token: {token_text}")
+            response += token_text
+            # Yield the partial response to Gradio so it can display in real-time
+            yield response
+    except Exception as e:
+        print(f"Error during API call: {e}")
+        yield f"An error occurred: {e}"
     print("Completed response generation.")
+# Create a Chatbot component with a specified height
+chatbot = gr.Chatbot(height=600)
+print("Chatbot interface created.")
+# Placeholder featured models list
+FEATURED_MODELS_LIST = [
+    "gpt-3.5-turbo",
+    "gpt-4",
+    "bert-base-uncased",
+    "facebook/blenderbot-3B",
+    "EleutherAI/gpt-neo-2.7B",
+    "google/flan-t5-xxl",
+    "microsoft/DialoGPT-large",
+    "Salesforce/codegen-16B-multi",
+    "stabilityai/stablelm-tuned-alpha-7b",
+    "bigscience/bloom-560m",
+]
+# Define the Gradio Blocks interface
 with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    gr.Markdown("# Serverless-TextGen-Hub 📝🤖")
     gr.Markdown(
+        """
+        Welcome to the **Serverless-TextGen-Hub**! Chat with your favorite models seamlessly.
+        """
     )
+    with gr.Row():
+        # Chatbot component
+        chatbot_component = gr.Chatbot(height=600)
+    with gr.Row():
+        # System message input
+        system_message = gr.Textbox(
+            value="You are a helpful assistant.",
+            label="System Message",
+            placeholder="Enter system message here...",
+            lines=2,
         )
+    with gr.Row():
+        # User message input
+        user_message = gr.Textbox(
+            label="Your Message",
+            placeholder="Type your message here...",
+            lines=2,
         )
+        # Run button
+        run_button = gr.Button("Send", variant="primary")
+    with gr.Row():
+        # Additional settings
+        with gr.Column(scale=1):
+            max_tokens = gr.Slider(
+                minimum=1,
+                maximum=4096,
+                value=512,
+                step=1,
+                label="Max New Tokens",
+            )
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=4.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature",
+            )
+            top_p = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.95,
+                step=0.05,
+                label="Top-P",
+            )
+            frequency_penalty = gr.Slider(
+                minimum=-2.0,
+                maximum=2.0,
+                value=0.0,
+                step=0.1,
+                label="Frequency Penalty",
+            )
+            seed = gr.Slider(
+                minimum=-1,
+                maximum=65535,  # Arbitrary upper limit for demonstration
+                value=-1,
+                step=1,
+                label="Seed (-1 for random)",
+            )
+            custom_model = gr.Textbox(
+                value="",
+                label="Custom Model",
+                info="(Optional) Provide a custom Hugging Face model path. This will override the selected featured model if not empty.",
+                placeholder="e.g., meta-llama/Llama-3.3-70B-Instruct",
+            )
+    with gr.Accordion("Featured Models", open=True):
+        with gr.Column():
+            model_search = gr.Textbox(
+                label="Filter Models",
+                placeholder="Search for a featured model...",
+                lines=1,
+            )
+            featured_model = gr.Radio(
+                label="Select a model below",
+                value=FEATURED_MODELS_LIST[0],
+                choices=FEATURED_MODELS_LIST,
+                interactive=True,
+            )
+    # Function to filter featured models based on search input
+    def filter_featured_models(search_term):
+        if not search_term:
+            return gr.update(choices=FEATURED_MODELS_LIST, value=FEATURED_MODELS_LIST[0])
+        filtered = [model for model in FEATURED_MODELS_LIST if search_term.lower() in model.lower()]
+        if not filtered:
+            return gr.update(choices=[], value=None)
+        return gr.update(choices=filtered, value=filtered[0])
+    # Update featured_model choices based on search
+    model_search.change(
+        fn=filter_featured_models,
+        inputs=model_search,
+        outputs=featured_model,
+    )
+    # Function to handle the chatbot response
+    def handle_response(message, history, system_msg, max_tok, temp, tp, freq_pen, sd, custom_mod, selected_feat_mod):
+        # Append user message to history
+        history = history or []
+        history.append((message, None))
+        # Generate response using the respond function
+        response = respond(
+            message=message,
+            history=history,
+            system_message=system_msg,
+            max_tokens=max_tok,
+            temperature=temp,
+            top_p=tp,
+            frequency_penalty=freq_pen,
+            seed=sd,
+            custom_model=custom_mod,
+            selected_featured_model=selected_feat_mod,
         )
+        return response, history + [(message, response)]
+    # Handle button click
+    run_button.click(
+        fn=handle_response,
+        inputs=[
+            user_message,
+            chatbot_component,        # history
+            system_message,
+            max_tokens,
+            temperature,
+            top_p,
+            frequency_penalty,
+            seed,
+            custom_model,
+            featured_model,
+        ],
+        outputs=[
+            chatbot_component,
+            chatbot_component,        # Updated history
+        ],
+    )
+    # Allow pressing Enter to send the message
+    user_message.submit(
+        fn=handle_response,
+        inputs=[
+            user_message,
+            chatbot_component,        # history
+            system_message,
+            max_tokens,
+            temperature,
+            top_p,
+            frequency_penalty,
+            seed,
+            custom_model,
+            featured_model,
+        ],
+        outputs=[
+            chatbot_component,
+            chatbot_component,        # Updated history
         ],
     )
+    # Custom CSS to enhance the UI
+    demo.load(lambda: None, None, None, _js="""
+    () => {
+        const style = document.createElement('style');
+        style.innerHTML = `
+            footer {visibility: hidden !important;}
+            .gradio-container {background-color: #f9f9f9;}
+        `;
+        document.head.appendChild(style);
+    }
+    """)
+print("Launching Gradio interface...")  # Debug log
+# Launch the Gradio interface without showing the API or sharing externally
+demo.launch(show_api=False, share=False)