Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Community

Nymbo committed on Jan 4

Commit

7de1759

verified ·

1 Parent(s): 77298b9

Update app.py

Browse files

Files changed (1) hide show

app.py +234 -253

app.py CHANGED Viewed

@@ -1,10 +1,6 @@
 import gradio as gr
-import os
 from openai import OpenAI
-################################################
-#                INITIAL SETUP
-################################################
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
@@ -17,11 +13,10 @@ client = OpenAI(
 )
 print("OpenAI client initialized.")
-# Our main response-generating function
 def respond(
     user_message,
-    history,
-    system_message,
     max_tokens,
     temperature,
     top_p,
@@ -32,298 +27,284 @@ def respond(
 ):
     """
     This function handles the chatbot response. It takes in:
-    - user_message: the user's new message
-    - history: the list of previous messages, each as [user_text, assistant_text]
-    - system_message: the system prompt
-    - max_tokens: the maximum number of tokens to generate in the response
     - temperature: sampling temperature
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
-    - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - featured_model: the user-chosen model from the radio button
-    - custom_model: a user-specified custom model that overrides featured_model if not empty
     """
-    print(f"New user message: {user_message}")
-    print(f"History so far: {history}")
-    print(f"System message: {system_message}")
-    print(f"max_tokens: {max_tokens}, temperature: {temperature}, top_p: {top_p}")
-    print(f"frequency_penalty: {frequency_penalty}, seed: {seed}")
-    print(f"Featured Model: {featured_model}")
-    print(f"Custom Model: {custom_model}")
-    # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
-    # Determine which model to use
-    # If the user typed something in custom_model, that overrides the featured model
-    # Otherwise we use the model selected in the radio. If neither, default to the example "meta-llama..."
-    model_to_use = None
-    if custom_model.strip():
-        model_to_use = custom_model.strip()
-    elif featured_model is not None and featured_model.strip():
-        model_to_use = featured_model.strip()
-    else:
         model_to_use = "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
-    # Construct the conversation messages for the HF Inference API
-    messages = [{"role": "system", "content": system_message}]
-    for user_text, assistant_text in history:
         if user_text:
             messages.append({"role": "user", "content": user_text})
         if assistant_text:
             messages.append({"role": "assistant", "content": assistant_text})
     messages.append({"role": "user", "content": user_message})
-    # We'll collect and stream the response
     response_so_far = ""
     # Make the streaming request to the HF Inference API
-    print("Sending request to OpenAI/Hugging Face Inference API...")
-    for message_chunk in client.chat.completions.create(
-        model=model_to_use,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-        frequency_penalty=frequency_penalty,
-        seed=seed,
-        messages=messages,
-    ):
-        # The content for the partial chunk
-        token_text = message_chunk.choices[0].delta.content
-        response_so_far += token_text
-        # Return partial response to Gradio to display in real-time
-        yield response_so_far
     print("Completed response generation.")
-################################################
-#          GRADIO UI + STATE MANAGEMENT
-################################################
-def user_submit(user_message, history):
-    """
-    This function is called when the user sends a message.
-    We simply add the user message to the conversation history.
-    """
-    print("user_submit triggered.")
-    # Append the new user message to history
-    if not history:
-        history = []
-    history = history + [[user_message, None]]
-    return history, ""
-def bot_reply(history, system_message, max_tokens, temperature, top_p,
-              frequency_penalty, seed, featured_model, custom_model):
-    """
-    This function is triggered to produce the bot's response after the user has submitted.
-    We call 'respond' for streaming text.
-    """
-    print("bot_reply triggered.")
-    # The last conversation item has user_message, None
-    user_message = history[-1][0]
-    # We will stream the partial responses from 'respond'
-    bot_response = respond(
-        user_message=user_message,
-        history=history[:-1],  # all items except the last, because we pass the last user msg separately
-        system_message=system_message,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        frequency_penalty=frequency_penalty,
-        seed=seed,
-        featured_model=featured_model,
-        custom_model=custom_model
-    )
-    # As we yield from the generator, we update the last item in history with the partial response
-    # Gradio streaming logic: yield the partial updates as they come in
-    for partial_text in bot_response:
-        history[-1][1] = partial_text
-        yield history
-# We define a small list of placeholder featured models for demonstration
 models_list = [
-    "meta-llama/Llama-2-13B-Chat-hf",
     "bigscience/bloom",
-    "EleutherAI/gpt-neo-2.7B",
-    "meta-llama/Llama-3.3-70B-Instruct"
 ]
 def filter_models(search_term):
-    """
-    Filter function triggered when user types in the model_search box.
-    Returns an updated list of models that contain the search term.
-    """
     filtered = [m for m in models_list if search_term.lower() in m.lower()]
     return gr.update(choices=filtered)
-################################################
-#        BUILDING THE GRADIO LAYOUT
-################################################
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    gr.Markdown(
-        """
-        # Serverless-TextGen-Hub
-        **A UI for text generation using Hugging Face's Inference API.**
-        Below is a simple chat interface. You can pick from **Featured Models** or specify a **Custom Model**
-        to override the choice. If you're not sure, just use the default.
-        """
-    )
-    # State to hold the conversation history, will be a list of [user, bot]
-    conversation_state = gr.State([])
-    # Row for system message + advanced settings
-    with gr.Accordion("Advanced Settings", open=False):
-        system_message = gr.Textbox(
-            label="System Message",
-            value="You are a helpful assistant.",
-            lines=2,
-            info="Provides background or personality instructions to the model."
-        )
-        max_tokens = gr.Slider(
-            minimum=1,
-            maximum=4096,
-            value=512,
-            step=1,
-            label="Max new tokens"
-        )
-        temperature = gr.Slider(
-            minimum=0.1,
-            maximum=4.0,
-            value=0.7,
-            step=0.1,
-            label="Temperature"
-        )
-        top_p = gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-P"
-        )
-        frequency_penalty = gr.Slider(
-            minimum=-2.0,
-            maximum=2.0,
-            value=0.0,
-            step=0.1,
-            label="Frequency Penalty"
-        )
-        seed = gr.Slider(
-            minimum=-1,
-            maximum=65535,
-            value=-1,
-            step=1,
-            label="Seed (-1 for random)"
-        )
-    # Featured Models + filtering
-    with gr.Accordion("Featured Models", open=False):
-        model_search = gr.Textbox(
-            label="Filter Models",
-            placeholder="Search for a featured model...",
-            lines=1
-        )
-        featured_model_radio = gr.Radio(
-            label="Select a featured model below",
-            choices=models_list,
-            value=models_list[0],  # default selection
-            interactive=True
-        )
-        model_search.change(
-            filter_models,
-            inputs=model_search,
-            outputs=featured_model_radio
         )
-    # This is the Custom Model box (overrides Featured Models if not empty)
-    custom_model = gr.Textbox(
-        label="Custom Model",
-        value="",
-        info="(Optional) Provide a custom HF model path. If not empty, it overrides the Featured Model."
     )
-    # The main Chatbot interface
-    chatbot = gr.Chatbot(height=600)
-    # Textbox for the user to type a new message
-    with gr.Row():
-        user_input = gr.Textbox(
-            show_label=False,
-            placeholder="Type your message here (press enter or click 'Submit')",
-            lines=2
-        )
-        submit_btn = gr.Button("Submit", variant="primary")
-    # The user submits -> we update the conversation state
-    submit_btn.click(
-        fn=user_submit,
-        inputs=[user_input, conversation_state],
-        outputs=[conversation_state, user_input],
     )
-    # Then the bot replies, streaming the output
-    # We pass all required arguments from the advanced settings, plus the model selection boxes
-    submit_btn.click(
-        fn=bot_reply,
         inputs=[
-            conversation_state,
-            system_message,
-            max_tokens,
-            temperature,
-            top_p,
-            frequency_penalty,
-            seed,
-            featured_model_radio,
-            custom_model
         ],
-        outputs=[chatbot],
-        # 'bot_reply' is a generator, so we set streaming=True:
-        queue=True
     )
-    # We also allow pressing Enter in user_input to do the same thing
-    user_input.submit(
-        fn=user_submit,
-        inputs=[user_input, conversation_state],
-        outputs=[conversation_state, user_input],
-    )
-    user_input.submit(
-        fn=bot_reply,
-        inputs=[
-            conversation_state,
-            system_message,
-            max_tokens,
-            temperature,
-            top_p,
-            frequency_penalty,
-            seed,
-            featured_model_radio,
-            custom_model
-        ],
-        outputs=[chatbot],
-        queue=True
-    )
-    gr.HTML("""
-    <br>
-    <p style='text-align:center;'>
-        Developed by <strong>Nymbo</strong>.
-        Powered by <strong>Hugging Face Inference API</strong>.
-    </p>
-    """)
-# Finally, launch the app
 if __name__ == "__main__":
-    print("Launching the Serverless-TextGen-Hub application...")
     demo.launch()

 import gradio as gr
 from openai import OpenAI
+import os
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 )
 print("OpenAI client initialized.")
 def respond(
     user_message,
+    chat_history,
+    system_msg,
     max_tokens,
     temperature,
     top_p,
 ):
     """
     This function handles the chatbot response. It takes in:
+    - user_message: the user's newly typed message
+    - chat_history: the list of (user, assistant) message pairs
+    - system_msg: the system instruction or system-level context
+    - max_tokens: the maximum number of tokens to generate
     - temperature: sampling temperature
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
+    - seed: a fixed seed for reproducibility; -1 means 'random'
+    - featured_model: the chosen model name from 'Featured Models' radio
+    - custom_model: the optional custom model that overrides the featured one if provided
     """
+    print(f"Received user message: {user_message}")
+    print(f"System message: {system_msg}")
+    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}, Freq-Penalty: {frequency_penalty}, Seed: {seed}")
+    print(f"Featured model: {featured_model}")
+    print(f"Custom model: {custom_model}")
+    # Convert the seed to None if user set it to -1 (meaning random)
     if seed == -1:
         seed = None
+    # Decide which model to actually use
+    # If custom_model is non-empty, use that; otherwise use the chosen featured_model
+    model_to_use = custom_model.strip() if custom_model.strip() != "" else featured_model
+    # Provide a default fallback if for some reason both are empty
+    if model_to_use.strip() == "":
         model_to_use = "meta-llama/Llama-3.3-70B-Instruct"
     print(f"Model selected for inference: {model_to_use}")
+    # Construct the conversation history in the format required by HF's Inference API
+    messages = []
+    if system_msg.strip():
+        messages.append({"role": "system", "content": system_msg.strip()})
+    # Add the conversation history
+    for user_text, assistant_text in chat_history:
         if user_text:
             messages.append({"role": "user", "content": user_text})
         if assistant_text:
             messages.append({"role": "assistant", "content": assistant_text})
+    # Add the new user message to the conversation
     messages.append({"role": "user", "content": user_message})
+    # We'll build the response token-by-token in a streaming loop
     response_so_far = ""
+    print("Sending request to the Hugging Face Inference API...")
     # Make the streaming request to the HF Inference API
+    try:
+        for resp_chunk in client.chat.completions.create(
+            model=model_to_use,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            seed=seed,
+            messages=messages,
+        ):
+            token_text = resp_chunk.choices[0].delta.content
+            response_so_far += token_text
+            # We yield back the updated message to display partial progress in the chatbot
+            yield response_so_far
+    except Exception as e:
+        # If there's an error, let's at least show it in the chat
+        error_text = f"[ERROR] {str(e)}"
+        print(error_text)
+        yield response_so_far + "\n\n" + error_text
     print("Completed response generation.")
+#
+# BUILDING THE GRADIO INTERFACE BELOW
+#
+# List of featured models; adjust or replace these placeholders with real text-generation models
 models_list = [
+    "meta-llama/Llama-3.3-70B-Instruct",
+    "meta-llama/Llama-2-13B-chat-hf",
     "bigscience/bloom",
+    "openlm-research/open_llama_7b",
+    "facebook/opt-6.7b",
+    "google/flan-t5-xxl",
 ]
 def filter_models(search_term):
+    """Filters the models_list by the given search_term and returns an update for the Radio component."""
     filtered = [m for m in models_list if search_term.lower() in m.lower()]
     return gr.update(choices=filtered)
+with gr.Blocks(theme="Nymbo/Nymbo_Theme_5") as demo:
+    gr.Markdown("# Serverless-TextGen-Hub (Enhanced)")
+    gr.Markdown("**A comprehensive UI for text generation with a featured-models dropdown and a custom override**.")
+    # We keep track of the conversation in a Gradio state variable (list of tuples)
+    chat_history = gr.State([])
+    # Tabs for organization
+    with gr.Tab("Basic Settings"):
+        with gr.Row():
+            with gr.Column(elem_id="prompt-container"):
+                # System Message
+                system_msg = gr.Textbox(
+                    label="System message",
+                    placeholder="Enter system-level instructions or context here.",
+                    lines=2
+                )
+                # Accordion for featured models
+                with gr.Accordion("Featured Models", open=True):
+                    model_search = gr.Textbox(
+                        label="Filter Models",
+                        placeholder="Search for a featured model...",
+                        lines=1
+                    )
+                    # The radio that lists our featured models
+                    model_radio = gr.Radio(
+                        label="Select a featured model below",
+                        choices=models_list,
+                        value=models_list[0],  # default
+                        interactive=True
+                    )
+                    # Link the search box to update the model_radio choices
+                    model_search.change(filter_models, inputs=model_search, outputs=model_radio)
+                # Custom Model
+                custom_model_box = gr.Textbox(
+                    label="Custom Model (Optional)",
+                    info="If provided, overrides the featured model above. e.g. 'meta-llama/Llama-3.3-70B-Instruct'",
+                    placeholder="Your huggingface.co/username/model_name path"
+                )
+    with gr.Tab("Advanced Settings"):
+        with gr.Row():
+            max_tokens_slider = gr.Slider(
+                minimum=1,
+                maximum=4096,
+                value=512,
+                step=1,
+                label="Max new tokens"
+            )
+            temperature_slider = gr.Slider(
+                minimum=0.1,
+                maximum=4.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature"
+            )
+            top_p_slider = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.95,
+                step=0.05,
+                label="Top-P"
+            )
+        with gr.Row():
+            freq_penalty_slider = gr.Slider(
+                minimum=-2.0,
+                maximum=2.0,
+                value=0.0,
+                step=0.1,
+                label="Frequency Penalty"
+            )
+            seed_slider = gr.Slider(
+                minimum=-1,
+                maximum=65535,
+                value=-1,
+                step=1,
+                label="Seed (-1 for random)"
+            )
+    # Chat interface area: user input -> assistant output
+    with gr.Row():
+        chatbot = gr.Chatbot(
+            label="TextGen Chat",
+            height=500
         )
+    # The user types a message here
+    user_input = gr.Textbox(
+        label="Your message",
+        placeholder="Type your text prompt here..."
     )
+    # "Send" button triggers our respond() function, updates the chatbot
+    send_button = gr.Button("Send")
+    # A Clear Chat button to reset the conversation
+    clear_button = gr.Button("Clear Chat")
+    # Define how the Send button updates the state and chatbot
+    def user_submission(user_text, history):
+        """
+        This function gets called first to add the user's message to the chat.
+        We return the updated chat_history with the user's message appended,
+        plus an empty string for the next user input box.
+        """
+        if user_text.strip() == "":
+            return history, ""
+        # Append user message to chat
+        history = history + [(user_text, None)]
+        return history, ""
+    send_button.click(
+        fn=user_submission,
+        inputs=[user_input, chat_history],
+        outputs=[chat_history, user_input]
     )
+    # Then we run the respond function (streaming) to generate the assistant message
+    def bot_response(
+        history,
+        system_msg,
+        max_tokens,
+        temperature,
+        top_p,
+        freq_penalty,
+        seed,
+        featured_model,
+        custom_model
+    ):
+        """
+        This function is called to generate the assistant's response
+        based on the conversation so far, system message, etc.
+        We do the streaming here.
+        """
+        if not history:
+            yield history
+        # The last user message is in history[-1][0]
+        user_message = history[-1][0] if history else ""
+        # We pass everything to respond() generator
+        bot_stream = respond(
+            user_message=user_message,
+            chat_history=history[:-1],  # all except the newly appended user message
+            system_msg=system_msg,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            frequency_penalty=freq_penalty,
+            seed=seed,
+            featured_model=featured_model,
+            custom_model=custom_model
+        )
+        partial_text = ""
+        for partial_text in bot_stream:
+            # We'll keep updating the last message in the conversation with partial_text
+            updated_history = history[:-1] + [(history[-1][0], partial_text)]
+            yield updated_history
+    send_button.click(
+        fn=bot_response,
         inputs=[
+            chat_history,
+            system_msg,
+            max_tokens_slider,
+            temperature_slider,
+            top_p_slider,
+            freq_penalty_slider,
+            seed_slider,
+            model_radio,
+            custom_model_box
         ],
+        outputs=chatbot
     )
+    # Clear chat just resets the state
+    def clear_chat():
+        return [], ""
+    clear_button.click(
+        fn=clear_chat,
+        inputs=[],
+        outputs=[chat_history, user_input]
+    )
+# Launch the application
 if __name__ == "__main__":
+    print("Launching the Serverless-TextGen-Hub with Featured Models & Custom Model override.")
     demo.launch()