Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -2,6 +2,10 @@ import gradio as gr | |
| 2 | 
             
            from openai import OpenAI
         | 
| 3 | 
             
            import os
         | 
| 4 |  | 
|  | |
|  | |
|  | |
|  | |
| 5 | 
             
            # Retrieve the access token from the environment variable
         | 
| 6 | 
             
            ACCESS_TOKEN = os.getenv("HF_TOKEN")
         | 
| 7 | 
             
            print("Access token loaded.")
         | 
| @@ -13,6 +17,28 @@ client = OpenAI( | |
| 13 | 
             
            )
         | 
| 14 | 
             
            print("OpenAI client initialized.")
         | 
| 15 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 16 | 
             
            def respond(
         | 
| 17 | 
             
                message,
         | 
| 18 | 
             
                history: list[tuple[str, str]],
         | 
| @@ -22,34 +48,32 @@ def respond( | |
| 22 | 
             
                top_p,
         | 
| 23 | 
             
                frequency_penalty,
         | 
| 24 | 
             
                seed,
         | 
| 25 | 
            -
                custom_model
         | 
|  | |
| 26 | 
             
            ):
         | 
| 27 | 
             
                """
         | 
| 28 | 
             
                This function handles the chatbot response. It takes in:
         | 
| 29 | 
             
                - message: the user's new message
         | 
| 30 | 
             
                - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
         | 
| 31 | 
             
                - system_message: the system prompt
         | 
| 32 | 
            -
                - max_tokens | 
| 33 | 
            -
                -  | 
| 34 | 
            -
                -  | 
| 35 | 
            -
                - frequency_penalty: penalize repeated tokens in the output
         | 
| 36 | 
            -
                - seed: a fixed seed for reproducibility; -1 will mean 'random'
         | 
| 37 | 
            -
                - custom_model: the user-provided custom model name (if any)
         | 
| 38 | 
             
                """
         | 
| 39 | 
            -
             | 
| 40 | 
             
                print(f"Received message: {message}")
         | 
| 41 | 
             
                print(f"History: {history}")
         | 
| 42 | 
             
                print(f"System message: {system_message}")
         | 
| 43 | 
             
                print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
         | 
| 44 | 
             
                print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
         | 
| 45 | 
             
                print(f"Custom model: {custom_model}")
         | 
|  | |
| 46 |  | 
| 47 | 
             
                # Convert seed to None if -1 (meaning random)
         | 
| 48 | 
             
                if seed == -1:
         | 
| 49 | 
             
                    seed = None
         | 
| 50 |  | 
| 51 | 
             
                # Construct the messages array required by the API
         | 
| 52 | 
            -
                messages = [{"role": "system", "content": system_message}]
         | 
| 53 |  | 
| 54 | 
             
                # Add conversation history to the context
         | 
| 55 | 
             
                for val in history:
         | 
| @@ -65,19 +89,27 @@ def respond( | |
| 65 | 
             
                # Append the latest user message
         | 
| 66 | 
             
                messages.append({"role": "user", "content": message})
         | 
| 67 |  | 
| 68 | 
            -
                # Determine which model to use: | 
| 69 | 
            -
                 | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 70 | 
             
                print(f"Model selected for inference: {model_to_use}")
         | 
| 71 |  | 
| 72 | 
            -
                # Start  | 
| 73 | 
             
                response = ""
         | 
| 74 | 
             
                print("Sending request to OpenAI API.")
         | 
| 75 |  | 
| 76 | 
             
                # Make the streaming request to the HF Inference API via openai-like client
         | 
| 77 | 
             
                for message_chunk in client.chat.completions.create(
         | 
| 78 | 
            -
                    model=model_to_use, | 
| 79 | 
             
                    max_tokens=max_tokens,
         | 
| 80 | 
            -
                    stream=True, | 
| 81 | 
             
                    temperature=temperature,
         | 
| 82 | 
             
                    top_p=top_p,
         | 
| 83 | 
             
                    frequency_penalty=frequency_penalty,
         | 
| @@ -86,70 +118,168 @@ def respond( | |
| 86 | 
             
                ):
         | 
| 87 | 
             
                    # Extract the token text from the response chunk
         | 
| 88 | 
             
                    token_text = message_chunk.choices[0].delta.content
         | 
| 89 | 
            -
                    print(f"Received token: {token_text}")
         | 
| 90 | 
             
                    response += token_text
         | 
| 91 | 
             
                    # Yield the partial response to Gradio so it can display in real-time
         | 
| 92 | 
             
                    yield response
         | 
| 93 |  | 
| 94 | 
             
                print("Completed response generation.")
         | 
| 95 |  | 
| 96 | 
            -
            #  | 
| 97 | 
            -
             | 
| 98 | 
            -
             | 
| 99 | 
            -
             | 
| 100 | 
            -
             | 
| 101 | 
            -
             | 
| 102 | 
            -
             | 
| 103 | 
            -
             | 
| 104 | 
            -
             | 
| 105 | 
            -
             | 
| 106 | 
            -
             | 
| 107 | 
            -
             | 
| 108 | 
            -
             | 
| 109 | 
            -
             | 
| 110 | 
            -
             | 
| 111 | 
            -
             | 
| 112 | 
            -
                    ) | 
| 113 | 
            -
             | 
| 114 | 
            -
             | 
| 115 | 
            -
                         | 
| 116 | 
            -
                         | 
| 117 | 
            -
                         | 
| 118 | 
            -
                         | 
| 119 | 
            -
             | 
| 120 | 
            -
             | 
| 121 | 
            -
             | 
| 122 | 
            -
             | 
| 123 | 
            -
             | 
| 124 | 
            -
             | 
| 125 | 
            -
             | 
| 126 | 
            -
             | 
| 127 | 
            -
             | 
| 128 | 
            -
             | 
| 129 | 
            -
             | 
| 130 | 
            -
             | 
| 131 | 
            -
             | 
| 132 | 
            -
             | 
| 133 | 
            -
             | 
| 134 | 
            -
             | 
| 135 | 
            -
                         | 
| 136 | 
            -
                         | 
| 137 | 
            -
             | 
| 138 | 
            -
             | 
| 139 | 
            -
                         | 
| 140 | 
            -
             | 
| 141 | 
            -
             | 
| 142 | 
            -
             | 
| 143 | 
            -
             | 
| 144 | 
            -
                         | 
| 145 | 
            -
             | 
| 146 | 
            -
             | 
| 147 | 
            -
             | 
| 148 | 
            -
             | 
| 149 | 
            -
             | 
| 150 | 
            -
            )
         | 
| 151 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 152 |  | 
|  | |
| 153 | 
             
            if __name__ == "__main__":
         | 
| 154 | 
            -
                print(" | 
| 155 | 
            -
                 | 
|  | 
|  | |
| 2 | 
             
            from openai import OpenAI
         | 
| 3 | 
             
            import os
         | 
| 4 |  | 
| 5 | 
            +
            # =============================
         | 
| 6 | 
            +
            #     GLOBAL SETUP / CLIENT
         | 
| 7 | 
            +
            # =============================
         | 
| 8 | 
            +
             | 
| 9 | 
             
            # Retrieve the access token from the environment variable
         | 
| 10 | 
             
            ACCESS_TOKEN = os.getenv("HF_TOKEN")
         | 
| 11 | 
             
            print("Access token loaded.")
         | 
|  | |
| 17 | 
             
            )
         | 
| 18 | 
             
            print("OpenAI client initialized.")
         | 
| 19 |  | 
| 20 | 
            +
            # =============================
         | 
| 21 | 
            +
            #     MODEL CONFIG / LOGIC
         | 
| 22 | 
            +
            # =============================
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            # Sample placeholder list of "featured" models for demonstration
         | 
| 25 | 
            +
            featured_models_list = [
         | 
| 26 | 
            +
                "meta-llama/Llama-2-13B-chat-hf",
         | 
| 27 | 
            +
                "bigscience/bloom",
         | 
| 28 | 
            +
                "microsoft/DialoGPT-large",
         | 
| 29 | 
            +
                "OpenAssistant/oasst-sft-1-pythia-12b",
         | 
| 30 | 
            +
                "tiiuae/falcon-7b-instruct",
         | 
| 31 | 
            +
                "meta-llama/Llama-3.3-70B-Instruct"
         | 
| 32 | 
            +
            ]
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            def filter_featured_models(search_term: str):
         | 
| 35 | 
            +
                """
         | 
| 36 | 
            +
                Returns a list of models that contain the search term (case-insensitive).
         | 
| 37 | 
            +
                """
         | 
| 38 | 
            +
                filtered = [m for m in featured_models_list if search_term.lower() in m.lower()]
         | 
| 39 | 
            +
                return gr.update(choices=filtered)
         | 
| 40 | 
            +
             | 
| 41 | 
            +
             | 
| 42 | 
             
            def respond(
         | 
| 43 | 
             
                message,
         | 
| 44 | 
             
                history: list[tuple[str, str]],
         | 
|  | |
| 48 | 
             
                top_p,
         | 
| 49 | 
             
                frequency_penalty,
         | 
| 50 | 
             
                seed,
         | 
| 51 | 
            +
                custom_model,
         | 
| 52 | 
            +
                selected_featured_model
         | 
| 53 | 
             
            ):
         | 
| 54 | 
             
                """
         | 
| 55 | 
             
                This function handles the chatbot response. It takes in:
         | 
| 56 | 
             
                - message: the user's new message
         | 
| 57 | 
             
                - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
         | 
| 58 | 
             
                - system_message: the system prompt
         | 
| 59 | 
            +
                - max_tokens, temperature, top_p, frequency_penalty, seed: generation params
         | 
| 60 | 
            +
                - custom_model: user-provided custom model path/name
         | 
| 61 | 
            +
                - selected_featured_model: model chosen from the featured radio list
         | 
|  | |
|  | |
|  | |
| 62 | 
             
                """
         | 
|  | |
| 63 | 
             
                print(f"Received message: {message}")
         | 
| 64 | 
             
                print(f"History: {history}")
         | 
| 65 | 
             
                print(f"System message: {system_message}")
         | 
| 66 | 
             
                print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
         | 
| 67 | 
             
                print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
         | 
| 68 | 
             
                print(f"Custom model: {custom_model}")
         | 
| 69 | 
            +
                print(f"Selected featured model: {selected_featured_model}")
         | 
| 70 |  | 
| 71 | 
             
                # Convert seed to None if -1 (meaning random)
         | 
| 72 | 
             
                if seed == -1:
         | 
| 73 | 
             
                    seed = None
         | 
| 74 |  | 
| 75 | 
             
                # Construct the messages array required by the API
         | 
| 76 | 
            +
                messages = [{"role": "system", "content": system_message}] if system_message.strip() else []
         | 
| 77 |  | 
| 78 | 
             
                # Add conversation history to the context
         | 
| 79 | 
             
                for val in history:
         | 
|  | |
| 89 | 
             
                # Append the latest user message
         | 
| 90 | 
             
                messages.append({"role": "user", "content": message})
         | 
| 91 |  | 
| 92 | 
            +
                # Determine which model to use:
         | 
| 93 | 
            +
                # 1) If custom_model is non-empty, it overrides everything.
         | 
| 94 | 
            +
                # 2) Otherwise, use the selected featured model from the radio button if available.
         | 
| 95 | 
            +
                # 3) If both are empty, fall back to the default.
         | 
| 96 | 
            +
                model_to_use = "meta-llama/Llama-3.3-70B-Instruct"  # Default
         | 
| 97 | 
            +
                if custom_model.strip() != "":
         | 
| 98 | 
            +
                    model_to_use = custom_model.strip()
         | 
| 99 | 
            +
                elif selected_featured_model.strip() != "":
         | 
| 100 | 
            +
                    model_to_use = selected_featured_model.strip()
         | 
| 101 | 
            +
             | 
| 102 | 
             
                print(f"Model selected for inference: {model_to_use}")
         | 
| 103 |  | 
| 104 | 
            +
                # Start building the streaming response
         | 
| 105 | 
             
                response = ""
         | 
| 106 | 
             
                print("Sending request to OpenAI API.")
         | 
| 107 |  | 
| 108 | 
             
                # Make the streaming request to the HF Inference API via openai-like client
         | 
| 109 | 
             
                for message_chunk in client.chat.completions.create(
         | 
| 110 | 
            +
                    model=model_to_use,
         | 
| 111 | 
             
                    max_tokens=max_tokens,
         | 
| 112 | 
            +
                    stream=True,  # Stream the response
         | 
| 113 | 
             
                    temperature=temperature,
         | 
| 114 | 
             
                    top_p=top_p,
         | 
| 115 | 
             
                    frequency_penalty=frequency_penalty,
         | 
|  | |
| 118 | 
             
                ):
         | 
| 119 | 
             
                    # Extract the token text from the response chunk
         | 
| 120 | 
             
                    token_text = message_chunk.choices[0].delta.content
         | 
| 121 | 
            +
                    print(f"Received token: {token_text}", flush=True)
         | 
| 122 | 
             
                    response += token_text
         | 
| 123 | 
             
                    # Yield the partial response to Gradio so it can display in real-time
         | 
| 124 | 
             
                    yield response
         | 
| 125 |  | 
| 126 | 
             
                print("Completed response generation.")
         | 
| 127 |  | 
| 128 | 
            +
            # =============================
         | 
| 129 | 
            +
            #         MAIN UI
         | 
| 130 | 
            +
            # =============================
         | 
| 131 | 
            +
             | 
| 132 | 
            +
            def build_app():
         | 
| 133 | 
            +
                """
         | 
| 134 | 
            +
                Build the Gradio Blocks interface containing:
         | 
| 135 | 
            +
                  - A Chat tab (ChatInterface)
         | 
| 136 | 
            +
                  - A Featured Models tab
         | 
| 137 | 
            +
                  - An Information tab
         | 
| 138 | 
            +
                """
         | 
| 139 | 
            +
                with gr.Blocks(theme="Nymbo/Nymbo_Theme") as main_interface:
         | 
| 140 | 
            +
             | 
| 141 | 
            +
                    # We define a Gr.State to hold the user's chosen featured model
         | 
| 142 | 
            +
                    selected_featured_model_state = gr.State("")
         | 
| 143 | 
            +
             | 
| 144 | 
            +
                    with gr.Tab("Chat Interface"):
         | 
| 145 | 
            +
                        gr.Markdown("## Serverless-TextGen-Hub")
         | 
| 146 | 
            +
             | 
| 147 | 
            +
                        # Here we embed the ChatInterface for streaming conversation
         | 
| 148 | 
            +
                        # We add extra inputs for "Selected Featured Model" as hidden,
         | 
| 149 | 
            +
                        # so the user can't directly edit but it flows into respond().
         | 
| 150 | 
            +
                        demo = gr.ChatInterface(
         | 
| 151 | 
            +
                            fn=respond,
         | 
| 152 | 
            +
                            additional_inputs=[
         | 
| 153 | 
            +
                                gr.Textbox(value="", label="System message", lines=2),
         | 
| 154 | 
            +
                                gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
         | 
| 155 | 
            +
                                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         | 
| 156 | 
            +
                                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
         | 
| 157 | 
            +
                                gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty"),
         | 
| 158 | 
            +
                                gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)"),
         | 
| 159 | 
            +
                                gr.Textbox(value="", label="Custom Model", info="(Optional) Provide a custom HF model path"),
         | 
| 160 | 
            +
                                gr.Textbox(value="", label="Selected Featured Model (from tab)", visible=False),
         | 
| 161 | 
            +
                            ],
         | 
| 162 | 
            +
                            fill_height=True,
         | 
| 163 | 
            +
                            chatbot=gr.Chatbot(height=600),
         | 
| 164 | 
            +
                            theme="Nymbo/Nymbo_Theme",
         | 
| 165 | 
            +
                        )
         | 
| 166 | 
            +
             | 
| 167 | 
            +
                        # We want to connect the selected_featured_model_state to that hidden text box
         | 
| 168 | 
            +
                        def set_featured_model_in_chatbox(val):
         | 
| 169 | 
            +
                            return val
         | 
| 170 | 
            +
             | 
| 171 | 
            +
                        # Whenever the selected_featured_model_state changes, update the hidden field in the ChatInterface
         | 
| 172 | 
            +
                        selected_featured_model_state.change(
         | 
| 173 | 
            +
                            fn=set_featured_model_in_chatbox,
         | 
| 174 | 
            +
                            inputs=selected_featured_model_state,
         | 
| 175 | 
            +
                            outputs=demo.additional_inputs[-1],  # The last additional input is the "Selected Featured Model"
         | 
| 176 | 
            +
                        )
         | 
| 177 | 
            +
             | 
| 178 | 
            +
                    # ==========================
         | 
| 179 | 
            +
                    #   Featured Models Tab
         | 
| 180 | 
            +
                    # ==========================
         | 
| 181 | 
            +
                    with gr.Tab("Featured Models"):
         | 
| 182 | 
            +
                        gr.Markdown("### Choose from our Featured Models")
         | 
| 183 | 
            +
             | 
| 184 | 
            +
                        # A text box for searching/filtering
         | 
| 185 | 
            +
                        model_search = gr.Textbox(
         | 
| 186 | 
            +
                            label="Filter Models",
         | 
| 187 | 
            +
                            placeholder="Search for a featured model..."
         | 
| 188 | 
            +
                        )
         | 
| 189 | 
            +
             | 
| 190 | 
            +
                        # A radio component listing the featured models (default to first)
         | 
| 191 | 
            +
                        model_radio = gr.Radio(
         | 
| 192 | 
            +
                            choices=featured_models_list,
         | 
| 193 | 
            +
                            label="Select a model below",
         | 
| 194 | 
            +
                            value=featured_models_list[0],
         | 
| 195 | 
            +
                            interactive=True
         | 
| 196 | 
            +
                        )
         | 
| 197 | 
            +
             | 
| 198 | 
            +
                        # Define how to update the radio choices when the search box changes
         | 
| 199 | 
            +
                        model_search.change(
         | 
| 200 | 
            +
                            fn=filter_featured_models,
         | 
| 201 | 
            +
                            inputs=model_search,
         | 
| 202 | 
            +
                            outputs=model_radio
         | 
| 203 | 
            +
                        )
         | 
| 204 | 
            +
             | 
| 205 | 
            +
                        # Button to confirm the selection
         | 
| 206 | 
            +
                        def select_featured_model(radio_val):
         | 
| 207 | 
            +
                            """
         | 
| 208 | 
            +
                            Updates the hidden state with the user-chosen featured model.
         | 
| 209 | 
            +
                            """
         | 
| 210 | 
            +
                            return radio_val
         | 
| 211 | 
            +
             | 
| 212 | 
            +
                        choose_btn = gr.Button("Use this Featured Model", variant="primary")
         | 
| 213 | 
            +
             | 
| 214 | 
            +
                        choose_btn.click(
         | 
| 215 | 
            +
                            fn=select_featured_model,
         | 
| 216 | 
            +
                            inputs=model_radio,
         | 
| 217 | 
            +
                            outputs=selected_featured_model_state
         | 
| 218 | 
            +
                        )
         | 
| 219 | 
            +
             | 
| 220 | 
            +
                        gr.Markdown(
         | 
| 221 | 
            +
                            """
         | 
| 222 | 
            +
                            **Tip**: If you type a Custom Model in the "Chat Interface" tab, it overrides the
         | 
| 223 | 
            +
                            featured model you selected here.
         | 
| 224 | 
            +
                            """
         | 
| 225 | 
            +
                        )
         | 
| 226 | 
            +
             | 
| 227 | 
            +
                    # ==========================
         | 
| 228 | 
            +
                    #   Information Tab
         | 
| 229 | 
            +
                    # ==========================
         | 
| 230 | 
            +
                    with gr.Tab("Information"):
         | 
| 231 | 
            +
                        gr.Markdown("## Learn More About These Models and Parameters")
         | 
| 232 | 
            +
             | 
| 233 | 
            +
                        with gr.Accordion("Featured Models (Table)", open=False):
         | 
| 234 | 
            +
                            gr.HTML(
         | 
| 235 | 
            +
                                """
         | 
| 236 | 
            +
                                <p>Below is a small sample table showing some featured models.</p>
         | 
| 237 | 
            +
                                <table style="width:100%; text-align:center; margin:auto;">
         | 
| 238 | 
            +
                                    <tr>
         | 
| 239 | 
            +
                                        <th>Model Name</th>
         | 
| 240 | 
            +
                                        <th>Type</th>
         | 
| 241 | 
            +
                                        <th>Notes</th>
         | 
| 242 | 
            +
                                    </tr>
         | 
| 243 | 
            +
                                    <tr>
         | 
| 244 | 
            +
                                        <td>meta-llama/Llama-2-13B-chat-hf</td>
         | 
| 245 | 
            +
                                        <td>Chat</td>
         | 
| 246 | 
            +
                                        <td>Good for multi-turn dialogue.</td>
         | 
| 247 | 
            +
                                    </tr>
         | 
| 248 | 
            +
                                    <tr>
         | 
| 249 | 
            +
                                        <td>bigscience/bloom</td>
         | 
| 250 | 
            +
                                        <td>Language Model</td>
         | 
| 251 | 
            +
                                        <td>Large multilingual model.</td>
         | 
| 252 | 
            +
                                    </tr>
         | 
| 253 | 
            +
                                    <tr>
         | 
| 254 | 
            +
                                        <td>microsoft/DialoGPT-large</td>
         | 
| 255 | 
            +
                                        <td>Chat</td>
         | 
| 256 | 
            +
                                        <td>Well-known smaller chat model.</td>
         | 
| 257 | 
            +
                                    </tr>
         | 
| 258 | 
            +
                                </table>
         | 
| 259 | 
            +
                                """
         | 
| 260 | 
            +
                            )
         | 
| 261 | 
            +
             | 
| 262 | 
            +
                        with gr.Accordion("Parameters Overview", open=False):
         | 
| 263 | 
            +
                            gr.Markdown(
         | 
| 264 | 
            +
                                """
         | 
| 265 | 
            +
                                ### Explanation of Key Parameters
         | 
| 266 | 
            +
             | 
| 267 | 
            +
                                - **System Message**: Provides context or initial instructions to the model.  
         | 
| 268 | 
            +
                                - **Max Tokens**: The maximum number of tokens (roughly pieces of words) in the generated response.  
         | 
| 269 | 
            +
                                - **Temperature**: Higher values produce more random/creative outputs, while lower values make the output more focused and deterministic.  
         | 
| 270 | 
            +
                                - **Top-P**: Controls nucleus sampling. The model considers only the tokens whose probability mass exceeds this value.  
         | 
| 271 | 
            +
                                - **Frequency Penalty**: Penalizes repeated tokens. Positive values (like 1.0) reduce repetition in the output. Negative values can increase repetition.  
         | 
| 272 | 
            +
                                - **Seed**: Determines reproducibility. Set it to a fixed integer for consistent results; `-1` is random each time.  
         | 
| 273 | 
            +
                                - **Custom Model**: Overwrites the featured model. Provide the Hugging Face path (e.g., `openai/whisper-base`) for your own usage.  
         | 
| 274 | 
            +
             | 
| 275 | 
            +
                                Use these settings to guide how the model generates text. If in doubt, stick to defaults and experiment in small increments.
         | 
| 276 | 
            +
                                """
         | 
| 277 | 
            +
                            )
         | 
| 278 | 
            +
             | 
| 279 | 
            +
                return main_interface
         | 
| 280 |  | 
| 281 | 
            +
            # If run as a standalone script, just launch.
         | 
| 282 | 
             
            if __name__ == "__main__":
         | 
| 283 | 
            +
                print("Building and launching the Serverless-TextGen-Hub interface...")
         | 
| 284 | 
            +
                ui = build_app()
         | 
| 285 | 
            +
                ui.launch()
         | 
