import requests

check_ipinfo = requests.get("https://ipinfo.io").json()['country']
print("Run-Location-As: ", check_ipinfo)

import gradio as gr
import ollama

# List of available models for selection.
# IMPORTANT: These names must correspond to models that have already been pulled
# into the local Ollama instance (they are pulled by run.sh).
AVAILABLE_MODELS = [
    'hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M',
    #'hf.co/bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF:Q4_K_M',
    'smollm2:360m-instruct-q5_K_M',
    'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M',  # OK speed with CPU
    #'gemma3n:e2b-it-q4_K_M',
    'granite3.3:2b',
    'hf.co/bartowski/tencent_Hunyuan-4B-Instruct-GGUF:Q4_K_M'
]

# --- Failed to run ---
#'hf.co/ggml-org/SmolLM3-3B-GGUF:Q4_K_M',
#'hf.co/bartowski/nvidia_OpenReasoning-Nemotron-1.5B-GGUF:Q5_K_M',
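
# Optional startup check (a minimal sketch added here, not part of the original app):
# warn if any listed model has not been pulled into the local Ollama instance yet.
# Assumes ollama.show() raises an error for models that are not present locally.
for _model_name in AVAILABLE_MODELS:
    try:
        ollama.show(_model_name)
    except Exception as err:
        print(f"Warning: could not find model '{_model_name}' locally: {err}")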

# Default System Prompt
# (English gloss of the Chinese rules below: answer Chinese queries in standard
# Traditional Chinese following official document conventions; cite the rules/basis
# relied on; answer English queries in English.)
DEFAULT_SYSTEM_PROMPT = """Answer everything in simple, smart, relevant and accurate style. No chatty! Besides, pls:
1. 如果查詢是以中文輸入,使用標準繁體中文回答,符合官方文書規範
2. 要提供引用規則依据
3. 如果查詢是以英文輸入,使用英文回答"""

# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
    gr.Markdown("## Small Language Model (SLM) run with CPU")  # Changed title to be more generic
    gr.Markdown(f"(Run-Location-As: `{check_ipinfo}`)")
    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")

    # Model selection
    with gr.Row():
        selected_model = gr.Radio(
            choices=AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],  # Default to the first model in the list
            label="Select Model",
            info="Choose the LLM model to chat with.",
            interactive=True
        )

    chatbot = gr.Chatbot(
        label="Conversation",
        height=400,
        type='messages',
        layout="bubble"
    )

    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Type your message here and press Enter...",
            lines=1,
            scale=4,
            container=False
        )

    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            stream_checkbox = gr.Checkbox(
                label="Stream Output",
                value=True,
                info="Enable to see the response generate in real-time."
            )
            use_custom_prompt_checkbox = gr.Checkbox(
                label="Use Custom System Prompt",
                value=False,
                info="Check this box to provide your own system prompt below."
            )
        system_prompt_textbox = gr.Textbox(
            label="System Prompt",
            value=DEFAULT_SYSTEM_PROMPT,
            lines=3,
            placeholder="Enter a system prompt to guide the model's behavior...",
            interactive=False
        )

    # Toggle the interactivity of the system prompt textbox
    def toggle_system_prompt(use_custom):
        return gr.update(interactive=use_custom)

    use_custom_prompt_checkbox.change(
        fn=toggle_system_prompt,
        inputs=use_custom_prompt_checkbox,
        outputs=system_prompt_textbox,
        queue=False
    )

    # --- Core Chat Logic ---
    # This function is the heart of the application.
    def respond(history, system_prompt, stream_output, current_selected_model):
        """
        Handles the entire chat process: it takes the history, prepends the
        system prompt, calls the Ollama API, and streams the response back to
        the chatbot. Note: stream_output is received from the checkbox but is
        currently unused, since the Ollama call below always streams.
        """
        # Disable Qwen3 thinking mode (compare case-insensitively so the
        # capitalised model name actually matches).
        if "qwen3" in current_selected_model.lower():
            system_prompt = system_prompt + " /no_think"

        # The 'history' variable from Gradio contains the entire conversation.
        # Prepend the system prompt to this history to form the final payload.
        messages = [{"role": "system", "content": system_prompt}] + history

        # Add a placeholder for the assistant's response to the UI history.
        # This creates the space where the streamed response will be displayed.
        history.append({"role": "assistant", "content": ""})

        # Stream the response from the Ollama API using the currently selected model.
        response_stream = ollama.chat(
            model=current_selected_model,
            messages=messages,
            stream=True
        )

        # Iterate through the stream, updating the placeholder with each new chunk.
        for chunk in response_stream:
            if chunk['message']['content']:
                history[-1]['content'] += chunk['message']['content']
                # Yield the updated history to the chatbot for a real-time effect.
                yield history
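
    # A minimal sketch (an addition, not wired into the UI) of how the stream_output
    # flag could be honored with a single non-streaming call: ollama.chat() with
    # stream=False returns the complete message in one response.
    def respond_full(history, system_prompt, current_selected_model):
        messages = [{"role": "system", "content": system_prompt}] + history
        reply = ollama.chat(model=current_selected_model, messages=messages, stream=False)
        history.append({"role": "assistant", "content": reply['message']['content']})
        return history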

    # This function handles the user's submission.
    def user_submit(history, user_message):
        """
        Adds the user's message to the chat history and clears the input box.
        This prepares the state for the main 'respond' function.
        """
        return history + [{"role": "user", "content": user_message}], ""

    # Gradio event wiring
    msg.submit(
        user_submit,
        inputs=[chatbot, msg],
        outputs=[chatbot, msg],
        queue=False
    ).then(
        respond,
        inputs=[chatbot, system_prompt_textbox, stream_checkbox, selected_model],  # Pass selected_model here
        outputs=[chatbot]
    )

# Launch the Gradio interface
demo.launch(server_name="0.0.0.0", server_port=7860)
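
# Note: server_name="0.0.0.0" makes the app reachable from outside the container, and
# 7860 is the default port a Hugging Face Space expects a Gradio app to listen on.
# It is assumed (not shown in this file) that the companion run.sh starts the Ollama
# server and pulls the models in AVAILABLE_MODELS before launching this script.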
| """ | |
| #--------------------------------------------------------------- | |
| # v20250625, OK run with CPU, Gemma 3 4b it qat gguf, history support. | |
| import gradio as gr | |
| import ollama | |
| # The model name must exactly match what was pulled from Hugging Face | |
| MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M' | |
| # Default System Prompt | |
| DEFAULT_SYSTEM_PROMPT = "You must response in zh-TW. Answer everything in simple, smart, relevant and accurate style. No chatty!" | |
| # --- Gradio Interface --- | |
| with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo: | |
| gr.Markdown(f"## LLM GGUF Chat with `{MODEL_NAME}`") | |
| gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.") | |
| # Use the modern 'messages' type for the Chatbot component | |
| chatbot = gr.Chatbot( | |
| label="Conversation", | |
| height=500, | |
| type='messages', | |
| layout="bubble" | |
| ) | |
| with gr.Row(): | |
| msg = gr.Textbox( | |
| show_label=False, | |
| placeholder="Type your message here and press Enter...", | |
| lines=1, | |
| scale=4, | |
| container=False | |
| ) | |
| with gr.Accordion("Advanced Options", open=False): | |
| with gr.Row(): | |
| stream_checkbox = gr.Checkbox( | |
| label="Stream Output", | |
| value=True, | |
| info="Enable to see the response generate in real-time." | |
| ) | |
| use_custom_prompt_checkbox = gr.Checkbox( | |
| label="Use Custom System Prompt", | |
| value=False, | |
| info="Check this box to provide your own system prompt below." | |
| ) | |
| system_prompt_textbox = gr.Textbox( | |
| label="System Prompt", | |
| value=DEFAULT_SYSTEM_PROMPT, | |
| lines=3, | |
| placeholder="Enter a system prompt to guide the model's behavior...", | |
| interactive=False | |
| ) | |
| # Function to toggle the interactivity of the system prompt textbox | |
| def toggle_system_prompt(use_custom): | |
| return gr.update(interactive=use_custom) | |
| use_custom_prompt_checkbox.change( | |
| fn=toggle_system_prompt, | |
| inputs=use_custom_prompt_checkbox, | |
| outputs=system_prompt_textbox, | |
| queue=False | |
| ) | |
| # --- Core Chat Logic --- | |
| # This function is the heart of the application. | |
| def respond(history, system_prompt, stream_output): | |
| #This is the single function that handles the entire chat process. | |
| #It takes the history, prepends the system prompt, calls the Ollama API, | |
| #and streams the response back to the chatbot. | |
| # --- FINAL FIX: Construct the API payload correctly --- | |
| # The 'history' variable from Gradio contains the entire conversation. | |
| # We prepend the system prompt to this history to form the final payload. | |
| messages = [{"role": "system", "content": system_prompt}] + history | |
| # Add a placeholder for the assistant's response to the UI history. | |
| # This creates the space where the streamed response will be displayed. | |
| history.append({"role": "assistant", "content": ""}) | |
| # Stream the response from the Ollama API | |
| response_stream = ollama.chat( | |
| model=MODEL_NAME, | |
| messages=messages, | |
| stream=True | |
| ) | |
| # Iterate through the stream, updating the placeholder with each new chunk. | |
| for chunk in response_stream: | |
| if chunk['message']['content']: | |
| history[-1]['content'] += chunk['message']['content'] | |
| # Yield the updated history to the chatbot for a real-time effect. | |
| yield history | |
| # This function handles the user's submission. | |
| def user_submit(history, user_message): | |
| #Adds the user's message to the chat history and clears the input box. | |
| #This prepares the state for the main 'respond' function. | |
| return history + [{"role": "user", "content": user_message}], "" | |
| # Gradio Event Wiring | |
| msg.submit( | |
| user_submit, | |
| inputs=[chatbot, msg], | |
| outputs=[chatbot, msg], | |
| queue=False | |
| ).then( | |
| respond, | |
| inputs=[chatbot, system_prompt_textbox, stream_checkbox], | |
| outputs=[chatbot] | |
| ) | |
| # Launch the Gradio interface | |
| demo.launch(server_name="0.0.0.0", server_port=7860) | |
| #--------------------------------------------------------------- | |
| """ | |
| """ | |
| #--------------------------------------------------------------- | |
| # Backup, OK: history, user sys prompt, cpu.: | |
| #--------------------------------------------------------------- | |
| import gradio as gr | |
| import ollama | |
| # The model name must exactly match what was pulled from Hugging Face | |
| MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M' | |
| # Default System Prompt | |
| DEFAULT_SYSTEM_PROMPT = "You must response in zh-TW. Answer everything in simple, smart, relevant and accurate style. No chatty!" | |
| # --- Gradio Interface --- | |
| with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo: | |
| gr.Markdown(f"## LLM GGUF Chat with `{MODEL_NAME}`") | |
| gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.") | |
| # Use the modern 'messages' type for the Chatbot component | |
| chatbot = gr.Chatbot( | |
| label="Conversation", | |
| height=500, | |
| type='messages', | |
| layout="bubble" | |
| ) | |
| with gr.Row(): | |
| msg = gr.Textbox( | |
| show_label=False, | |
| placeholder="Type your message here and press Enter...", | |
| lines=1, | |
| scale=4, | |
| container=False | |
| ) | |
| with gr.Accordion("Advanced Options", open=False): | |
| with gr.Row(): | |
| stream_checkbox = gr.Checkbox( | |
| label="Stream Output", | |
| value=True, | |
| info="Enable to see the response generate in real-time." | |
| ) | |
| use_custom_prompt_checkbox = gr.Checkbox( | |
| label="Use Custom System Prompt", | |
| value=False, | |
| info="Check this box to provide your own system prompt below." | |
| ) | |
| system_prompt_textbox = gr.Textbox( | |
| label="System Prompt", | |
| value=DEFAULT_SYSTEM_PROMPT, | |
| lines=3, | |
| placeholder="Enter a system prompt to guide the model's behavior...", | |
| interactive=False | |
| ) | |
| # Function to toggle the interactivity of the system prompt textbox | |
| def toggle_system_prompt(use_custom): | |
| return gr.update(interactive=use_custom) | |
| use_custom_prompt_checkbox.change( | |
| fn=toggle_system_prompt, | |
| inputs=use_custom_prompt_checkbox, | |
| outputs=system_prompt_textbox, | |
| queue=False | |
| ) | |
| # --- Core Chat Logic --- | |
| # This function is the heart of the application. | |
| def respond(history, system_prompt, stream_output): | |
| #This is the single function that handles the entire chat process. | |
| #It takes the history, prepends the system prompt, calls the Ollama API, | |
| #and streams the response back to the chatbot. | |
| # --- FINAL FIX: Construct the API payload correctly --- | |
| # The 'history' variable from Gradio contains the entire conversation. | |
| # We prepend the system prompt to this history to form the final payload. | |
| messages = [{"role": "system", "content": system_prompt}] + history | |
| # Add a placeholder for the assistant's response to the UI history. | |
| # This creates the space where the streamed response will be displayed. | |
| history.append({"role": "assistant", "content": ""}) | |
| # Stream the response from the Ollama API | |
| response_stream = ollama.chat( | |
| model=MODEL_NAME, | |
| messages=messages, | |
| stream=True | |
| ) | |
| # Iterate through the stream, updating the placeholder with each new chunk. | |
| for chunk in response_stream: | |
| if chunk['message']['content']: | |
| history[-1]['content'] += chunk['message']['content'] | |
| # Yield the updated history to the chatbot for a real-time effect. | |
| yield history | |
| # This function handles the user's submission. | |
| def user_submit(history, user_message): | |
| #Adds the user's message to the chat history and clears the input box. | |
| #This prepares the state for the main 'respond' function. | |
| return history + [{"role": "user", "content": user_message}], "" | |
| # Gradio Event Wiring | |
| msg.submit( | |
| user_submit, | |
| inputs=[chatbot, msg], | |
| outputs=[chatbot, msg], | |
| queue=False | |
| ).then( | |
| respond, | |
| inputs=[chatbot, system_prompt_textbox, stream_checkbox], | |
| outputs=[chatbot] | |
| ) | |
| # Launch the Gradio interface | |
| demo.launch(server_name="0.0.0.0", server_port=7860) | |
| """ |