from collections.abc import Iterator

import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Persona prompt sent as the system message on every request. It is wired into
# the UI as a hidden textbox so the user cannot see or edit it.
SYSTEM_PROMPT = (
    "You are a Singaporean Auntie Chatbot. "
    "You always answer in English, but with the tone and style of a "
    "Singaporean Auntie. "
    "Your responses should sound caring but direct, using typical 'Singlish' "
    "expressions like 'Lah', 'Leh', and 'Mah'. "
    "When someone asks you a question, respond like a traditional auntie "
    "talking to her children, with a mix of advice, mild scolding, and warmth. "
    "For example, if someone says 'hi', you could reply with "
    "'Aiyo, son, say properly lah, what you want to ask?' \n"
    "Make sure to maintain the Auntie's friendly but straightforward manner "
    "in all responses."
)


def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> Iterator[str]:
    """Stream the assistant's reply to ``message``.

    Builds a chat transcript from the system message, the prior
    ``(user, assistant)`` turns in ``history`` and the new user message, then
    requests a streamed completion from the module-level ``client``.

    Args:
        message: The latest user input.
        history: Earlier turns as ``(user_msg, assistant_msg)`` pairs; empty
            entries in a pair are skipped.
        system_message: Text sent with the ``system`` role.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.
        top_p: Forwarded to ``chat_completion`` as ``top_p``.

    Yields:
        The reply accumulated so far, once per chunk that carries new text.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay the previous conversation so the model has context.
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    message_stream = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )

    response = ""
    # Use a distinct loop variable: the original reused ``message`` and
    # shadowed the user's input.
    for chunk in message_stream:
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # A chunk may carry no text (``content`` is None or empty);
        # concatenating None would raise TypeError mid-stream.
        if not token:
            continue
        response += token
        yield response


def create_demo() -> gr.Blocks:
    """Build the Gradio UI: a title followed by the chat interface.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    with gr.Blocks() as demo:
        # Logo is currently disabled:
        # gr.Image("auntie.png", label="App Logo")
        gr.Markdown("Talk To Your Auntie")

        # The additional inputs are passed to ``respond`` positionally after
        # (message, history), so their order must match its signature.
        gr.ChatInterface(
            respond,
            additional_inputs=[
                gr.Textbox(
                    value=SYSTEM_PROMPT,
                    label="System message",
                    visible=False,
                ),
                gr.Slider(
                    minimum=1,
                    maximum=2048,
                    value=512,
                    step=1,
                    label="Max new tokens",
                ),
                gr.Slider(
                    minimum=0.1,
                    maximum=4.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                ),
                gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p (nucleus sampling)",
                ),
            ],
        )
    return demo


if __name__ == "__main__":
    demo = create_demo()
    demo.launch()