"""Gradio chat demo that streams completions from a hosted Mistral instruct model.

The script builds an ``[INST] ... [/INST]`` prompt from the chat history,
streams tokens from the Hugging Face Inference API, and serves the result
through a ``gr.ChatInterface`` with sliders for the sampling parameters.
"""

from huggingface_hub import InferenceClient
import gradio as gr

client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")

# Lower bound that keeps the temperature strictly positive.
_MIN_TEMPERATURE = 0.01
# top_p is kept strictly inside (0, 1): the UI slider goes up to 1.0, and the
# text-generation backend is documented to reject values outside that open
# interval (NOTE(review): confirm against the deployed backend).
_TOP_P_MIN = 0.01
_TOP_P_MAX = 0.99


def _history_turns(history):
    """Return ``history`` as a list of ``(user_text, bot_text)`` pairs.

    Two layouts are accepted:

    * pair entries, ``(user_prompt, bot_response)``, passed through unchanged;
    * message dicts with ``"role"`` / ``"content"`` keys, where each
      ``"assistant"`` message closes the turn opened by the preceding
      ``"user"`` message(s). Other roles are ignored.

    A trailing user message without a reply is not emitted; the current
    message is supplied to ``format_prompt`` separately.
    """
    turns = []
    pending_user = None
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            content = entry.get("content")
            if role == "user":
                # Consecutive user messages are merged into a single turn.
                if pending_user is None:
                    pending_user = content
                else:
                    pending_user = f"{pending_user}\n{content}"
            elif role == "assistant":
                turns.append(("" if pending_user is None else pending_user, content))
                pending_user = None
            continue
        user_prompt, bot_response = entry
        turns.append((user_prompt, bot_response))
    return turns


def format_prompt(message, history):
    """Build the instruct-style prompt for ``message`` given prior ``history``.

    Args:
        message: The new user message.
        history: Previous conversation, either as ``(user, bot)`` pairs or as
            role/content message dicts (see ``_history_turns``).

    Returns:
        A single string in which every past turn is rendered as
        ``[INST] user [/INST] bot `` followed by ``[INST] message [/INST]``.
    """
    parts = [
        f"[INST] {user_prompt} [/INST] {bot_response} "
        for user_prompt, bot_response in _history_turns(history)
    ]
    parts.append(f"[INST] {message} [/INST]")
    return "".join(parts)


def generate(
    prompt,
    history,
    temperature=0.7,
    max_new_tokens=256,
    top_p=0.9,
    repetition_penalty=1.2,
):
    """Stream a model reply for ``prompt``, yielding the text accumulated so far.

    Args:
        prompt: The new user message.
        history: Prior conversation, forwarded to ``format_prompt``.
        temperature: Sampling temperature; raised to at least 0.01.
        max_new_tokens: Upper bound on generated tokens; coerced to ``int``.
        top_p: Nucleus-sampling mass; clamped into ``[0.01, 0.99]``.
        repetition_penalty: Repetition penalty; coerced to ``float``.

    Yields:
        The partial response after each streamed token.

    Returns:
        The final response text (only visible through ``StopIteration.value``).
    """
    temperature = max(_MIN_TEMPERATURE, float(temperature))
    top_p = min(_TOP_P_MAX, max(_TOP_P_MIN, float(top_p)))

    generate_kwargs = dict(
        temperature=temperature,
        # UI sliders may hand back floats; the API expects an integer count.
        max_new_tokens=int(max_new_tokens),
        top_p=top_p,
        repetition_penalty=float(repetition_penalty),
        do_sample=True,
        seed=42,  # fixed seed on every request
    )

    formatted_prompt = format_prompt(prompt, history)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )

    output = ""
    for response in stream:
        token = response.token
        # Special tokens (e.g. an end-of-sequence marker) are control symbols,
        # not reply text, so they are not appended to the visible output.
        if not getattr(token, "special", False):
            output += token.text
        yield output
    return output


# Extra controls shown under the chat box; their order matches the optional
# parameters of ``generate`` (temperature, max_new_tokens, top_p,
# repetition_penalty).
additional_inputs = [
    gr.Slider(
        label="Temperature",
        value=0.7,
        minimum=0.1,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Controls the creativity of the response. Higher values mean more randomness.",
    ),
    gr.Slider(
        label="Max Tokens",
        value=256,
        minimum=32,
        maximum=1024,
        step=32,
        interactive=True,
        info="The maximum number of tokens to generate.",
    ),
    gr.Slider(
        label="Top-p",
        value=0.9,
        minimum=0.1,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Nucleus sampling. Lower values generate more deterministic responses.",
    ),
    gr.Slider(
        label="Repetition Penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.1,
        interactive=True,
        info="Discourages repeated phrases in the output.",
    ),
]

# Styling for an element with id "chatbox".
css = (
    " #chatbox { height: 600px; overflow: auto; border: none;"
    " box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1); border-radius: 8px; } "
)

with gr.Blocks(css=css) as demo:
    # Page header. The literal's lines stay at column 0 because any leading
    # whitespace would become part of the string passed to gr.HTML.
    gr.HTML(
        """

Mispel V1.0 Beta

Powered by the MedlarAI model. Your assistant for answering questions, generating text, and exploring ideas.

"""
    )
    # gr.Column() is used as the container here in place of the earlier gr.Box().
    with gr.Column():
        gr.ChatInterface(
            generate,
            additional_inputs=additional_inputs,
            examples=[
                ["Explain quantum mechanics in simple terms."],
                ["What are the key principles of effective leadership?"],
                ["Write a short story about a time traveler."],
                ["Generate a Python function to calculate factorial using recursion."],
                ["What are the benefits of mindfulness meditation?"],
            ],
        )

demo.queue().launch(debug=True)