import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
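
# Fetch the quantized GGUF weights from the Hugging Face Hub and load them with llama.cpp.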
model_name = "SantaBot/Jokestral_4bit_guff"
model_file = "unsloth.Q4_K_M.gguf"
model_path = hf_hub_download(model_name, filename=model_file)
llm = Llama(model_path=model_path)
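
# Sample several independent completions for the prompt and return them as a numbered list.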
def make_inference(user_prompt, temperature=0.8, max_new_tokens=64, number_of_outputs=3):
    outputs = ""
    # gr.Number inputs arrive as floats, so cast before using them as counts.
    for i in range(int(number_of_outputs)):
        output = llm(
            user_prompt,
            max_tokens=int(max_new_tokens),
            stop=["</s>", "<s>"],  # stop at the model's sequence markers
            echo=True,  # include the prompt so each joke reads from its first words
            temperature=temperature,
        )
        outputs += f"{i + 1}. {output['choices'][0]['text']}\n\n"
    return outputs
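
# Gradio UI: the inputs are mapped positionally onto make_inference's arguments.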
demo = gr.Interface(
    fn=make_inference,
    inputs=[
        gr.Text(value="Whats the difference", label="Your prompt"),
        gr.Slider(minimum=0, maximum=1, value=0.8, step=0.05, label="Temperature"),
        gr.Number(minimum=10, maximum=1024, value=64, label="Max new tokens"),
        gr.Number(minimum=1, maximum=10, value=3, label="Number of outputs"),
    ],
    outputs=[gr.Text(label="Output")],
    examples=[
        ["Whats the difference", 0.8, 64, 1],
        ["Once a priest", 0.8, 64, 1],
        ["My doctor", 0.8, 64, 1],
        ["I saw", 0.8, 64, 1],
    ],
    allow_flagging="never",
    title="Jokestral 🤣🫵🤡",
    description="Jokestral is Mistral-7B-v0.3 fine-tuned on the [Short jokes dataset](https://www.kaggle.com/datasets/abhinavmoudgil95/short-jokes). Just write the first few words and get your joke. [More information](https://huggingface.co/SantaBot/Jokestral_16bit)",
)
demo.launch()