"""Gradio chat front-end for the SinLlama Sinhala language model."""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub id of the checkpoint served by this app.
model_name = "polyglots/SinLlama_v01"

# use_fast=False requests the Python ("slow") tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# Load the weights in half precision; device_map="auto" leaves device
# placement to the loader.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)


def chat_with_sinllama(message, history=None):
    """Generate a reply to *message* and append the exchange to *history*.

    Args:
        message: The user's input text.
        history: List of ``(user_message, reply)`` tuples from earlier turns.
            ``None`` (the default, and the value the Clear button writes to
            the chatbot) starts a fresh conversation.

    Returns:
        A ``(history, history)`` tuple: the updated history, repeated once
        per output component the submit handler is wired to.
    """
    # A fresh list per call: a mutable default would be shared by every
    # caller that omits ``history``.
    if history is None:
        history = []

    # Nothing to answer; leave the conversation untouched.
    if not message or not message.strip():
        return history, history

    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
    )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Drop the prompt so the reply does not echo the message.
    prompt_length = inputs["input_ids"].shape[-1]
    reply = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    )

    history.append((message, reply))
    return history, history


# Gradio app
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="SinLlama (Sinhala Chatbot)")
    msg = gr.Textbox(label="Type your message in Sinhala")
    clear = gr.Button("Clear")

    # The handler returns the history twice, matching the two outputs here.
    msg.submit(chat_with_sinllama, [msg, chatbot], [chatbot, chatbot])
    # Clearing writes None into the chatbot component.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()