from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr

# 1) Load the Central Kurdish (Arabic) Goldfish model
# Identifier handed to both from_pretrained() calls below, so the tokenizer
# and the weights always come from the same checkpoint.
MODEL_ID = "goldfish-models/ckb_arab_full"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# eval() returns the module it was called on, so it is chained onto the load;
# the app only generates text and never trains.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID).eval()

# 2) Chat function: generates a reply and appends the turn to the history
def chat_fn(user_message, history):
    """Generate a reply to ``user_message`` and append the turn to ``history``.

    Only the current message is sent to the model; earlier turns in
    ``history`` are carried along for display and are not part of the prompt.

    Args:
        user_message: Text submitted by the user.
        history: Previous ``(user_message, reply)`` pairs, or ``None`` when
            no chat value is set (the Clear handler resets it to ``None``).

    Returns:
        A pair ``(history, history)``: the same updated list twice, one
        entry per output the caller wires this function to. The input
        ``history`` object is never mutated.
    """
    # Normalise a missing/empty history so list concatenation cannot fail.
    history = list(history) if history else []

    # A blank submission has nothing to answer; leave the chat unchanged
    # instead of sampling from an empty prompt.
    if not user_message or not user_message.strip():
        return history, history

    # Prepend [CLS] token and append [SEP]
    prompt = tokenizer.cls_token + user_message + tokenizer.sep_token
    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[-1]

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            top_p=0.9,
            temperature=0.8,
        )

    # generate() returns prompt + continuation; decode only the new tokens.
    reply = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    history = history + [(user_message, reply)]
    return history, history

# 3) Build Gradio Chat UI
with gr.Blocks() as demo:
    gr.Markdown("## Chat with Goldfish’s Central Kurdish (Arabic) Model")
    chatbot = gr.Chatbot()
    txt = gr.Textbox(placeholder="Type your message here...")
    clear = gr.Button("Clear")

    # Submitting the textbox passes the typed message and the current chat
    # value to chat_fn; both returned values are written back to the chatbot.
    txt.submit(
        fn=chat_fn,
        inputs=[txt, chatbot],
        outputs=[chatbot, chatbot],
    )
    # The Clear button takes no inputs and resets the chatbot value to None.
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot)

# 4) Launch the app
if __name__ == "__main__":
    # Start the server only when run as a script, not when imported.
    demo.launch()