import gradio as gr
import torch
from transformers import pipeline

model_name = "eljanmahammadli/AzLlama-152M-Alpaca"
model = pipeline("text-generation", model=model_name, torch_dtype=torch.float16)
logo_path = "/Users/eljan/Documents/AzLlama/AzLlama-logo.webp"


def get_prompt(question):
    # Alpaca-style instruction template in Azerbaijani, matching the model's fine-tuning format.
    base_instruction = "Aşağıda tapşırığı təsvir edən təlimat və əlavə kontekst təmin edən giriş verilmiştir. Sorğunu uyğun şəkildə tamamlayan cavab yazın."
    prompt = f"""{base_instruction}

### Təlimat:
{question}

### Cavab:
"""
    return prompt


def get_answer(llm_output):
    # Keep only the text generated after the "### Cavab:" (answer) marker.
    return llm_output.split("### Cavab:")[1].strip()


def answer_question(history, temperature, top_p, repetition_penalty, top_k, question):
    model_params = {
        "temperature": temperature,
        "top_p": top_p,
        "repetition_penalty": repetition_penalty,
        "top_k": top_k,
        "max_length": 512,  # Adjust based on your needs
        "do_sample": True,
    }
    prompt = get_prompt(question)
    llm_output = model(prompt, **model_params)[0]
    answer = get_answer(llm_output["generated_text"])
    divider = "\n\n" if history else ""
    print(answer)
    new_history = history + divider + f"USER: {question}\nASSISTANT: {answer}\n"
    return new_history, ""  # Return updated history and clear the question input


with gr.Blocks() as app:
    gr.Markdown("# AzLlama-150M Chatbot\n\n")
    with gr.Row():
        with gr.Column(scale=1, min_width=200):
            gr.Markdown("### Model Logo")
            gr.Image(value=logo_path)
            # Info about the model
            gr.Markdown(
                "### Model Info\n"
                "This model is a 150M parameter LLaMA2 model trained from scratch on Azerbaijani text. "
                "It can be used to generate text based on the given prompt."
            )
        with gr.Column(scale=3):
            gr.Markdown("### Chat with the Assistant")
            history = gr.Textbox(
                label="Chat History", value="", lines=20, interactive=False
            )
            question = gr.Textbox(
                label="Your question",
                placeholder="Type your question and press enter",
            )
            send_button = gr.Button("Send")
        with gr.Column(scale=1, min_width=200):
            gr.Markdown("### Model Settings")
            temperature = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.9, label="Temperature"
            )
            gr.Markdown(
                "Controls the randomness of predictions. Lower values make the model more deterministic."
            )
            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label="Top P")
            gr.Markdown(
                "Nucleus sampling. Lower values focus on more likely predictions."
            )
            repetition_penalty = gr.Slider(
                minimum=1.0, maximum=2.0, value=1.2, label="Repetition Penalty"
            )
            gr.Markdown(
                "Penalizes repeated words. Higher values discourage repetition."
            )
            top_k = gr.Slider(minimum=0, maximum=100, value=50, label="Top K")
            gr.Markdown("Keeps only the top k predictions. Set to 0 for no limit.")

    # Pressing Enter in the question box runs the same handler as clicking the Send button.
    question.submit(
        fn=answer_question,
        inputs=[history, temperature, top_p, repetition_penalty, top_k, question],
        outputs=[history, question],
    )
    send_button.click(
        fn=answer_question,
        inputs=[history, temperature, top_p, repetition_penalty, top_k, question],
        outputs=[history, question],
    )

app.launch()
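# A minimal sketch of how to try the app locally, assuming only the packages imported
# above are needed (the model weights are fetched from the Hugging Face Hub on first run):
#
#   pip install gradio torch transformers
#   python app.py
#
# The filename "app.py" is an assumption; use whatever name this script is saved under.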