import gradio as gr
from unsloth import FastLanguageModel

# Load the fine-tuned English-to-French model in 4-bit quantization (requires a CUDA GPU).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "muzammil-eds/Meta-Llama-3.1-8B-Instruct-English-to-French-v2",
    dtype = None,          # auto-detect dtype
    load_in_4bit = True,
)
FastLanguageModel.for_inference(model)  # enable Unsloth's fast inference mode

def process_input(model, tokenizer, input_text):
    # Alpaca-style prompt template matching the fine-tuning format.
    prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Translate the following English text to French.

### Input:
{}

### Response:
"""
    formatted_prompt = prompt.format(input_text)
    inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=512, use_cache=True)
    decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    # Keep only the generated text after the response marker.
    response_start = "### Response:"
    response = decoded_output.split(response_start)[-1].strip()
    return response

# Define the Gradio interface
def gradio_app(input_text):
    output = process_input(model, tokenizer, input_text)
    return output

# Create the Gradio interface
interface = gr.Interface(
    fn=gradio_app,
    inputs=gr.Textbox(label="Enter your input text"),
    outputs=gr.Textbox(label="Generated Output"),
    title="Text to Response Generator",
    description="Enter input text and get a response."
)

interface.launch()
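# Optional checks (a minimal sketch; the example sentence below is arbitrary).
# Calling the helper directly confirms the model loads and generates before
# wiring up the UI, and share=True is a standard gr.Interface.launch() option
# that prints a temporary public URL, useful on Colab or a headless server.
#
# print(process_input(model, tokenizer, "The weather is beautiful today."))
# interface.launch(share=True)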