"""
app.py
"""

import json
import os

import requests
import torch
import gradio as gr
import spaces
from openai import OpenAI
from transformers import AutoModelForCausalLM, AutoTokenizer

# Constants
# Credentials and the Sentinel endpoint are read from the environment; each
# value is None when the corresponding variable is not set.
MODEL_ID: str = "meta-llama/Llama-Guard-3-1B"
HF_API_KEY: str = os.environ.get("HF_API_KEY")
SENTINEL_API_KEY: str = os.environ.get("SENTINEL_API_KEY")
SENTINEL_ENDPOINT: str = os.environ.get("SENTINEL_ENDPOINT")

# Load model and tokenizer
# Both are fetched for MODEL_ID, authenticating with HF_API_KEY. The model is
# loaded in bfloat16 with automatic device placement and then moved to CUDA.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    token=HF_API_KEY,
)
model = model.to("cuda")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_API_KEY)


@spaces.GPU
def llama_guard_moderation(input_text: str) -> str:
    """
    Use the Llama Guard model to perform content moderation on input text.

    The text is wrapped in a single-turn user conversation, rendered with the
    tokenizer's chat template, and passed to the model. Only the newly
    generated tokens are decoded and returned.

    Args:
    - input_text (str): The text to be moderated.

    Returns:
    - str: The decoded text of the generated tokens, with special tokens
      and surrounding whitespace removed.
    """
    conversation = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": input_text
                },
            ],
        }
    ]
    input_ids = tokenizer.apply_chat_template(
        conversation, return_tensors="pt"
    ).to(model.device)

    prompt_len = input_ids.shape[1]
    output = model.generate(
        input_ids,
        max_new_tokens=20,
        pad_token_id=0,
    )
    # Drop the prompt tokens so only the model's continuation is decoded.
    generated_tokens = output[:, prompt_len:]
    # Skip special tokens and trim whitespace so the result shown in the UI
    # is just the model's text, not tokenizer control markers.
    return tokenizer.decode(
        generated_tokens[0], skip_special_tokens=True
    ).strip()


def openai_moderation(input_text: str) -> dict:
    """
    Analyze the input text with the OpenAI Omni Moderation model.

    A new OpenAI client is created on every call, and a single moderation
    request is sent for the given text.

    Args:
    - input_text (str): The text to be moderated.

    Returns:
    - dict: The categories of the first moderation result, converted to
      a dictionary.
    """
    moderation = OpenAI().moderations.create(
        model="omni-moderation-latest",
        input=input_text,
    )
    first_result = moderation.results[0]
    return first_result.categories.to_dict()


def sentinel_moderation(input_text: str) -> dict:
    """
    Use the Sentinel LionGuard API to perform content moderation on input text.

    Sends a POST request to SENTINEL_ENDPOINT, authenticated with
    SENTINEL_API_KEY, asking for the "lionguard" filter only.

    Args:
    - input_text (str): The text to be moderated.

    Returns:
    - dict: The value found under ["outputs"]["lionguard"] in the JSON response.

    Raises:
    - requests.exceptions.Timeout: If the API does not respond in time.
    - requests.exceptions.HTTPError: If the API returns an error status code.
    - KeyError: If the response JSON lacks the expected keys.
    """
    headers = {
        "x-api-key": SENTINEL_API_KEY,
        "Content-Type": "application/json"
    }

    payload = {
        "filters": ["lionguard"],
        "text": input_text
    }

    response = requests.post(
        url=SENTINEL_ENDPOINT,
        headers=headers,
        json=payload,
        # Without a timeout, requests waits indefinitely on a stalled server.
        timeout=30,
    )
    # Fail with a clear HTTP error instead of a confusing KeyError or JSON
    # decode error when the API rejects the request.
    response.raise_for_status()

    return response.json()["outputs"]["lionguard"]


def moderate_text(input_text: str) -> tuple:
    """
    Run the input text through all three moderation back-ends in turn:
    Llama Guard, OpenAI Omni Moderation, and Sentinel LionGuard.

    Args:
    - input_text (str): The text to be moderated.

    Returns:
    - tuple: (Llama Guard result, OpenAI result, Sentinel result), in that order.
    """
    # The order here fixes both the call order and the position of each
    # result in the returned tuple.
    moderators = (
        llama_guard_moderation,
        openai_moderation,
        sentinel_moderation,
    )
    return tuple(moderator(input_text) for moderator in moderators)


# Gradio App
# One text input is passed to moderate_text; its three results are shown in
# three text boxes, listed in the same order as the returned tuple.
_result_labels = (
    "LlamaGuard Result",
    "OpenAI Omni Moderation Result",
    "LionGuard Result",
)

iface = gr.Interface(
    fn=moderate_text,
    inputs=gr.Textbox(lines=5, label="Enter Text for Moderation"),
    outputs=[gr.Textbox(label=result_label) for result_label in _result_labels],
    title="Content Moderation Model Comparison",
    description="Compare the performance of 3 content moderation models: LlamaGuard, OpenAI Omni Moderation, and LionGuard."
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()