# app.py

import gradio as gr
import torch
from unsloth import FastLanguageModel
import langid

# 1. Load the model and tokenizer a single time at import so that every
#    request reuses the same objects.
max_seq_length = 2048
_pretrained_options = {
    "model_name": "unsloth/DeepSeek-R1-0528-Qwen3-8B",
    "max_seq_length": max_seq_length,
    # Load the weights 4-bit quantized (chosen for the Zero-GPU Space).
    "load_in_4bit": True,
    # Keep the vLLM fast-inference backend switched off.
    "fast_inference": False,
    # Declare the maximum LoRA rank up front so an adapter can be attached later.
    "max_lora_rank": 32,
}
model, tokenizer = FastLanguageModel.from_pretrained(**_pretrained_options)

# Wrap the base model with LoRA modules. Per the original author's note this
# step exists so that saved LoRA weights can be loaded into the model later.
_lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]
model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    target_modules=_lora_target_modules,
    lora_alpha=64,
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

# 2. System prompt
# Same system prompt that was used in the training notebook: one instruction
# per line, joined with newlines.
system_prompt = "\n".join(
    (
        "You are given a problem.",
        "Think about the problem and provide your working out.",
        "You must think in Bahasa Indonesia.",
    )
)

# 3. Inference function
# Folder (uploaded next to this file) that holds the LoRA adapter saved by the
# training notebook.
_LORA_ADAPTER_DIR = "grpo_lora"
# Set to True once the adapter weights have been loaded, so that later
# requests do not read them from disk again.
_lora_weights_loaded = False


def _ensure_lora_loaded():
    """Load the saved LoRA weights into the model's "default" adapter once."""
    global _lora_weights_loaded
    if not _lora_weights_loaded:
        # The module-level get_peft_model() call already created a "default"
        # adapter; load_adapter() fills it with the trained weights.
        model.load_adapter(_LORA_ADAPTER_DIR, adapter_name="default")
        _lora_weights_loaded = True


def generate_response(user_prompt, use_lora):
    """Generate a model answer for ``user_prompt`` and detect its language.

    Args:
        user_prompt: The question typed by the user.
        use_lora: When true, generate with the trained LoRA adapter applied;
            when false, generate with the adapter disabled (base model only).

    Returns:
        A ``(response_text, language_info)`` tuple of strings. On an empty
        prompt or a failed adapter load, the first element is an error
        message and the second is ``"오류"``.
    """
    # Reject empty input before doing any model work.
    if not user_prompt or not user_prompt.strip():
        return "질문을 입력해 주세요.", "오류"

    if use_lora:
        try:
            _ensure_lora_loaded()
        except Exception as e:
            # UI boundary: surface the failure to the user instead of crashing.
            return f"LoRA 어댑터를 로드하는 데 실패했습니다: {e}\n'grpo_lora' 폴더를 Space에 업로드했는지 확인하세요.", "오류"

    # Build the conversation in the structure the chat template expects.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    input_text = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,
    )

    # The templated text already carries its special tokens, so do not let the
    # tokenizer add them a second time. Place the tensors on whatever device
    # the model is on rather than assuming CPU.
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    generation_kwargs = {
        "max_new_tokens": 512,
        "use_cache": True,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if use_lora:
        outputs = model.generate(**inputs, **generation_kwargs)
    else:
        # Bypass the adapter so the checkbox really compares against the
        # base model, even after the LoRA weights have been loaded.
        with model.disable_adapter():
            outputs = model.generate(**inputs, **generation_kwargs)

    # Drop the prompt by token count (not by character count, which breaks
    # when decoding does not reproduce the prompt text exactly).
    prompt_length = inputs["input_ids"].shape[1]
    response_only = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    )

    # Detect the language of the generated answer.
    # NOTE(review): langid's default classify() score may be an unnormalized
    # value rather than a 0-1 confidence; confirm the label below is apt.
    lang, score = langid.classify(response_only)
    lang_info = f"감지된 언어: {lang} (신뢰도: {score:.2f})"

    return response_only, lang_info


# 4. Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Introductory text shown at the top of the page.
    gr.Markdown(
        """
        # 🇮🇩 DeepSeek-R1-Qwen3-8B 모델 추론 (GRPO 튜닝)
        이 모델은 수학 문제에 대해 인도네시아어로 추론 과정을 설명하도록 미세 조정되었습니다.
        - **'인도네시아어 추론 LoRA 적용'** 체크박스를 활성화하면, 학습된 LoRA 가중치가 적용되어 인도네시아어로 된 답변을 생성하도록 유도합니다.
        - 체크박스를 비활성화하면 원본 모델의 추론 능력을 확인할 수 있습니다.
        """
    )

    with gr.Row():
        # Left column: prompt, LoRA toggle and submit button.
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(
                label="질문 입력",
                placeholder="예: Solve (x + 2)^2 = 0",
            )
            lora_checkbox = gr.Checkbox(
                label="인도네시아어 추론 LoRA 적용",
                value=True,
            )
            submit_button = gr.Button("생성하기", variant="primary")

        # Right column: read-only model answer and detected language.
        with gr.Column(scale=3):
            output_text = gr.Textbox(label="모델 응답", interactive=False)
            language_info = gr.Textbox(label="언어 감지 결과", interactive=False)

    # The button and the examples share the same input/output components.
    handler_inputs = [prompt_input, lora_checkbox]
    handler_outputs = [output_text, language_info]

    submit_button.click(
        fn=generate_response,
        inputs=handler_inputs,
        outputs=handler_outputs,
    )

    # Each example row is (prompt, LoRA checkbox state).
    example_rows = [
        ["Solve (x + 2)^2 = 0", True],
        ["What is the square root of 101?", True],
        ["In triangle $ABC$, $\\sin \\angle A = \\frac{4}{5}$ and $\\angle A < 90^\\circ$. Let $D$ be a point outside triangle $ABC$ such that $\\angle BAD = \\angle DAC$ and $\\angle BDC = 90^\\circ$. Suppose that $AD = 1$ and that $\\frac{BD}{CD} = \\frac{3}{2}$. If $AB + AC$ can be expressed in the form $\\frac{a\\sqrt{b}}{c}$ where $a, b, c$ are pairwise relatively prime integers, find $a + b + c$.", True],
    ]
    gr.Examples(
        examples=example_rows,
        inputs=handler_inputs,
        outputs=handler_outputs,
        fn=generate_response,
        cache_examples=False,
    )

# Start the Gradio app.
demo.launch()