import gradio as gr
import json
from transformers import pipeline, AutoTokenizer

# Model options, keyed by the label offered in the model dropdown.
# Per entry:
#   model_id   - identifier handed to the transformers loaders
#   max_tokens - token budget applied to the input text before summarizing
#   max_length - upper bound passed to the summarizer
#   min_length - lower bound passed to the summarizer
MODEL_OPTIONS = {
    "BART (10k-BART)": dict(
        model_id="zaferisikli/10k-bart-summary",
        max_tokens=1020,
        max_length=250,
        min_length=70,
    ),
    "PEGASUS (10k-Pegasus)": dict(
        model_id="zaferisikli/10k-pegasus-summary",
        max_tokens=512,
        max_length=256,
        min_length=64,
    ),
}

# Targeted sections: the keys looked up in the uploaded JSON, in output order.
target_items = [
    "item_1",
    "item_1A",
    "item_7",
    "item_8",
    "item_9A",
]

# Helper that corrects wrong company names
def fix_company_names(text, correct_name="Quotient Technology Inc."):
    """Replace every known wrong company name in *text* with *correct_name*.

    All names are substituted in a single pass, trying longer names first.
    This matters because "GTY" is a prefix of "GTY Technology Holdings Inc.":
    replacing the short form first would leave a dangling
    "Technology Holdings Inc." after the corrected name. A single pass also
    guarantees that text just inserted is never rewritten again, even when
    *correct_name* itself contains one of the wrong names.

    Args:
        text: The string to clean up.
        correct_name: The company name to substitute for each wrong name.

    Returns:
        A copy of *text* with every occurrence of a wrong name replaced.
    """
    import re  # local import: keeps this helper self-contained

    wrong_names = [
        "Bridgeline", "Guidewire", "AppFolio", "GTY",
        "GTY Technology Holdings Inc.",
    ]
    # Longest alternative first so the regex prefers the full company name
    # over its short prefix at the same position.
    longest_first = sorted(wrong_names, key=len, reverse=True)
    pattern = "|".join(re.escape(name) for name in longest_first)
    # A callable replacement inserts correct_name verbatim (no backslash or
    # group-reference interpretation).
    return re.sub(pattern, lambda _match: correct_name, text)

# Trim text to a token budget
def safe_trim(text, tokenizer, max_tokens):
    """Return *text* cut down to at most *max_tokens* token ids.

    The text is encoded without truncation, the first *max_tokens* ids are
    kept, and those ids are decoded back to a string with special tokens
    skipped.

    Args:
        text: The string to shorten.
        tokenizer: Object providing ``encode(text, truncation=...)`` and
            ``decode(ids, skip_special_tokens=...)``.
        max_tokens: Number of leading token ids to keep.

    Returns:
        The decoded string for the kept ids.
    """
    token_ids = tokenizer.encode(text, truncation=False)
    return tokenizer.decode(token_ids[:max_tokens], skip_special_tokens=True)

# Summarization workflow
def summarize_json(json_file, selected_model, is_turkish):
    """Summarize the targeted sections of an uploaded 10-K JSON file.

    Args:
        json_file: The upload delivered by the Gradio ``File`` input: either
            a path string or an object whose ``name`` attribute is the path.
        selected_model: Key of ``MODEL_OPTIONS`` selecting the summarizer.
        is_turkish: When truthy, each section is translated Turkish ->
            English before summarizing and the summary is translated back
            to Turkish.

    Returns:
        A tuple ``(output_path, readable_text)``: the path of the JSON file
        written with the per-section summaries, and a text rendering of the
        same summaries.

    Raises:
        gr.Error: If no file was uploaded, the file cannot be parsed as
            UTF-8 JSON, or its top level is not a JSON object.
    """
    if json_file is None:
        raise gr.Error("Please upload a 10-K JSON file first.")

    # Accept both a plain path and a wrapper object exposing ``.name``.
    json_path = getattr(json_file, "name", json_file)

    # Read and validate the upload before loading any model so that bad
    # input fails fast instead of after the expensive model loads.
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        raise gr.Error(f"The uploaded file is not valid UTF-8 JSON: {exc}") from exc
    if not isinstance(data, dict):
        raise gr.Error("The uploaded JSON must be an object keyed by section name.")

    model_cfg = MODEL_OPTIONS[selected_model]
    model_id = model_cfg["model_id"]
    max_tokens = model_cfg["max_tokens"]
    max_len = model_cfg["max_length"]
    min_len = model_cfg["min_length"]

    # NOTE(review): models are loaded on every call; consider caching them
    # if memory allows.
    summarizer = pipeline("summarization", model=model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    tr2en = en2tr = None
    if is_turkish:
        tr2en = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-tr-en")
        en2tr = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-en-tr")

    summary_output = {}
    for item in target_items:
        section = data.get(item)
        # Missing, blank and non-string values are all reported as EMPTY
        # rather than crashing on ``.strip()``.
        raw = section.strip() if isinstance(section, str) else ""
        if not raw:
            summary_output[item] = "EMPTY"
            continue

        if is_turkish:
            # The whole section is sent to the translator untrimmed;
            # truncation=True asks the pipeline to cut over-long input
            # instead of failing on it.
            # NOTE(review): text beyond the translator's limit is dropped --
            # chunked translation would be needed to keep it.
            raw = tr2en(raw, truncation=True)[0]["translation_text"]

        trimmed = safe_trim(raw, tokenizer, max_tokens)
        # truncation=True is a safety net: the trimmed text is re-tokenized
        # inside the pipeline and may come out slightly longer than the
        # budget applied by safe_trim.
        eng_summary = summarizer(
            trimmed,
            max_length=max_len,
            min_length=min_len,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]

        final = en2tr(eng_summary)[0]["translation_text"] if is_turkish else eng_summary

        # The company-name correction is applied here
        summary_output[item] = fix_company_names(final)

    # NOTE(review): fixed file name in the working directory -- concurrent
    # requests would overwrite each other's output.
    output_path = "summary_output.json"
    with open(output_path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps Turkish characters readable in the file
        # instead of escaping them to \uXXXX sequences.
        json.dump({"summary": summary_output}, f, indent=2, ensure_ascii=False)

    readable_text = f" Summary generated by: {selected_model}\n\n"
    readable_text += "\n\n".join([f"### {k.upper()}\n{v}" for k, v in summary_output.items()])
    return output_path, readable_text

# Gradio interface
with gr.Blocks(title="10-K Summary Generator") as demo:
    gr.HTML("<h2 style='text-align: center;'> 10-K Financial Report Summarizer</h2>")

    # Model choice and source-language flag.
    with gr.Row():
        model_selector = gr.Dropdown(
            choices=list(MODEL_OPTIONS),
            value="BART (10k-BART)",
            label="Select Model",
        )
        is_turkish = gr.Checkbox(
            label="Report is in Turkish 🇹🇷",
            value=False,
        )

    # Upload slot for the report and download slot for the result.
    with gr.Row():
        input_file = gr.File(
            label="📤 Upload 10-K JSON",
            file_types=[".json"],
        )
        output_file = gr.File(label="📥 Download Summary JSON")

    with gr.Row():
        summarize_btn = gr.Button(" Generate Summary")

    # Collapsed by default; holds the text rendering of the summaries.
    with gr.Accordion(" Show Summary Text", open=False):
        summary_text = gr.Textbox(
            label="",
            lines=25,
            show_label=False,
        )

    # Wire the button to summarize_json: its two return values feed the
    # download slot and the text box, in that order.
    summarize_btn.click(
        fn=summarize_json,
        inputs=[input_file, model_selector, is_turkish],
        outputs=[output_file, summary_text],
    )

demo.launch()