"""Gradio app that summarizes selected sections of a 10-K report stored as JSON.

The uploaded JSON is expected to map section keys (see ``target_items``) to
section text. Each section is optionally translated from Turkish to English,
trimmed to the selected model's token budget, summarized, optionally
translated back to Turkish, and written to ``summary_output.json``.
"""

import json
import re
from functools import lru_cache

import gradio as gr
from transformers import pipeline, AutoTokenizer

# Model options. For each entry:
#   model_id   - identifier passed to the summarization pipeline/tokenizer
#   max_tokens - number of input tokens kept by safe_trim()
#   max_length / min_length - forwarded to the summarizer call
MODEL_OPTIONS = {
    "BART (10k-BART)": {
        "model_id": "zaferisikli/10k-bart-summary",
        "max_tokens": 1020,
        "max_length": 250,
        "min_length": 70,
    },
    "PEGASUS (10k-Pegasus)": {
        "model_id": "zaferisikli/10k-pegasus-summary",
        "max_tokens": 512,
        "max_length": 256,
        "min_length": 64,
    },
}

# Targeted sections: the JSON keys that get summarized.
target_items = ["item_1", "item_1A", "item_7", "item_8", "item_9A"]

# Translation models used when the report is flagged as Turkish.
_TR_EN_MODEL = "Helsinki-NLP/opus-mt-tc-big-tr-en"
_EN_TR_MODEL = "Helsinki-NLP/opus-mt-tc-big-en-tr"

# Company names that are rewritten to the correct name in generated summaries.
_WRONG_COMPANY_NAMES = (
    "Bridgeline",
    "Guidewire",
    "AppFolio",
    "GTY",
    "GTY Technology Holdings Inc.",
)

# Longest alternatives first so "GTY Technology Holdings Inc." is matched as a
# whole instead of having only its "GTY" prefix replaced.
_WRONG_COMPANY_RE = re.compile(
    "|".join(
        re.escape(name)
        for name in sorted(_WRONG_COMPANY_NAMES, key=len, reverse=True)
    )
)


def fix_company_names(text, correct_name="Quotient Technology Inc."):
    """Replace known wrong company names in *text* with *correct_name*.

    The replacement is done in a single pass, so text inserted by one
    replacement is never matched again by another wrong name.

    Args:
        text: Text to clean up.
        correct_name: Name substituted for every wrong name found.

    Returns:
        The text with every listed wrong name replaced.
    """
    # A callable replacement keeps backslashes in correct_name literal.
    return _WRONG_COMPANY_RE.sub(lambda _match: correct_name, text)


def safe_trim(text, tokenizer, max_tokens):
    """Trim *text* to at most *max_tokens* tokens of *tokenizer*.

    Args:
        text: Text to shorten.
        tokenizer: Object providing ``encode`` and ``decode``.
        max_tokens: Maximum number of token ids to keep.

    Returns:
        The decoded text of the first *max_tokens* token ids, with special
        tokens skipped.
    """
    input_ids = tokenizer.encode(text, truncation=False)
    return tokenizer.decode(input_ids[:max_tokens], skip_special_tokens=True)


@lru_cache(maxsize=None)
def _load_summarizer(model_id):
    """Return the (summarization pipeline, tokenizer) pair for *model_id*, cached."""
    return (
        pipeline("summarization", model=model_id),
        AutoTokenizer.from_pretrained(model_id),
    )


@lru_cache(maxsize=None)
def _load_translator(model_id):
    """Return the translation pipeline for *model_id*, cached."""
    return pipeline("translation", model=model_id)


def summarize_json(json_file, selected_model, is_turkish):
    """Summarize the targeted sections of an uploaded 10-K JSON file.

    Args:
        json_file: Uploaded file; either an object with a ``name`` attribute
            holding the path, or the path itself.
        selected_model: Key into ``MODEL_OPTIONS``.
        is_turkish: When true, each section is translated Turkish -> English
            before summarizing and the summary is translated back to Turkish.

    Returns:
        A tuple ``(output_path, readable_text)``: the path of the written
        summary JSON and a text rendering of all summaries.

    Raises:
        gr.Error: If no file was uploaded or the JSON root is not an object.
    """
    if json_file is None:
        raise gr.Error("Please upload a 10-K JSON file first.")

    model_cfg = MODEL_OPTIONS[selected_model]
    max_tokens = model_cfg["max_tokens"]
    max_len = model_cfg["max_length"]
    min_len = model_cfg["min_length"]

    summarizer, tokenizer = _load_summarizer(model_cfg["model_id"])
    tr2en = en2tr = None
    if is_turkish:
        tr2en = _load_translator(_TR_EN_MODEL)
        en2tr = _load_translator(_EN_TR_MODEL)

    # The upload may arrive as a file-like wrapper or as a plain path string.
    input_path = getattr(json_file, "name", json_file)
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    if not isinstance(data, dict):
        raise gr.Error("The uploaded JSON must be an object keyed by section name.")

    summary_output = {}
    for item in target_items:
        value = data.get(item)
        # Missing, null, or non-text sections are reported as empty.
        raw = value.strip() if isinstance(value, str) else ""
        if not raw:
            summary_output[item] = "EMPTY"
            continue

        if is_turkish:
            # truncation=True asks the pipeline to cut over-long input to the
            # model limit; text beyond that limit is not translated.
            raw = tr2en(raw, truncation=True)[0]["translation_text"]

        trimmed = safe_trim(raw, tokenizer, max_tokens)
        eng_summary = summarizer(
            trimmed,
            max_length=max_len,
            min_length=min_len,
            do_sample=False,
            # Safety net: re-tokenizing the trimmed text can yield a slightly
            # different token count than safe_trim() kept.
            truncation=True,
        )[0]["summary_text"]

        if is_turkish:
            final = en2tr(eng_summary, truncation=True)[0]["translation_text"]
        else:
            final = eng_summary

        # The company name correction is applied here.
        summary_output[item] = fix_company_names(final)

    output_path = "summary_output.json"
    with open(output_path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps Turkish characters readable in the file.
        json.dump({"summary": summary_output}, f, indent=2, ensure_ascii=False)

    readable_text = f" Summary generated by: {selected_model}\n\n"
    readable_text += "\n\n".join(
        f"### {k.upper()}\n{v}" for k, v in summary_output.items()
    )
    return output_path, readable_text


# Gradio interface
with gr.Blocks(title="10-K Summary Generator") as demo:
    # NOTE(review): the heading's HTML markup appears to have been stripped
    # from this literal; the remaining text is kept as-is. Confirm the
    # intended tags (e.g. a heading element) and restore them.
    gr.HTML("\n\n10-K Financial Report Summarizer\n\n")

    with gr.Row():
        model_selector = gr.Dropdown(
            choices=list(MODEL_OPTIONS.keys()),
            value="BART (10k-BART)",
            label="Select Model",
        )
        is_turkish = gr.Checkbox(label="Report is in Turkish 🇹🇷", value=False)

    with gr.Row():
        input_file = gr.File(label="📤 Upload 10-K JSON", file_types=[".json"])
        output_file = gr.File(label="📥 Download Summary JSON")

    with gr.Row():
        summarize_btn = gr.Button(" Generate Summary")

    with gr.Accordion(" Show Summary Text", open=False):
        summary_text = gr.Textbox(label="", lines=25, show_label=False)

    summarize_btn.click(
        fn=summarize_json,
        inputs=[input_file, model_selector, is_turkish],
        outputs=[output_file, summary_text],
    )

# Launch only when run as a script so importing this module has no side effect
# beyond building `demo`.
if __name__ == "__main__":
    demo.launch()