Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -21,6 +21,7 @@ MODEL_OPTIONS = {
|
|
21 |
# Hedeflenen bölümler
|
22 |
target_items = ["item_1", "item_1A", "item_7", "item_8", "item_9A"]
|
23 |
|
|
|
24 |
def fix_company_names(text, correct_name="Quotient Technology Inc."):
|
25 |
wrong_names = [
|
26 |
"Bridgeline", "Guidewire", "AppFolio", "GTY",
|
@@ -30,11 +31,13 @@ def fix_company_names(text, correct_name="Quotient Technology Inc."):
|
|
30 |
text = text.replace(wrong, correct_name)
|
31 |
return text
|
32 |
|
|
|
33 |
def safe_trim(text, tokenizer, max_tokens):
|
34 |
input_ids = tokenizer.encode(text, truncation=False)
|
35 |
input_ids = input_ids[:max_tokens]
|
36 |
return tokenizer.decode(input_ids, skip_special_tokens=True)
|
37 |
|
|
|
38 |
def summarize_json(json_file, selected_model, is_turkish):
|
39 |
model_cfg = MODEL_OPTIONS[selected_model]
|
40 |
model_id = model_cfg["model_id"]
|
@@ -66,40 +69,21 @@ def summarize_json(json_file, selected_model, is_turkish):
|
|
66 |
eng_summary = summarizer(trimmed, max_length=max_len, min_length=min_len, do_sample=False)[0]["summary_text"]
|
67 |
|
68 |
final = en2tr(eng_summary)[0]["translation_text"] if is_turkish else eng_summary
|
|
|
|
|
69 |
summary_output[item] = fix_company_names(final)
|
70 |
|
71 |
output_path = "summary_output.json"
|
72 |
with open(output_path, "w", encoding="utf-8") as f:
|
73 |
json.dump({"summary": summary_output}, f, indent=2)
|
74 |
|
75 |
-
readable_text = f"
|
76 |
readable_text += "\n\n".join([f"### {k.upper()}\n{v}" for k, v in summary_output.items()])
|
77 |
return output_path, readable_text
|
78 |
|
79 |
# Gradio Arayüz
|
80 |
with gr.Blocks(title="10-K Summary Generator") as demo:
|
81 |
-
gr.HTML(""
|
82 |
-
<style>
|
83 |
-
body {
|
84 |
-
background-color: #f9f9f9;
|
85 |
-
color: #2c3e50;
|
86 |
-
}
|
87 |
-
h2 {
|
88 |
-
text-align: center;
|
89 |
-
color: #2c3e50;
|
90 |
-
font-family: Arial, sans-serif;
|
91 |
-
}
|
92 |
-
.gr-button {
|
93 |
-
background-color: #f39c12 !important;
|
94 |
-
color: white !important;
|
95 |
-
font-weight: bold;
|
96 |
-
}
|
97 |
-
.gr-button:hover {
|
98 |
-
background-color: #e67e22 !important;
|
99 |
-
}
|
100 |
-
</style>
|
101 |
-
<h2> 10-K Financial Report Summarizer</h2>
|
102 |
-
""")
|
103 |
|
104 |
with gr.Row():
|
105 |
model_selector = gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), value="BART (10k-BART)", label="Select Model")
|
@@ -112,7 +96,7 @@ with gr.Blocks(title="10-K Summary Generator") as demo:
|
|
112 |
with gr.Row():
|
113 |
summarize_btn = gr.Button(" Generate Summary")
|
114 |
|
115 |
-
with gr.Accordion("
|
116 |
summary_text = gr.Textbox(label="", lines=25, show_label=False)
|
117 |
|
118 |
summarize_btn.click(
|
|
|
21 |
# Hedeflenen bölümler
|
22 |
target_items = ["item_1", "item_1A", "item_7", "item_8", "item_9A"]
|
23 |
|
24 |
+
# Yanlış şirket adlarını düzeltme fonksiyonu
|
25 |
def fix_company_names(text, correct_name="Quotient Technology Inc."):
|
26 |
wrong_names = [
|
27 |
"Bridgeline", "Guidewire", "AppFolio", "GTY",
|
|
|
31 |
text = text.replace(wrong, correct_name)
|
32 |
return text
|
33 |
|
34 |
+
# Token sayısına göre metni kırpma
|
35 |
def safe_trim(text, tokenizer, max_tokens):
|
36 |
input_ids = tokenizer.encode(text, truncation=False)
|
37 |
input_ids = input_ids[:max_tokens]
|
38 |
return tokenizer.decode(input_ids, skip_special_tokens=True)
|
39 |
|
40 |
+
# Özetleme işlemi
|
41 |
def summarize_json(json_file, selected_model, is_turkish):
|
42 |
model_cfg = MODEL_OPTIONS[selected_model]
|
43 |
model_id = model_cfg["model_id"]
|
|
|
69 |
eng_summary = summarizer(trimmed, max_length=max_len, min_length=min_len, do_sample=False)[0]["summary_text"]
|
70 |
|
71 |
final = en2tr(eng_summary)[0]["translation_text"] if is_turkish else eng_summary
|
72 |
+
|
73 |
+
# Şirket adı düzeltmesi burada uygulanır
|
74 |
summary_output[item] = fix_company_names(final)
|
75 |
|
76 |
output_path = "summary_output.json"
|
77 |
with open(output_path, "w", encoding="utf-8") as f:
|
78 |
json.dump({"summary": summary_output}, f, indent=2)
|
79 |
|
80 |
+
readable_text = f" Summary generated by: {selected_model}\n\n"
|
81 |
readable_text += "\n\n".join([f"### {k.upper()}\n{v}" for k, v in summary_output.items()])
|
82 |
return output_path, readable_text
|
83 |
|
84 |
# Gradio Arayüz
|
85 |
with gr.Blocks(title="10-K Summary Generator") as demo:
|
86 |
+
gr.HTML("<h2 style='text-align: center;'> 10-K Financial Report Summarizer</h2>")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
with gr.Row():
|
89 |
model_selector = gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), value="BART (10k-BART)", label="Select Model")
|
|
|
96 |
with gr.Row():
|
97 |
summarize_btn = gr.Button(" Generate Summary")
|
98 |
|
99 |
+
with gr.Accordion(" Show Summary Text", open=False):
|
100 |
summary_text = gr.Textbox(label="", lines=25, show_label=False)
|
101 |
|
102 |
summarize_btn.click(
|