Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,36 +1,36 @@
|
|
1 |
-
# app.py
|
2 |
import gradio as gr
|
3 |
import torch
|
4 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
5 |
|
6 |
-
# --- 1.
|
7 |
model_name = "doganbilir/mt5-Turkish-English-Summarizer"
|
8 |
-
|
9 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
10 |
|
11 |
-
# --- 2.
|
12 |
try:
|
13 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
14 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
|
15 |
-
print("Model
|
16 |
except Exception as e:
|
17 |
-
print(f"
|
18 |
model = None
|
19 |
|
20 |
-
# --- 3. Define
|
21 |
def summarize(text, language_choice, min_length):
|
22 |
if model is None:
|
23 |
-
return "Model
|
24 |
if not text or not text.strip():
|
25 |
-
return "
|
26 |
-
|
27 |
-
|
|
|
28 |
prompt = f"summarize: {text}"
|
29 |
-
elif language_choice == "
|
30 |
prompt = f"summarize: {text}"
|
31 |
-
else: # TR -> EN (
|
32 |
prompt = f"summarize Turkish to English: {text}"
|
33 |
-
|
34 |
try:
|
35 |
inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True).to(device)
|
36 |
|
@@ -39,7 +39,7 @@ def summarize(text, language_choice, min_length):
|
|
39 |
attention_mask=inputs.attention_mask,
|
40 |
max_new_tokens=150,
|
41 |
num_beams=5,
|
42 |
-
min_new_tokens=int(min_length), #
|
43 |
early_stopping=True,
|
44 |
no_repeat_ngram_size=3
|
45 |
)
|
@@ -47,32 +47,34 @@ def summarize(text, language_choice, min_length):
|
|
47 |
summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)[0]
|
48 |
return summary.strip()
|
49 |
except Exception as e:
|
50 |
-
return f"
|
51 |
|
52 |
-
# --- 4. Create
|
53 |
iface = gr.Interface(
|
54 |
fn=summarize,
|
55 |
inputs=[
|
56 |
gr.Textbox(lines=15, placeholder="Paste the text you want to summarize here...", label="Text Input"),
|
57 |
gr.Radio(
|
58 |
["Turkish Text -> Turkish Summary", "English Text -> English Summary", "Turkish Text -> English Summary (Experimental)"],
|
59 |
-
label="
|
60 |
-
value="Turkish Text -> Turkish Summary"
|
61 |
),
|
62 |
gr.Slider(
|
63 |
minimum=10,
|
64 |
maximum=100,
|
65 |
-
value=10, #
|
66 |
step=5,
|
67 |
label="Minimum Summary Length (Tokens)",
|
68 |
info="Increase this value to force the model to generate longer summaries."
|
69 |
)
|
70 |
],
|
71 |
-
outputs=gr.Textbox(lines=5, label="
|
72 |
title="Multilingual Text Summarization Model",
|
73 |
description="""This demo generates headline-style summaries for Turkish or English texts.
|
74 |
-
The model
|
75 |
-
based on the `google/mt5-small` model.""",
|
|
|
|
|
76 |
examples=[
|
77 |
["İstanbul’da açılan yeni bilim merkezi, hem çocuklar hem de yetişkinler için interaktif deneyimler sunuyor. Ziyaretçiler, sanal gerçeklik ve artırılmış gerçeklik teknolojilerini kullanarak uzay, biyoloji ve fizik konularını deneyimleyebiliyor. Merkez, özellikle eğitim kurumlarıyla yaptığı iş birliği sayesinde öğrencilerin bilimsel merakını artırmayı hedefliyor. Açılışından bu yana, binlerce kişi merkezi ziyaret etti ve sosyal medyada paylaşılan videolar büyük ilgi gördü.", "Turkish Text -> Turkish Summary", 10],
|
78 |
["NASA recently announced that the James Webb Space Telescope has captured highly detailed images of distant exoplanets, allowing scientists to analyze their atmospheres in unprecedented detail. The observations reveal clues about planetary composition, weather patterns, and potential habitability. This milestone represents a significant step forward in our understanding of planets outside our solar system, and researchers plan to continue studying these celestial bodies to uncover more about the diversity and nature of exoplanets.", "English Text -> English Summary", 10]
|
|
|
1 |
+
# app.py - Final Version with English Comments and UI Text
|
2 |
import gradio as gr
|
3 |
import torch
|
4 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
5 |
|
6 |
+
# --- 1. Define model and tokenizer paths ---
|
7 |
model_name = "doganbilir/mt5-Turkish-English-Summarizer"
|
|
|
8 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
9 |
|
10 |
+
# --- 2. Load model and tokenizer from the Hub ---
|
11 |
try:
|
12 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
13 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
|
14 |
+
print("Model and tokenizer loaded successfully!")
|
15 |
except Exception as e:
|
16 |
+
print(f"An error occurred while loading the model: {e}")
|
17 |
model = None
|
18 |
|
19 |
+
# --- 3. Define the summarization function ---
|
20 |
def summarize(text, language_choice, min_length):
|
21 |
if model is None:
|
22 |
+
return "Model could not be loaded. Please check the Space logs."
|
23 |
if not text or not text.strip():
|
24 |
+
return "Please enter text to summarize."
|
25 |
+
|
26 |
+
# Set the prompt based on the language choice
|
27 |
+
if language_choice == "Turkish Text -> Turkish Summary":
|
28 |
prompt = f"summarize: {text}"
|
29 |
+
elif language_choice == "English Text -> English Summary":
|
30 |
prompt = f"summarize: {text}"
|
31 |
+
else: # TR -> EN (Experimental)
|
32 |
prompt = f"summarize Turkish to English: {text}"
|
33 |
+
|
34 |
try:
|
35 |
inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True).to(device)
|
36 |
|
|
|
39 |
attention_mask=inputs.attention_mask,
|
40 |
max_new_tokens=150,
|
41 |
num_beams=5,
|
42 |
+
min_new_tokens=int(min_length), # Use the value from the slider
|
43 |
early_stopping=True,
|
44 |
no_repeat_ngram_size=3
|
45 |
)
|
|
|
47 |
summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)[0]
|
48 |
return summary.strip()
|
49 |
except Exception as e:
|
50 |
+
return f"An error occurred during summarization: {str(e)}"
|
51 |
|
52 |
+
# --- 4. Create the Gradio Interface ---
|
53 |
iface = gr.Interface(
|
54 |
fn=summarize,
|
55 |
inputs=[
|
56 |
gr.Textbox(lines=15, placeholder="Paste the text you want to summarize here...", label="Text Input"),
|
57 |
gr.Radio(
|
58 |
["Turkish Text -> Turkish Summary", "English Text -> English Summary", "Turkish Text -> English Summary (Experimental)"],
|
59 |
+
label="Summarization Type",
|
60 |
+
value="Turkish Text -> Turkish Summary"
|
61 |
),
|
62 |
gr.Slider(
|
63 |
minimum=10,
|
64 |
maximum=100,
|
65 |
+
value=10, # Default value for natural, short summaries
|
66 |
step=5,
|
67 |
label="Minimum Summary Length (Tokens)",
|
68 |
info="Increase this value to force the model to generate longer summaries."
|
69 |
)
|
70 |
],
|
71 |
+
outputs=gr.Textbox(lines=5, label="Generated Summary"),
|
72 |
title="Multilingual Text Summarization Model",
|
73 |
description="""This demo generates headline-style summaries for Turkish or English texts.
|
74 |
+
The model was fine-tuned on 30,000 Turkish-English summary pairs using the LoRA technique,
|
75 |
+
based on the `google/mt5-small` model.""",
|
76 |
+
|
77 |
+
# The values in the examples list are now 100% consistent with the Radio button choices.
|
78 |
examples=[
|
79 |
["İstanbul’da açılan yeni bilim merkezi, hem çocuklar hem de yetişkinler için interaktif deneyimler sunuyor. Ziyaretçiler, sanal gerçeklik ve artırılmış gerçeklik teknolojilerini kullanarak uzay, biyoloji ve fizik konularını deneyimleyebiliyor. Merkez, özellikle eğitim kurumlarıyla yaptığı iş birliği sayesinde öğrencilerin bilimsel merakını artırmayı hedefliyor. Açılışından bu yana, binlerce kişi merkezi ziyaret etti ve sosyal medyada paylaşılan videolar büyük ilgi gördü.", "Turkish Text -> Turkish Summary", 10],
|
80 |
["NASA recently announced that the James Webb Space Telescope has captured highly detailed images of distant exoplanets, allowing scientists to analyze their atmospheres in unprecedented detail. The observations reveal clues about planetary composition, weather patterns, and potential habitability. This milestone represents a significant step forward in our understanding of planets outside our solar system, and researchers plan to continue studying these celestial bodies to uncover more about the diversity and nature of exoplanets.", "English Text -> English Summary", 10]
|