Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,55 +1,84 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
|
|
2 |
import mimetypes
|
3 |
import pdfminer.high_level
|
4 |
-
from
|
|
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
8 |
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
if text.strip():
|
11 |
return text
|
12 |
if file is None:
|
13 |
return ""
|
|
|
14 |
mime = mimetypes.guess_type(file.name)[0]
|
15 |
-
binary = file.read()
|
16 |
if mime and "pdf" in mime:
|
17 |
-
return pdfminer.high_level.extract_text(file)
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
|
|
20 |
def detect_sentiment(file, text):
|
21 |
-
content =
|
22 |
if not content:
|
23 |
-
return "
|
24 |
-
|
|
|
|
|
25 |
|
|
|
26 |
def summarize_text(file, text):
|
27 |
-
content =
|
28 |
if not content:
|
29 |
-
return "
|
30 |
-
|
|
|
|
|
31 |
|
32 |
-
|
33 |
-
|
|
|
34 |
if not content:
|
35 |
-
return "
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
39 |
|
|
|
40 |
def reset_fields():
|
41 |
return "", None, "", ""
|
42 |
|
43 |
-
|
44 |
-
|
|
|
45 |
|
46 |
with gr.Row():
|
47 |
-
input_text = gr.Textbox(label="Текст отзыва", lines=8, placeholder="Введите
|
48 |
-
input_file = gr.File(label="Файл (.pdf
|
49 |
|
50 |
with gr.Row():
|
51 |
-
|
52 |
-
|
53 |
btn_both = gr.Button("Анализировать оба")
|
54 |
btn_clear = gr.Button("Очистить")
|
55 |
|
@@ -57,9 +86,9 @@ with gr.Blocks() as demo:
|
|
57 |
sentiment_box = gr.Textbox(label="Результат анализа тональности", lines=2)
|
58 |
summary_box = gr.Textbox(label="Результат резюмирования", lines=4)
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
btn_both.click(fn=
|
63 |
btn_clear.click(fn=reset_fields, outputs=[input_text, input_file, sentiment_box, summary_box])
|
64 |
|
65 |
-
demo.launch()
|
|
|
1 |
+
# NOTE: the former "!pip install -q transformers gradio pdfminer.six langdetect"
# line was IPython/Colab shell syntax, which is a SyntaxError in a plain Python
# module. Declare these packages in requirements.txt instead.
|
2 |
+
|
3 |
import gradio as gr
|
4 |
+
from transformers import pipeline
|
5 |
import mimetypes
|
6 |
import pdfminer.high_level
|
7 |
+
from langdetect import detect
|
8 |
+
import io
|
9 |
|
10 |
+
# Models: one pipeline per supported language code, built once at import time.
_SENTIMENT_CHECKPOINTS = (
    ("en", "distilbert-base-uncased-finetuned-sst-2-english"),
    ("ru", "blanchefort/rubert-base-cased-sentiment"),
)
_SUMMARY_CHECKPOINTS = (
    ("en", "facebook/bart-large-cnn"),
    ("ru", "csebuetnlp/mT5_multilingual_XLSum"),
)

# Language code -> sentiment-analysis pipeline.
sentiment_models = {
    lang_code: pipeline("sentiment-analysis", model=checkpoint)
    for lang_code, checkpoint in _SENTIMENT_CHECKPOINTS
}

# Language code -> summarization pipeline.
summary_models = {
    lang_code: pipeline("summarization", model=checkpoint)
    for lang_code, checkpoint in _SUMMARY_CHECKPOINTS
}
|
20 |
+
|
21 |
+
# Read the review text from the textbox or from the uploaded file.
def get_text(file, text):
    """Return the review text from the textbox or, failing that, the upload.

    Args:
        file: The uploaded file, or ``None``. Either a file-like object
            exposing ``read()`` (and usually ``name``), or a filesystem path.
        text: Contents of the textbox; ``None`` is treated as empty.

    Returns:
        ``text`` unchanged when it contains non-whitespace characters.
        Otherwise the text extracted from ``file``: PDFs go through pdfminer,
        anything else is decoded as UTF-8 with undecodable bytes dropped.
        Returns ``""`` when neither source is provided.
    """
    if text and text.strip():
        return text
    if file is None:
        return ""

    if hasattr(file, "read"):
        source_name = getattr(file, "name", "")
        # Rewind so a handle that was already consumed still yields its content.
        if hasattr(file, "seek"):
            try:
                file.seek(0)
            except (OSError, ValueError):
                pass  # non-seekable stream: read from the current position
        data = file.read()
    else:
        # The upload was handed over as a path rather than an open handle.
        source_name = file
        with open(file, "rb") as handle:
            data = handle.read()

    # A text-mode handle returns str; normalise so both branches below see bytes.
    if isinstance(data, str):
        data = data.encode("utf-8")

    mime = mimetypes.guess_type(str(source_name))[0]
    if mime and "pdf" in mime:
        return pdfminer.high_level.extract_text(io.BytesIO(data))
    return data.decode("utf-8", errors="ignore")
|
33 |
+
|
34 |
+
# Map the detected language to one of the model keys.
def detect_language_model(text):
    """Return the model key (``"ru"`` or ``"en"``) to use for ``text``.

    Any detected language whose code does not start with ``"ru"`` maps to
    ``"en"``. Text in which langdetect finds no usable features (for example
    digits or punctuation only) also maps to ``"en"`` instead of raising.
    """
    # Imported locally so this block does not depend on a file-level import.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        lang = detect(text)
    except LangDetectException:
        return "en"
    return "ru" if lang.startswith("ru") else "en"
|
38 |
|
39 |
+
# Sentiment analysis.
def detect_sentiment(file, text):
    """Classify the sentiment of the review and return a display string.

    Args:
        file: Uploaded file (or ``None``), forwarded to ``get_text``.
        text: Textbox contents, forwarded to ``get_text``.

    Returns:
        A prompt asking for input when no text is available, otherwise
        ``"Тональность: <label>"`` with the label reported by the pipeline.
    """
    content = get_text(file, text).strip()
    if not content:
        return "Введите текст или загрузите файл"
    lang = detect_language_model(content)
    # truncation=True keeps long reviews within the model's maximum input
    # length instead of failing inside the pipeline.
    result = sentiment_models[lang](content, truncation=True)[0]
    return f"Тональность: {result['label']}"
|
47 |
|
48 |
+
# Summarization.
def summarize_text(file, text):
    """Summarize the review and return the summary text.

    Args:
        file: Uploaded file (or ``None``), forwarded to ``get_text``.
        text: Textbox contents, forwarded to ``get_text``.

    Returns:
        A prompt asking for input when no text is available, otherwise the
        ``summary_text`` produced by the language-specific pipeline.
    """
    content = get_text(file, text).strip()
    if not content:
        return "Введите текст или загрузите файл"
    lang = detect_language_model(content)
    # truncation=True keeps long documents within the model's maximum input
    # length instead of failing inside the pipeline.
    result = summary_models[lang](
        content,
        max_length=65,
        min_length=25,
        do_sample=False,
        truncation=True,
    )[0]
    return result['summary_text']
|
56 |
|
57 |
+
# Both analyses at once.
def analyze_all(file, text):
    """Run sentiment analysis and summarization on the same review.

    The text is read and its language detected once, then shared by both
    pipelines.

    Args:
        file: Uploaded file (or ``None``), forwarded to ``get_text``.
        text: Textbox contents, forwarded to ``get_text``.

    Returns:
        A ``(sentiment, summary)`` tuple of display strings. Both elements
        are the input prompt when no text is available.
    """
    content = get_text(file, text).strip()
    if not content:
        return "Введите текст или загрузите файл", "Введите текст или загрузите файл"
    lang = detect_language_model(content)
    # truncation=True keeps long inputs within each model's maximum input
    # length instead of failing inside the pipeline.
    sent = sentiment_models[lang](content, truncation=True)[0]['label']
    summ = summary_models[lang](
        content,
        max_length=65,
        min_length=25,
        do_sample=False,
        truncation=True,
    )[0]['summary_text']
    return f"Тональность: {sent}", summ
|
66 |
|
67 |
+
# Clearing.
def reset_fields():
    """Return blank values for the text input, file input and both result boxes."""
    blank_text, blank_file = "", None
    blank_sentiment = blank_summary = ""
    return blank_text, blank_file, blank_sentiment, blank_summary
|
70 |
|
71 |
+
# Интерфейс
|
72 |
+
with gr.Blocks(title="ReviewSmart") as demo:
|
73 |
+
gr.Markdown("## ReviewSmart — анализ отзывов на основе NLP")
|
74 |
|
75 |
with gr.Row():
|
76 |
+
input_text = gr.Textbox(label="Текст отзыва", lines=8, placeholder="Введите отзыв вручную...")
|
77 |
+
input_file = gr.File(label="Файл (.pdf или .txt)", file_types=[".pdf", ".txt"])
|
78 |
|
79 |
with gr.Row():
|
80 |
+
btn_sent = gr.Button("Определить тональность")
|
81 |
+
btn_sum = gr.Button("Создать резюме")
|
82 |
btn_both = gr.Button("Анализировать оба")
|
83 |
btn_clear = gr.Button("Очистить")
|
84 |
|
|
|
86 |
sentiment_box = gr.Textbox(label="Результат анализа тональности", lines=2)
|
87 |
summary_box = gr.Textbox(label="Результат резюмирования", lines=4)
|
88 |
|
89 |
+
btn_sent.click(fn=detect_sentiment, inputs=[input_file, input_text], outputs=sentiment_box)
|
90 |
+
btn_sum.click(fn=summarize_text, inputs=[input_file, input_text], outputs=summary_box)
|
91 |
+
btn_both.click(fn=analyze_all, inputs=[input_file, input_text], outputs=[sentiment_box, summary_box])
|
92 |
btn_clear.click(fn=reset_fields, outputs=[input_text, input_file, sentiment_box, summary_box])
|
93 |
|
94 |
+
demo.launch(share=True, debug=True)
|