Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import mimetypes
|
|
4 |
import pdfminer.high_level
|
5 |
from langdetect import detect
|
6 |
import io
|
|
|
7 |
|
8 |
# Модели тональности
|
9 |
sentiment_models = {
|
@@ -21,29 +22,30 @@ summary_models = {
|
|
21 |
)
|
22 |
}
|
23 |
|
24 |
-
#
|
25 |
-
def get_text(
|
26 |
if text.strip():
|
27 |
return text
|
28 |
-
if
|
29 |
return ""
|
30 |
|
31 |
try:
|
32 |
-
|
33 |
-
mime = mimetypes.guess_type(file.name)[0]
|
34 |
if mime and "pdf" in mime:
|
35 |
-
|
|
|
36 |
else:
|
37 |
-
|
|
|
38 |
except Exception as e:
|
39 |
-
return f"
|
40 |
|
41 |
# Детекция языка
|
42 |
def detect_language_model(text):
|
43 |
lang = detect(text)
|
44 |
return "ru" if lang.startswith("ru") else "en"
|
45 |
|
46 |
-
#
|
47 |
def detect_sentiment(file, text):
|
48 |
content = get_text(file, text).strip()
|
49 |
if not content:
|
@@ -52,7 +54,7 @@ def detect_sentiment(file, text):
|
|
52 |
result = sentiment_models[lang](content)[0]
|
53 |
return f"Тональность: {result['label']}"
|
54 |
|
55 |
-
#
|
56 |
def summarize_text(file, text):
|
57 |
content = get_text(file, text).strip()
|
58 |
if not content:
|
@@ -61,7 +63,7 @@ def summarize_text(file, text):
|
|
61 |
result = summary_models[lang](content, max_length=65, min_length=25, do_sample=False)[0]
|
62 |
return result['summary_text']
|
63 |
|
64 |
-
#
|
65 |
def analyze_all(file, text):
|
66 |
content = get_text(file, text).strip()
|
67 |
if not content:
|
@@ -71,7 +73,7 @@ def analyze_all(file, text):
|
|
71 |
summ = summary_models[lang](content, max_length=65, min_length=25, do_sample=False)[0]['summary_text']
|
72 |
return f"Тональность: {sent}", summ
|
73 |
|
74 |
-
# Очистка
|
75 |
def reset_fields():
|
76 |
return "", None, "", ""
|
77 |
|
@@ -81,7 +83,7 @@ with gr.Blocks(title="ReviewSmart") as demo:
|
|
81 |
|
82 |
with gr.Row():
|
83 |
input_text = gr.Textbox(label="Текст отзыва", lines=8, placeholder="Введите отзыв вручную...")
|
84 |
-
input_file = gr.File(label="Файл (.pdf или .txt)", file_types=[".pdf", ".txt"])
|
85 |
|
86 |
with gr.Row():
|
87 |
btn_sent = gr.Button("Определить тональность")
|
|
|
4 |
import pdfminer.high_level
|
5 |
from langdetect import detect
|
6 |
import io
|
7 |
+
import os
|
8 |
|
9 |
# Модели тональности
|
10 |
sentiment_models = {
|
|
|
22 |
)
|
23 |
}
|
24 |
|
25 |
+
# Универсальное чтение текста
|
26 |
+
def get_text(file_path, text):
|
27 |
if text.strip():
|
28 |
return text
|
29 |
+
if file_path is None:
|
30 |
return ""
|
31 |
|
32 |
try:
|
33 |
+
mime = mimetypes.guess_type(file_path)[0]
|
|
|
34 |
if mime and "pdf" in mime:
|
35 |
+
with open(file_path, "rb") as f:
|
36 |
+
return pdfminer.high_level.extract_text(f)
|
37 |
else:
|
38 |
+
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
|
39 |
+
return f.read()
|
40 |
except Exception as e:
|
41 |
+
return f"Ошибка чтения файла: {str(e)}"
|
42 |
|
43 |
# Детекция языка
|
44 |
def detect_language_model(text):
|
45 |
lang = detect(text)
|
46 |
return "ru" if lang.startswith("ru") else "en"
|
47 |
|
48 |
+
# Тональность
|
49 |
def detect_sentiment(file, text):
|
50 |
content = get_text(file, text).strip()
|
51 |
if not content:
|
|
|
54 |
result = sentiment_models[lang](content)[0]
|
55 |
return f"Тональность: {result['label']}"
|
56 |
|
57 |
+
# Резюме
|
58 |
def summarize_text(file, text):
|
59 |
content = get_text(file, text).strip()
|
60 |
if not content:
|
|
|
63 |
result = summary_models[lang](content, max_length=65, min_length=25, do_sample=False)[0]
|
64 |
return result['summary_text']
|
65 |
|
66 |
+
# Оба анализа
|
67 |
def analyze_all(file, text):
|
68 |
content = get_text(file, text).strip()
|
69 |
if not content:
|
|
|
73 |
summ = summary_models[lang](content, max_length=65, min_length=25, do_sample=False)[0]['summary_text']
|
74 |
return f"Тональность: {sent}", summ
|
75 |
|
76 |
+
# Очистка
|
77 |
def reset_fields():
|
78 |
return "", None, "", ""
|
79 |
|
|
|
83 |
|
84 |
with gr.Row():
|
85 |
input_text = gr.Textbox(label="Текст отзыва", lines=8, placeholder="Введите отзыв вручную...")
|
86 |
+
input_file = gr.File(label="Файл (.pdf или .txt)", file_types=[".pdf", ".txt"], type="filepath")
|
87 |
|
88 |
with gr.Row():
|
89 |
btn_sent = gr.Button("Определить тональность")
|