# Spaces: Sleeping  (hosting-page status banner captured along with the source; not program text)
import io
import mimetypes
import os

import gradio as gr
import pdfminer.high_level
from langdetect import detect
from langdetect.lang_detect_exception import LangDetectException
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
# Sentiment classifiers, keyed by the language code they are used for.
# Both pipelines are constructed once, while the module is being loaded.
sentiment_models = {
    lang: pipeline("sentiment-analysis", model=checkpoint)
    for lang, checkpoint in (
        ("en", "distilbert-base-uncased-finetuned-sst-2-english"),
        ("ru", "blanchefort/rubert-base-cased-sentiment"),
    )
}
# Summarization pipelines, keyed by language code and built at module load.
# The "ru" entry loads its model and tokenizer explicitly so that the slow
# tokenizer can be requested (use_fast=False).
# NOTE(review): presumably the fast tokenizer is unsuitable for this
# checkpoint — confirm before changing use_fast.
_XLSUM_CHECKPOINT = "csebuetnlp/mT5_multilingual_XLSum"

summary_models = {}
summary_models["en"] = pipeline("summarization", model="facebook/bart-large-cnn")
summary_models["ru"] = pipeline(
    "summarization",
    model=AutoModelForSeq2SeqLM.from_pretrained(_XLSUM_CHECKPOINT),
    tokenizer=AutoTokenizer.from_pretrained(_XLSUM_CHECKPOINT, use_fast=False),
)
# Unified text loading
def get_text(file_path, text):
    """Return the text to analyse, preferring typed text over an uploaded file.

    Args:
        file_path: Path of the uploaded file, or None when nothing was uploaded.
        text: Text typed by the user; may be empty or None.

    Returns:
        ``text`` unchanged when it contains any non-whitespace character.
        Otherwise the contents of ``file_path``: extracted PDF text when the
        guessed MIME type mentions "pdf", else the file decoded as UTF-8 with
        undecodable bytes dropped. ``""`` when there is neither text nor file.
        If reading the file fails, a string starting with
        "Ошибка чтения файла: " is returned instead of raising.

    NOTE(review): callers receive the error message through the same channel
    as real content and cannot tell the two apart.
    """
    # The textbox value can arrive as None; treat that like an empty string
    # instead of failing on None.strip().
    if text and text.strip():
        return text
    if file_path is None:
        return ""
    try:
        # The MIME type is guessed from the file name only, not from content.
        mime = mimetypes.guess_type(file_path)[0]
        if mime and "pdf" in mime:
            with open(file_path, "rb") as f:
                return pdfminer.high_level.extract_text(f)
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()
    except Exception as e:
        # Deliberately broad: any I/O or PDF parsing failure is reported to
        # the user as text rather than crashing the request handler.
        return f"Ошибка чтения файла: {e}"
# Language detection
def detect_language_model(text):
    """Choose the language key used to select a model for *text*.

    Args:
        text: The text whose language should be detected.

    Returns:
        "ru" when langdetect reports a code starting with "ru"; "en" for any
        other detected language and when langdetect cannot classify the text.
    """
    try:
        lang = detect(text)
    except LangDetectException:
        # langdetect raises when the text offers no usable features (for
        # example only digits or punctuation); use the same default that
        # every non-Russian language already maps to.
        return "en"
    return "ru" if lang.startswith("ru") else "en"
# Sentiment
def detect_sentiment(file, text):
    """Classify the sentiment of the typed text or uploaded file.

    Args:
        file: Path of the uploaded file, or None.
        text: Text typed by the user.

    Returns:
        "Введите текст или загрузите файл" when there is nothing to analyse;
        otherwise "Тональность: <label>", where <label> is the label of the
        first result returned by the classifier for the detected language.
    """
    content = get_text(file, text).strip()
    if not content:
        return "Введите текст или загрузите файл"
    classifier = sentiment_models[detect_language_model(content)]
    # truncation=True lets the tokenizer clip reviews that exceed the model's
    # maximum input length instead of the pipeline failing on them.
    result = classifier(content, truncation=True)[0]
    return f"Тональность: {result['label']}"
# Summary
def summarize_text(file, text):
    """Summarise the typed text or uploaded file.

    Args:
        file: Path of the uploaded file, or None.
        text: Text typed by the user.

    Returns:
        "Введите текст или загрузите файл" when there is nothing to analyse;
        otherwise the ``summary_text`` of the first result produced by the
        summarizer for the detected language.
    """
    content = get_text(file, text).strip()
    if not content:
        return "Введите текст или загрузите файл"
    summarizer = summary_models[detect_language_model(content)]
    # truncation=True clips over-long inputs to the model's maximum input
    # length so that long documents do not make the pipeline fail.
    result = summarizer(
        content,
        max_length=65,
        min_length=25,
        do_sample=False,
        truncation=True,
    )[0]
    return result["summary_text"]
# Both analyses
def analyze_all(file, text):
    """Run sentiment classification and summarization on the same input.

    Args:
        file: Path of the uploaded file, or None.
        text: Text typed by the user.

    Returns:
        A pair ``(sentiment_message, summary)``. Both elements are
        "Введите текст или загрузите файл" when there is nothing to analyse;
        otherwise the first is "Тональность: <label>" and the second is the
        generated summary text.
    """
    content = get_text(file, text).strip()
    if not content:
        return "Введите текст или загрузите файл", "Введите текст или загрузите файл"
    # Detect the language once and use it to pick both models.
    lang = detect_language_model(content)
    # truncation=True clips over-long inputs to each model's maximum input
    # length so that long documents do not make the pipelines fail.
    sent = sentiment_models[lang](content, truncation=True)[0]["label"]
    summ = summary_models[lang](
        content,
        max_length=65,
        min_length=25,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]
    return f"Тональность: {sent}", summ
# Reset
def reset_fields():
    """Return blank values for the four fields wired to the clear button.

    Returns:
        A 4-tuple ``("", None, "", "")``: an empty string for each text field
        and None for the file input.
    """
    empty_text = ""
    no_file = None
    return (empty_text, no_file, empty_text, empty_text)
# User interface
with gr.Blocks(title="ReviewSmart") as demo:
    gr.Markdown("## ReviewSmart — анализ отзывов на основе NLP")

    # Inputs: free-form review text and an optional .pdf/.txt upload.
    with gr.Row():
        input_text = gr.Textbox(
            label="Текст отзыва",
            lines=8,
            placeholder="Введите отзыв вручную...",
        )
        input_file = gr.File(
            label="Файл (.pdf или .txt)",
            file_types=[".pdf", ".txt"],
            type="filepath",
        )

    # Actions.
    with gr.Row():
        btn_sent = gr.Button("Определить тональность")
        btn_sum = gr.Button("Создать резюме")
        btn_both = gr.Button("Анализировать оба")
        btn_clear = gr.Button("Очистить")

    # Outputs.
    with gr.Row():
        sentiment_box = gr.Textbox(label="Результат анализа тональности", lines=2)
        summary_box = gr.Textbox(label="Результат резюмирования", lines=4)

    # Every analysis handler takes (file, text), in that order.
    analysis_inputs = [input_file, input_text]
    btn_sent.click(fn=detect_sentiment, inputs=analysis_inputs, outputs=sentiment_box)
    btn_sum.click(fn=summarize_text, inputs=analysis_inputs, outputs=summary_box)
    btn_both.click(
        fn=analyze_all,
        inputs=analysis_inputs,
        outputs=[sentiment_box, summary_box],
    )
    # The clear button takes no inputs and overwrites all four fields.
    btn_clear.click(
        fn=reset_fields,
        outputs=[input_text, input_file, sentiment_box, summary_box],
    )

# NOTE(review): share=True requests a public share link and debug=True keeps
# the process in the foreground — confirm both are wanted where this runs.
demo.launch(share=True, debug=True)