Spaces:

Yazi3333
/

reviewsmart

Sleeping

File size: 4,171 Bytes

import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import mimetypes
import pdfminer.high_level
from langdetect import detect
import io
import os

# Sentiment-analysis pipelines, keyed by language code ("en" / "ru").
# Every pipeline is created eagerly when this module is executed.
sentiment_models = {
    lang_code: pipeline("sentiment-analysis", model=checkpoint)
    for lang_code, checkpoint in (
        ("en", "distilbert-base-uncased-finetuned-sst-2-english"),
        ("ru", "blanchefort/rubert-base-cased-sentiment"),
    )
}

# Summarization pipelines, keyed by language code ("en" / "ru").
# The Russian entry is built from an explicitly loaded model and a
# slow (non-fast) tokenizer for the multilingual XLSum checkpoint.
summary_models = dict(
    en=pipeline("summarization", model="facebook/bart-large-cnn"),
    ru=pipeline(
        "summarization",
        model=AutoModelForSeq2SeqLM.from_pretrained(
            "csebuetnlp/mT5_multilingual_XLSum"
        ),
        tokenizer=AutoTokenizer.from_pretrained(
            "csebuetnlp/mT5_multilingual_XLSum",
            use_fast=False,
        ),
    ),
)

# Unified text loading
def get_text(file_path, text):
    """Return the text to analyze, preferring typed text over an uploaded file.

    Args:
        file_path: Path of the uploaded file, or None / "" when no file
            was provided.
        text: Text typed by the user; may be empty or None.

    Returns:
        ``text`` unchanged when it contains any non-whitespace character.
        Otherwise the content of ``file_path``: PDF files (recognised by
        their guessed MIME type) go through pdfminer, everything else is
        read as UTF-8 with undecodable bytes dropped. Returns "" when
        there is neither text nor a file. If reading the file fails, the
        returned string is an error message rather than file content.
    """
    # Guard against None as well as "" so a missing text value does not
    # fail on None.strip().
    if text and text.strip():
        return text
    # An empty path is treated like "no file" instead of being passed to
    # open(), which would only produce a misleading read error.
    if not file_path:
        return ""

    try:
        mime_type, _ = mimetypes.guess_type(file_path)
        if mime_type and "pdf" in mime_type:
            with open(file_path, "rb") as pdf_file:
                return pdfminer.high_level.extract_text(pdf_file)
        with open(file_path, "r", encoding="utf-8", errors="ignore") as text_file:
            return text_file.read()
    except Exception as e:
        # Deliberate best-effort: any read/parse failure is reported to the
        # caller as text instead of propagating.
        return f"Ошибка чтения файла: {str(e)}"

# Language detection
def detect_language_model(text):
    """Choose which language-specific models to use for ``text``.

    Args:
        text: The text whose language should be detected.

    Returns:
        "ru" when langdetect reports a language code starting with "ru",
        otherwise "en". "en" is also returned when langdetect cannot
        detect a language at all.
    """
    # Local import keeps this fix self-contained; langdetect is already a
    # dependency of this module.
    from langdetect import LangDetectException

    try:
        lang = detect(text)
    except LangDetectException:
        # langdetect raises when the text has no usable features (for
        # example only digits or punctuation). Fall back to the same
        # default every non-Russian language already gets.
        return "en"
    return "ru" if lang.startswith("ru") else "en"

# Sentiment
def detect_sentiment(file, text):
    """Classify the sentiment of the typed text or uploaded file.

    Args:
        file: Path of the uploaded file, or None.
        text: Text typed by the user.

    Returns:
        A user-facing string with the predicted label, or a prompt asking
        for input when no text could be obtained.
    """
    content = get_text(file, text).strip()
    if not content:
        return "Введите текст или загрузите файл"
    classifier = sentiment_models[detect_language_model(content)]
    # Both classifiers are BERT-style encoders with a 512-token input
    # limit. Without truncation a long review (e.g. a whole PDF) makes
    # the model raise instead of returning a label.
    result = classifier(content, truncation=True, max_length=512)[0]
    return f"Тональность: {result['label']}"

# Summary
def summarize_text(file, text):
    """Summarize the typed text or uploaded file.

    Args:
        file: Path of the uploaded file, or None.
        text: Text typed by the user.

    Returns:
        The generated summary (25 to 65 tokens, greedy decoding), or a
        prompt asking for input when no text could be obtained.
    """
    content = get_text(file, text).strip()
    if not content:
        return "Введите текст или загрузите файл"
    summarizer = summary_models[detect_language_model(content)]
    # truncation=True clips the input to the tokenizer's maximum length.
    # Without it, text longer than the model's input window makes the
    # pipeline raise.
    result = summarizer(
        content,
        max_length=65,
        min_length=25,
        do_sample=False,
        truncation=True,
    )[0]
    return result['summary_text']

# Both analyses
def analyze_all(file, text):
    """Run sentiment classification and summarization on the same input.

    Args:
        file: Path of the uploaded file, or None.
        text: Text typed by the user.

    Returns:
        A ``(sentiment_message, summary)`` pair. When no text could be
        obtained, both elements are the same prompt asking for input.
    """
    content = get_text(file, text).strip()
    if not content:
        return "Введите текст или загрузите файл", "Введите текст или загрузите файл"
    lang = detect_language_model(content)
    # Truncate on both calls: the classifiers accept at most 512 tokens
    # and the summarizers have a bounded input window, so an over-long
    # document would otherwise make the pipelines raise.
    sent = sentiment_models[lang](content, truncation=True, max_length=512)[0]['label']
    summ = summary_models[lang](
        content,
        max_length=65,
        min_length=25,
        do_sample=False,
        truncation=True,
    )[0]['summary_text']
    return f"Тональность: {sent}", summ

# Reset
def reset_fields():
    """Return blank values for the four components cleared by the reset button.

    The tuple is, in order: review text, uploaded file, sentiment result,
    summary result.
    """
    blank_text = ""
    no_file = None
    return blank_text, no_file, blank_text, blank_text

# User interface: one row of inputs, one row of action buttons, one row of
# result boxes. Component creation order inside each `with` determines layout.
with gr.Blocks(title="ReviewSmart") as demo:
    gr.Markdown("## ReviewSmart — анализ отзывов на основе NLP")

    # Inputs: free-form review text and/or an uploaded .pdf / .txt file
    # (the file component hands the handlers a filesystem path).
    with gr.Row():
        input_text = gr.Textbox(label="Текст отзыва", lines=8, placeholder="Введите отзыв вручную...")
        input_file = gr.File(label="Файл (.pdf или .txt)", file_types=[".pdf", ".txt"], type="filepath")

    # Action buttons: sentiment, summary, both, clear.
    with gr.Row():
        btn_sent = gr.Button("Определить тональность")
        btn_sum = gr.Button("Создать резюме")
        btn_both = gr.Button("Анализировать оба")
        btn_clear = gr.Button("Очистить")

    # Output boxes for the two analyses.
    with gr.Row():
        sentiment_box = gr.Textbox(label="Результат анализа тональности", lines=2)
        summary_box = gr.Textbox(label="Результат резюмирования", lines=4)

    # Wire buttons to handlers. Inputs are passed as (file, text), matching
    # the handlers' parameter order; reset_fields fills all four components.
    btn_sent.click(fn=detect_sentiment, inputs=[input_file, input_text], outputs=sentiment_box)
    btn_sum.click(fn=summarize_text, inputs=[input_file, input_text], outputs=summary_box)
    btn_both.click(fn=analyze_all, inputs=[input_file, input_text], outputs=[sentiment_box, summary_box])
    btn_clear.click(fn=reset_fields, outputs=[input_text, input_file, sentiment_box, summary_box])

# Launched unconditionally when the module is executed (no __main__ guard),
# with the share and debug options enabled.
demo.launch(share=True, debug=True)