Spaces:
Sleeping
Sleeping
File size: 4,171 Bytes
e919b60 185f2a1 e919b60 6493d76 99069d0 e919b60 185f2a1 6493d76 e919b60 185f2a1 6493d76 185f2a1 72ccf2e 185f2a1 6493d76 99069d0 e919b60 99069d0 e919b60 7c9f682 99069d0 7c9f682 99069d0 7c9f682 99069d0 7c9f682 99069d0 6493d76 7c9f682 6493d76 e919b60 99069d0 e919b60 6493d76 e919b60 6493d76 e919b60 99069d0 e919b60 6493d76 e919b60 6493d76 e919b60 99069d0 6493d76 e919b60 6493d76 e919b60 99069d0 e919b60 7c9f682 6493d76 e919b60 6493d76 99069d0 e919b60 6493d76 e919b60 6493d76 e919b60 6493d76 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import mimetypes
import pdfminer.high_level
from langdetect import detect
import io
import os
# Sentiment-analysis pipelines, keyed by the language code they handle.
sentiment_models = {
    lang_code: pipeline("sentiment-analysis", model=checkpoint)
    for lang_code, checkpoint in (
        ("en", "distilbert-base-uncased-finetuned-sst-2-english"),
        ("ru", "blanchefort/rubert-base-cased-sentiment"),
    )
}
# Summarization pipelines, keyed by the language code they handle.
# The "ru" entry passes an explicitly loaded model and a tokenizer created
# with use_fast=False instead of letting pipeline() resolve them by name.
summary_models = dict(
    en=pipeline("summarization", model="facebook/bart-large-cnn"),
    ru=pipeline(
        "summarization",
        model=AutoModelForSeq2SeqLM.from_pretrained(
            "csebuetnlp/mT5_multilingual_XLSum"
        ),
        tokenizer=AutoTokenizer.from_pretrained(
            "csebuetnlp/mT5_multilingual_XLSum", use_fast=False
        ),
    ),
)
# Unified text input: typed text wins, otherwise the uploaded file is read.
def get_text(file_path, text):
    """Return the text to analyse, taken from *text* or from *file_path*.

    Args:
        file_path: Path of the uploaded file, or a falsy value (``None``,
            ``""``) when nothing was uploaded.
        text: Text typed by the user; may be empty or ``None``.

    Returns:
        *text* unchanged when it contains non-whitespace characters.
        Otherwise the contents of the file: when the guessed MIME type
        contains ``"pdf"`` the text is extracted with pdfminer, anything
        else is read as UTF-8 with undecodable bytes ignored.  ``""`` when
        there is neither text nor a file.  If reading fails, no exception
        is raised; a string starting with ``"Ошибка чтения файла: "`` is
        returned instead, so callers receive the error message as text.
    """
    # Guard against None before calling strip(); an absent value must be
    # treated the same as an empty one.
    if text and text.strip():
        return text
    if not file_path:
        return ""
    try:
        mime = mimetypes.guess_type(file_path)[0]
        if mime and "pdf" in mime:
            with open(file_path, "rb") as f:
                return pdfminer.high_level.extract_text(f)
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()
    except Exception as e:
        # Deliberately broad: any read or parse failure is reported to the
        # user as text rather than propagated.
        return f"Ошибка чтения файла: {e}"
# Language detection
def detect_language_model(text):
    """Choose which language-specific model should handle *text*.

    Args:
        text: Text whose language is to be detected.

    Returns:
        ``"ru"`` when the detected language code starts with ``"ru"``;
        ``"en"`` for every other detected language and also when no
        language can be detected at all.
    """
    # Imported locally so this function does not rely on a new file-level
    # import; langdetect itself is already a dependency of this file.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        lang = detect(text)
    except LangDetectException:
        # Raised for input without detectable features (for example only
        # digits or punctuation); fall back to the default model instead of
        # failing the whole request.
        return "en"
    return "ru" if lang.startswith("ru") else "en"
# Sentiment
def detect_sentiment(file, text):
    """Classify the sentiment of the typed text or the uploaded file.

    Args:
        file: Path of the uploaded file, or ``None``.
        text: Text typed by the user.

    Returns:
        ``"Тональность: <label>"`` where ``<label>`` is the label of the
        first result returned by the pipeline selected for the detected
        language, or a prompt asking for input when there is no content.
    """
    content = get_text(file, text).strip()
    if not content:
        return "Введите текст или загрузите файл"
    lang = detect_language_model(content)
    # truncation=True cuts the input to the model's maximum sequence length;
    # without it, long reviews make the pipeline raise instead of answering.
    result = sentiment_models[lang](content, truncation=True)[0]
    return f"Тональность: {result['label']}"
# Summary
def summarize_text(file, text):
    """Summarize the typed text or the uploaded file.

    Args:
        file: Path of the uploaded file, or ``None``.
        text: Text typed by the user.

    Returns:
        The ``summary_text`` of the first result returned by the pipeline
        selected for the detected language, or a prompt asking for input
        when there is no content.
    """
    content = get_text(file, text).strip()
    if not content:
        return "Введите текст или загрузите файл"
    lang = detect_language_model(content)
    # truncation=True cuts over-long input to what the model accepts rather
    # than letting the pipeline fail on it.
    result = summary_models[lang](
        content,
        max_length=65,
        min_length=25,
        do_sample=False,
        truncation=True,
    )[0]
    return result["summary_text"]
# Both analyses
def analyze_all(file, text):
    """Run sentiment analysis and summarization on the same content.

    The content is read and its language detected once, then both
    pipelines for that language are applied.

    Args:
        file: Path of the uploaded file, or ``None``.
        text: Text typed by the user.

    Returns:
        A 2-tuple ``(sentiment_message, summary)``.  When there is no
        content, both elements are the prompt asking for input.
    """
    content = get_text(file, text).strip()
    if not content:
        prompt = "Введите текст или загрузите файл"
        return prompt, prompt
    lang = detect_language_model(content)
    # truncation=True on both calls: over-long input is cut to what each
    # model accepts rather than making the pipeline fail.
    sent = sentiment_models[lang](content, truncation=True)[0]["label"]
    summ = summary_models[lang](
        content,
        max_length=65,
        min_length=25,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]
    return f"Тональность: {sent}", summ
# Reset
def reset_fields():
    """Return the cleared values used to reset the form.

    Returns:
        The 4-tuple ``("", None, "", "")``: an empty string, ``None`` and
        two more empty strings, one value per output component of the
        clear button.
    """
    return "", None, "", ""
# User interface
with gr.Blocks(title="ReviewSmart") as demo:
    gr.Markdown("## ReviewSmart — анализ отзывов на основе NLP")

    # Inputs: free-form text and/or an uploaded .pdf/.txt file.
    with gr.Row():
        input_text = gr.Textbox(
            label="Текст отзыва",
            lines=8,
            placeholder="Введите отзыв вручную...",
        )
        input_file = gr.File(
            label="Файл (.pdf или .txt)",
            file_types=[".pdf", ".txt"],
            type="filepath",
        )

    # Actions.
    with gr.Row():
        btn_sent = gr.Button("Определить тональность")
        btn_sum = gr.Button("Создать резюме")
        btn_both = gr.Button("Анализировать оба")
        btn_clear = gr.Button("Очистить")

    # Outputs.
    with gr.Row():
        sentiment_box = gr.Textbox(label="Результат анализа тональности", lines=2)
        summary_box = gr.Textbox(label="Результат резюмирования", lines=4)

    # Event wiring. Every handler takes (file, text) in that order, so the
    # inputs list is [input_file, input_text] for all of them.
    btn_sent.click(
        fn=detect_sentiment,
        inputs=[input_file, input_text],
        outputs=sentiment_box,
    )
    btn_sum.click(
        fn=summarize_text,
        inputs=[input_file, input_text],
        outputs=summary_box,
    )
    btn_both.click(
        fn=analyze_all,
        inputs=[input_file, input_text],
        outputs=[sentiment_box, summary_box],
    )
    # reset_fields returns one value per component listed here, in order.
    btn_clear.click(
        fn=reset_fields,
        outputs=[input_text, input_file, sentiment_box, summary_box],
    )

# NOTE(review): share=True requests a public share link and debug=True keeps
# the call blocking; presumably unnecessary on a hosted deployment - confirm.
demo.launch(share=True, debug=True)
|