Spaces:

Yazi3333
/

reviewsmart

Sleeping

App Files Files Community

reviewsmart / app.py

Yazi3333

Update app.py

72ccf2e verified about 2 months ago

raw

history blame contribute delete

4.17 kB

	import gradio as gr
	from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
	import mimetypes
	import pdfminer.high_level
	from langdetect import detect
	import io
	import os

	# Sentiment classifiers, keyed by the language code used for lookup.
	sentiment_models = dict(
	    en=pipeline(
	        "sentiment-analysis",
	        model="distilbert-base-uncased-finetuned-sst-2-english",
	    ),
	    ru=pipeline(
	        "sentiment-analysis",
	        model="blanchefort/rubert-base-cased-sentiment",
	    ),
	)

	# Summarization pipelines, keyed by the language code used for lookup.
	# The English pipeline is built first, then the Russian model and its
	# tokenizer (slow tokenizer requested via use_fast=False).
	_en_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
	_ru_summary_model = AutoModelForSeq2SeqLM.from_pretrained(
	    "csebuetnlp/mT5_multilingual_XLSum"
	)
	_ru_summary_tokenizer = AutoTokenizer.from_pretrained(
	    "csebuetnlp/mT5_multilingual_XLSum",
	    use_fast=False,
	)
	summary_models = {
	    "en": _en_summarizer,
	    "ru": pipeline(
	        "summarization",
	        model=_ru_summary_model,
	        tokenizer=_ru_summary_tokenizer,
	    ),
	}

	# Unified text loading: typed text wins, otherwise the uploaded file is read.
	def get_text(file_path, text):
	    """Return the review text from the textbox or, failing that, the file.

	    Args:
	        file_path: Path of the uploaded file, or ``None`` when no file
	            was provided.
	        text: Text typed by the user. ``None`` is treated like ``""``.

	    Returns:
	        ``text`` unchanged when it contains non-whitespace characters.
	        Otherwise the file contents: extracted with pdfminer when the
	        MIME type guessed from the file name mentions "pdf", else read
	        as UTF-8 with undecodable bytes ignored. ``""`` when there is
	        neither text nor a file. When reading fails, a string starting
	        with "Ошибка чтения файла: " is returned instead of raising.
	    """
	    # Defensive: a missing textbox value would otherwise fail on .strip().
	    if text and text.strip():
	        return text
	    if file_path is None:
	        return ""

	    try:
	        mime = mimetypes.guess_type(file_path)[0]
	        if mime and "pdf" in mime:
	            with open(file_path, "rb") as f:
	                return pdfminer.high_level.extract_text(f)
	        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
	            return f.read()
	    except Exception as e:
	        # Deliberate best-effort boundary: any read/parse failure becomes
	        # a message rather than an exception.
	        # NOTE(review): the callers in this file pass the returned string
	        # straight to the models, so this message is analysed as if it
	        # were a review — consider surfacing read errors separately.
	        return f"Ошибка чтения файла: {e}"

	# Language detection
	def detect_language_model(text):
	    """Choose the model key ("ru" or "en") for *text*.

	    Args:
	        text: The text whose language should be detected.

	    Returns:
	        "ru" when langdetect reports a code starting with "ru"; "en" for
	        every other detected language, and also when langdetect cannot
	        detect anything (for example digits or punctuation only).
	    """
	    # Imported locally so this function does not depend on a change to
	    # the file's import block.
	    from langdetect.lang_detect_exception import LangDetectException

	    try:
	        lang = detect(text)
	    except LangDetectException:
	        # No usable features in the text: fall back to the default
	        # language instead of crashing the request handler.
	        return "en"
	    return "ru" if lang.startswith("ru") else "en"

	# Sentiment
	def detect_sentiment(file, text):
	    """Classify the sentiment of the typed text or uploaded file.

	    Args:
	        file: Path of the uploaded file, or ``None``.
	        text: Text typed by the user.

	    Returns:
	        "Тональность: <label>" where the label comes from the classifier
	        selected for the detected language, or a prompt asking for input
	        when there is no content.
	    """
	    content = get_text(file, text).strip()
	    if not content:
	        return "Введите текст или загрузите файл"

	    lang = detect_language_model(content)
	    classifier = sentiment_models[lang]
	    # Long reviews (e.g. from a PDF) would otherwise exceed the encoder's
	    # input limit and raise inside the model; truncate instead.
	    # NOTE(review): 512 is assumed to be the limit of both configured
	    # BERT-family checkpoints — confirm if the checkpoints change.
	    result = classifier(content, truncation=True, max_length=512)[0]
	    return f"Тональность: {result['label']}"

	# Summary
	def summarize_text(file, text):
	    """Summarize the typed text or uploaded file.

	    Args:
	        file: Path of the uploaded file, or ``None``.
	        text: Text typed by the user.

	    Returns:
	        The summary text produced by the summarizer selected for the
	        detected language, or a prompt asking for input when there is no
	        content.
	    """
	    content = get_text(file, text).strip()
	    if not content:
	        return "Введите текст или загрузите файл"

	    lang = detect_language_model(content)
	    summarizer = summary_models[lang]
	    # truncation=True clips over-long documents to the tokenizer's limit
	    # instead of feeding the model more tokens than it can handle.
	    result = summarizer(
	        content,
	        max_length=65,
	        min_length=25,
	        do_sample=False,
	        truncation=True,
	    )[0]
	    return result['summary_text']

	# Both analyses
	def analyze_all(file, text):
	    """Run sentiment classification and summarization on the same input.

	    Args:
	        file: Path of the uploaded file, or ``None``.
	        text: Text typed by the user.

	    Returns:
	        A ``(sentiment, summary)`` pair of strings. When there is no
	        content, both elements are the prompt asking for input.
	    """
	    content = get_text(file, text).strip()
	    if not content:
	        return "Введите текст или загрузите файл", "Введите текст или загрузите файл"

	    # The text is read and its language detected once, then shared by
	    # both models.
	    lang = detect_language_model(content)

	    # Truncate over-long input rather than letting the models fail on it.
	    # NOTE(review): 512 is assumed to be the classifier checkpoints'
	    # input limit — confirm if the checkpoints change.
	    sent = sentiment_models[lang](
	        content, truncation=True, max_length=512
	    )[0]['label']
	    summ = summary_models[lang](
	        content,
	        max_length=65,
	        min_length=25,
	        do_sample=False,
	        truncation=True,
	    )[0]['summary_text']
	    return f"Тональность: {sent}", summ

	# Reset
	def reset_fields():
	    """Return blank values for the four fields wired to the clear button.

	    The order matches the clear button's outputs: text input, file
	    input, sentiment result, summary result.
	    """
	    cleared_text = ""
	    cleared_file = None
	    cleared_sentiment = ""
	    cleared_summary = ""
	    return cleared_text, cleared_file, cleared_sentiment, cleared_summary

	# User interface
	with gr.Blocks(title="ReviewSmart") as demo:
	    gr.Markdown("## ReviewSmart — анализ отзывов на основе NLP")

	    # Inputs: free-form text next to a .pdf/.txt upload.
	    with gr.Row():
	        input_text = gr.Textbox(
	            label="Текст отзыва",
	            lines=8,
	            placeholder="Введите отзыв вручную...",
	        )
	        input_file = gr.File(
	            label="Файл (.pdf или .txt)",
	            file_types=[".pdf", ".txt"],
	            type="filepath",
	        )

	    # Action buttons.
	    with gr.Row():
	        btn_sent = gr.Button("Определить тональность")
	        btn_sum = gr.Button("Создать резюме")
	        btn_both = gr.Button("Анализировать оба")
	        btn_clear = gr.Button("Очистить")

	    # Result boxes.
	    with gr.Row():
	        sentiment_box = gr.Textbox(
	            label="Результат анализа тональности",
	            lines=2,
	        )
	        summary_box = gr.Textbox(
	            label="Результат резюмирования",
	            lines=4,
	        )

	    # Every handler receives (file, text), in that order.
	    btn_sent.click(
	        fn=detect_sentiment,
	        inputs=[input_file, input_text],
	        outputs=sentiment_box,
	    )
	    btn_sum.click(
	        fn=summarize_text,
	        inputs=[input_file, input_text],
	        outputs=summary_box,
	    )
	    btn_both.click(
	        fn=analyze_all,
	        inputs=[input_file, input_text],
	        outputs=[sentiment_box, summary_box],
	    )
	    btn_clear.click(
	        fn=reset_fields,
	        outputs=[input_text, input_file, sentiment_box, summary_box],
	    )

	demo.launch(share=True, debug=True)