# Spaces: Sleeping
import streamlit as st | |
import requests | |
import threading | |
from flask import Flask, request, jsonify | |
from transformers import AutoTokenizer, AutoModelForQuestionAnswering | |
import torch | |
# Load the tokenizer and the question-answering model once, at import time.
# NOTE(review): the checkpoint name looks like a base (not QA-fine-tuned)
# model — confirm the question-answering head actually has trained weights.
MODEL_NAME = "nlpaueb/legal-bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)

# Flask application that exposes the model; it is served from a background
# thread further down in this file.
app = Flask(__name__)
@app.route("/answer", methods=["POST"])
def answer():
    """Answer a question about a context passage (extractive QA).

    Expects a JSON body with the string fields ``"context"`` and
    ``"question"``.

    Returns:
        A JSON response ``{"answer": <text>}`` containing the span of the
        context selected by the model, or ``{"error": ...}`` with HTTP 400
        when the body is not JSON or one of the fields is missing/empty.
    """
    # silent=True yields None instead of raising on a non-JSON body, so the
    # request can be rejected with a clear 400 rather than an opaque 500.
    data = request.get_json(silent=True) or {}
    context = data.get("context")
    question = data.get("question")
    if not context or not question:
        return jsonify({"error": "Both 'context' and 'question' are required."}), 400

    # Tokenize the (question, context) pair. Only the context is truncated so
    # the question always survives; the limit is the model's position budget.
    inputs = tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation="only_second",
        max_length=model.config.max_position_embeddings,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Pick the most likely start token and (exclusive) end token.
    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits)) + 1

    # An end before the start produces an empty slice, i.e. an empty answer.
    answer_ids = inputs["input_ids"][0][answer_start:answer_end]
    answer_text = tokenizer.convert_tokens_to_string(
        tokenizer.convert_ids_to_tokens(answer_ids)
    )
    return jsonify({"answer": answer_text})
def run_flask_app():
    """Run the Flask app's built-in server on port 5000.

    Intended to be used as a thread target elsewhere in this file.
    """
    app.run(port=5000)
# Start the Flask server on a background thread — but only once per process.
# Streamlit re-executes this script from top to bottom on every user
# interaction; without this guard each rerun would spawn another thread that
# fails to bind port 5000 because the first server still holds it.
_FLASK_THREAD_NAME = "flask-qa-server"
flask_thread = next(
    (t for t in threading.enumerate() if t.name == _FLASK_THREAD_NAME),
    None,
)
if flask_thread is None:
    flask_thread = threading.Thread(target=run_flask_app, name=_FLASK_THREAD_NAME)
    flask_thread.start()
# Streamlit application
def split_text_into_chunks(text, max_length=512):
    """Group the paragraphs of *text* into chunks of bounded size.

    Paragraphs are separated by a blank line (``"\\n\\n"``). Consecutive
    paragraphs are packed into the same chunk while the chunk built so far
    plus the next paragraph stays within ``max_length`` characters. Every
    paragraph in a chunk is followed by ``"\\n\\n"``.

    Args:
        text: The document text.
        max_length: Soft size limit per chunk, measured in characters (not
            model tokens). A chunk may exceed it because the trailing
            separators are not counted when a paragraph is admitted, and
            because a single over-long paragraph is kept whole.

    Returns:
        A list of non-empty chunk strings; empty when *text* contains no
        non-blank paragraph.
    """
    chunks = []
    current_parts = []
    current_length = 0

    for paragraph in text.split("\n\n"):
        # Blank paragraphs carry no content for the model; skipping them also
        # makes empty input produce no chunks at all.
        if not paragraph.strip():
            continue

        # Flush only when something has been collected, so an over-long first
        # paragraph never emits an empty leading chunk.
        if current_parts and current_length + len(paragraph) > max_length:
            chunks.append("".join(current_parts))
            current_parts = []
            current_length = 0

        piece = paragraph + "\n\n"
        current_parts.append(piece)
        current_length += len(piece)

    if current_parts:
        chunks.append("".join(current_parts))
    return chunks
st.title("AI Jogi Chatbot")

# Document upload.
uploaded_file = st.file_uploader("Töltsön fel egy dokumentumot", type=["txt", "pdf"])
if uploaded_file:
    # The uploader also accepts PDFs, but the bytes are only ever decoded as
    # UTF-8 text; report undecodable files instead of crashing the page.
    try:
        context = uploaded_file.read().decode("utf-8")
    except UnicodeDecodeError:
        context = None
        st.error(
            "A fájl nem olvasható UTF-8 szövegként. "
            "Kérjük, töltsön fel egyszerű szöveges fájlt."
        )

    if context is not None:
        chunks = split_text_into_chunks(context)
        st.write(f"A dokumentum {len(chunks)} részre bontva.")

        # User question.
        question = st.text_input("Írja be a kérdését a dokumentumról:")
        if question:
            answers = []
            for i, chunk in enumerate(chunks):
                # Ask the local QA endpoint about each chunk separately. The
                # timeout keeps a hung backend from freezing the page.
                try:
                    response = requests.post(
                        "http://localhost:5000/answer",
                        json={"context": chunk, "question": question},
                        timeout=60,
                    )
                except requests.RequestException as exc:
                    st.warning(f"Rész {i+1}: a kérés sikertelen ({exc}).")
                    continue

                if response.status_code == 200:
                    # Named answer_text so the module-level Flask view
                    # function `answer` is not overwritten.
                    answer_text = response.json().get("answer")
                    answers.append(f"Rész {i+1}: {answer_text}")
                else:
                    st.warning(
                        f"Rész {i+1}: a kiszolgáló hibát adott vissza "
                        f"(HTTP {response.status_code})."
                    )

            st.write("Válaszok:")
            for ans in answers:
                st.write(ans)