"""Streamlit legal-document chatbot backed by an in-process Flask QA endpoint.

The script does three things:

1. Loads a Hugging Face question-answering model and tokenizer.
2. Exposes them through a small Flask endpoint (``POST /answer``) that runs on
   a background thread.
3. Renders a Streamlit UI that splits an uploaded document into chunks and
   asks the endpoint the user's question once per chunk.

Streamlit re-executes this whole script on every user interaction, so the
expensive or stateful steps (model loading, server start) are guarded so that
they only happen once per process.
"""
import threading

import requests
import streamlit as st
import torch
from flask import Flask, jsonify, request
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# NOTE(review): this checkpoint looks like a base pretrained model; if it ships
# no fine-tuned QA head, the span-prediction layer is randomly initialised and
# the answers will not be meaningful -- confirm or swap in a QA fine-tune.
MODEL_NAME = "nlpaueb/legal-bert-base-uncased"
API_PORT = 5000
API_URL = f"http://localhost:{API_PORT}/answer"
FLASK_THREAD_NAME = "legal-qa-flask"
# BERT-base models have 512 position embeddings; longer inputs must be cut.
MAX_INPUT_TOKENS = 512
REQUEST_TIMEOUT_S = 60


@st.cache_resource
def _load_qa_model():
    """Load the tokenizer and QA model once and share them across reruns."""
    qa_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    qa_model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)
    qa_model.eval()
    return qa_tokenizer, qa_model


# Model loading
tokenizer, model = _load_qa_model()

# Flask API, served from a background thread
app = Flask(__name__)


@app.route("/answer", methods=["POST"])
def answer():
    """Answer a question about a context passage.

    Expects a JSON object with non-empty string fields ``context`` and
    ``question``. Returns ``{"answer": <text>}`` on success, or
    ``{"error": <message>}`` with HTTP 400 when the body is malformed.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    context = payload.get("context")
    question = payload.get("question")
    if not isinstance(context, str) or not isinstance(question, str):
        return jsonify({"error": "'context' and 'question' must be strings"}), 400
    if not context.strip() or not question.strip():
        return jsonify({"error": "'context' and 'question' must be non-empty"}), 400

    # Tokenize the pair; truncate so the sequence never exceeds what the model
    # can embed (an over-long paragraph would otherwise crash inference).
    inputs = tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_INPUT_TOKENS,
    )
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)

    # Pick the most likely start and end token of the answer span.
    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits)) + 1
    span_ids = inputs["input_ids"][0][answer_start:answer_end]
    answer_text = tokenizer.convert_tokens_to_string(
        tokenizer.convert_ids_to_tokens(span_ids)
    )
    return jsonify({"answer": answer_text})


def run_flask_app():
    """Run the Flask development server (blocking) on ``API_PORT``."""
    app.run(port=API_PORT)


def _ensure_flask_thread():
    """Return the live server thread, starting it only if none is running.

    The thread is looked up by name because module globals do not survive a
    Streamlit rerun, whereas threads of the process do. Starting a second
    server would fail to bind the already-used port.
    """
    for thread in threading.enumerate():
        if thread.name == FLASK_THREAD_NAME and thread.is_alive():
            return thread
    # daemon=True so the server does not keep the process alive on shutdown.
    thread = threading.Thread(
        target=run_flask_app, name=FLASK_THREAD_NAME, daemon=True
    )
    thread.start()
    return thread


# Start the Flask server on a background thread (once per process)
flask_thread = _ensure_flask_thread()


# Streamlit application
def split_text_into_chunks(text, max_length=512):
    """Group blank-line-separated paragraphs into chunks of bounded size.

    Paragraphs are appended to the current chunk while the chunk length plus
    the paragraph length stays within ``max_length`` characters; otherwise a
    new chunk is started. Each paragraph in a chunk is followed by a blank
    line. Whitespace-only paragraphs are skipped and empty chunks are never
    emitted. A single paragraph longer than ``max_length`` is kept whole as
    its own chunk.

    Args:
        text: The full document text.
        max_length: Soft upper bound, in characters, for a chunk.

    Returns:
        A list of non-empty chunk strings, in document order.
    """
    chunks = []
    current_chunk = ""
    for paragraph in text.split("\n\n"):
        if not paragraph.strip():
            continue
        if len(current_chunk) + len(paragraph) <= max_length:
            current_chunk += paragraph + "\n\n"
            continue
        # The paragraph does not fit: flush what we have (if anything) and
        # start a new chunk with this paragraph.
        if current_chunk:
            chunks.append(current_chunk)
        current_chunk = paragraph + "\n\n"
    if current_chunk:
        chunks.append(current_chunk)
    return chunks


st.title("AI Jogi Chatbot")

# Document upload
uploaded_file = st.file_uploader("Töltsön fel egy dokumentumot", type=["txt", "pdf"])
if uploaded_file:
    try:
        # Text extraction: only UTF-8 text is supported. A binary PDF cannot
        # be decoded this way and is reported to the user instead of crashing.
        context = uploaded_file.read().decode("utf-8")
    except UnicodeDecodeError:
        st.error(
            "A fájl nem olvasható UTF-8 szövegként. A PDF-ek szövegkinyerése "
            "nem támogatott; kérjük, töltsön fel egyszerű szöveges (.txt) fájlt."
        )
    else:
        chunks = split_text_into_chunks(context)
        st.write(f"A dokumentum {len(chunks)} részre bontva.")

        # User question
        question = st.text_input("Írja be a kérdését a dokumentumról:")
        if question:
            answers = []
            for i, chunk in enumerate(chunks):
                try:
                    response = requests.post(
                        API_URL,
                        json={"context": chunk, "question": question},
                        timeout=REQUEST_TIMEOUT_S,
                    )
                except requests.RequestException as exc:
                    # The server is unreachable or timed out; the remaining
                    # chunks would fail the same way, so stop here.
                    st.error(f"Nem sikerült elérni a válaszadó szolgáltatást: {exc}")
                    break
                if response.status_code == 200:
                    chunk_answer = response.json().get("answer")
                    answers.append(f"Rész {i+1}: {chunk_answer}")
                else:
                    st.warning(
                        f"Rész {i+1}: a kiszolgáló hibát jelzett "
                        f"(HTTP {response.status_code})."
                    )
            st.write("Válaszok:")
            for ans in answers:
                st.write(ans)