# legalgeekapp / app.py
# Last commit by albhu: "Update app.py" (e00a4eb, verified)
# File size: 2.71 kB
import streamlit as st
import requests
import threading
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch
# Load the model
# NOTE(review): the checkpoint name suggests a base (not QA-fine-tuned) model;
# confirm it ships a trained question-answering head, otherwise the predicted
# answer spans are not meaningful.
MODEL_NAME = "nlpaueb/legal-bert-base-uncased"


@st.cache_resource
def _load_qa_model():
    """Load the tokenizer and question-answering model once per process.

    Streamlit re-executes this script on every user interaction; caching the
    loaded objects avoids re-reading the weights on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple for ``MODEL_NAME``.
    """
    qa_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    qa_model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)
    return qa_tokenizer, qa_model


tokenizer, model = _load_qa_model()

# Create the Flask API that is served from a background thread
app = Flask(__name__)
@app.route("/answer", methods=["POST"])
def answer():
    """Extract an answer span for a question from a context passage.

    Expects a JSON object body with string fields ``"context"`` and
    ``"question"``.

    Returns:
        ``{"answer": <text>}`` on success, or ``({"error": <message>}, 400)``
        when the body is not a JSON object or either field is not a string.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body,
    # so malformed requests get a clear 400 rather than an unhandled error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    context = data.get("context")
    question = data.get("question")
    if not isinstance(context, str) or not isinstance(question, str):
        return (
            jsonify({"error": "'context' and 'question' must both be strings."}),
            400,
        )

    # Tokenize the (question, context) pair; truncate so the input never
    # exceeds the model's position-embedding limit.
    inputs = tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation=True,
        max_length=model.config.max_position_embeddings,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Pick the most likely start and end token positions (end is exclusive).
    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits)) + 1

    # If the predicted end precedes the start the slice is empty and the
    # answer is an empty string.
    answer_ids = inputs["input_ids"][0][answer_start:answer_end].tolist()
    answer_text = tokenizer.convert_tokens_to_string(
        tokenizer.convert_ids_to_tokens(answer_ids)
    )
    return jsonify({"answer": answer_text})
def run_flask_app():
    """Run the Flask server on port 5000; blocks until the server stops."""
    app.run(port=5000)


# Start the Flask server on a background thread.
# Streamlit re-executes this script on every interaction, so look for a thread
# that is already serving before starting a new one; a second server would only
# fail to bind port 5000. threading.enumerate() lists live threads only, so a
# server thread that has died is replaced.
FLASK_THREAD_NAME = "flask-answer-api"
flask_thread = next(
    (t for t in threading.enumerate() if t.name == FLASK_THREAD_NAME),
    None,
)
if flask_thread is None:
    # daemon=True so the server thread does not keep the process alive on exit.
    flask_thread = threading.Thread(
        target=run_flask_app,
        name=FLASK_THREAD_NAME,
        daemon=True,
    )
    flask_thread.start()
# Streamlit application
def split_text_into_chunks(text, max_length=512):
    """Split text into chunks of whole paragraphs of roughly ``max_length`` characters.

    Paragraphs are separated by blank lines (``"\\n\\n"``). Consecutive
    paragraphs are packed into one chunk while they fit; every paragraph in a
    chunk is followed by ``"\\n\\n"``. The separator after the last paragraph
    of a chunk is not counted against ``max_length``.

    A paragraph longer than ``max_length`` is cut into consecutive slices of at
    most ``max_length`` characters (the cut may fall inside a word). Chunks
    that would contain only whitespace are never emitted, so empty text yields
    an empty list.

    Args:
        text: The document text.
        max_length: Maximum number of characters per chunk; must be >= 1.

    Returns:
        A list of non-blank chunk strings, in document order.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")

    separator = "\n\n"
    chunks = []
    current_chunk = ""

    for paragraph in text.split(separator):
        # Slice oversized paragraphs; range() is empty for an empty paragraph,
        # so fall back to the paragraph itself to preserve blank-line runs.
        pieces = [
            paragraph[start:start + max_length]
            for start in range(0, len(paragraph), max_length)
        ] or [paragraph]

        for piece in pieces:
            if len(current_chunk) + len(piece) > max_length:
                # Close the current chunk, but never emit a blank one (this
                # happened when the very first paragraph was oversized).
                if current_chunk.strip():
                    chunks.append(current_chunk)
                current_chunk = ""
            current_chunk += piece + separator

    if current_chunk.strip():
        chunks.append(current_chunk)
    return chunks
st.title("AI Jogi Chatbot")

# Document upload
# NOTE(review): "pdf" is accepted by the uploader, but the bytes are only
# decoded as UTF-8 text below; no PDF parsing happens here. PDF content is
# generally not valid UTF-8, so such uploads end up in the error branch.
uploaded_file = st.file_uploader("Töltsön fel egy dokumentumot", type=["txt", "pdf"])
if uploaded_file:
    try:
        context = uploaded_file.read().decode("utf-8")  # extract the text
    except UnicodeDecodeError:
        context = None
        st.error(
            "A fájl nem olvasható UTF-8 szövegként. "
            "Kérjük, egyszerű szöveges (.txt) fájlt töltsön fel."
        )

    if context is not None:
        chunks = split_text_into_chunks(context)
        st.write(f"A dokumentum {len(chunks)} részre bontva.")

        # User question
        question = st.text_input("Írja be a kérdését a dokumentumról:")
        if question:
            answers = []
            for number, chunk in enumerate(chunks, start=1):
                try:
                    # A timeout keeps the UI from hanging if the API thread is
                    # not up yet or is stuck.
                    response = requests.post(
                        "http://localhost:5000/answer",
                        json={"context": chunk, "question": question},
                        timeout=60,
                    )
                    if response.status_code != 200:
                        st.warning(
                            f"Rész {number}: a kiszolgáló hibát jelzett "
                            f"(HTTP {response.status_code})."
                        )
                        continue
                    # Named chunk_answer so the module-level Flask view
                    # function `answer` is not rebound.
                    chunk_answer = response.json().get("answer")
                except (requests.RequestException, ValueError) as exc:
                    # Connection problems, timeouts, or a non-JSON body.
                    st.warning(
                        f"Rész {number}: a válaszszolgáltatás nem érhető el ({exc})."
                    )
                    continue
                answers.append(f"Rész {number}: {chunk_answer}")

            st.write("Válaszok:")
            for ans in answers:
                st.write(ans)