Update app.py
app.py
CHANGED
@@ -2,80 +2,101 @@ import os
 import tempfile
 import streamlit as st
 from langchain_community.document_loaders import PyPDFLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
-from langchain.chains import RetrievalQA
+from langchain.chains import RetrievalQA
 from langchain_community.chat_models import ChatOpenAI
 
-#
-st.title("
+# Application title
+st.title("✨ DataKlug ")
 
+st.markdown(
+    """
+    <style>
+    .title {
+        text-align: center;
+        color: #FF4B4B;
+    }
+    </style>
+    """,
+    unsafe_allow_html=True
+)
 
+st.markdown("### Welcome to your DataKlug assistant")
+st.markdown("Upload your PDF and chat with it using artificial intelligence. Enjoy the experience!")
+
+# Get the API key from the Streamlit secrets
+api_key = st.secrets.get("DEEPSEEK_API_KEY", None)
+
+# Check that the API key exists in the secrets
+if not api_key:
+    st.error("The `DEEPSEEK_API_KEY` variable was not found in the Streamlit secrets. Please configure it before continuing.")
+    st.stop()
+else:
+    # Store the API key in the environment variables (optional)
     os.environ["DEEPSEEK_API_KEY"] = api_key
 
+# Step 1: Upload the PDF document
+st.markdown("### 1. Upload a PDF document to analyze")
+uploaded_file = st.file_uploader("Drag or click to upload a PDF", type=["pdf"])
 
+# Use the session state to keep the vector_store
+if "vector_store" not in st.session_state:
+    st.session_state.vector_store = None
 
+# Process the PDF on upload (only if the vector_store has not been created before)
+if uploaded_file and st.session_state.vector_store is None:
+    try:
+        with st.spinner("Processing your document, please wait..."):
+            # Save the file temporarily
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+                tmp_file.write(uploaded_file.getvalue())
+                tmp_file_path = tmp_file.name
 
+            # Load the PDF with PyPDFLoader
+            loader = PyPDFLoader(tmp_file_path)
+            documents = loader.load()
 
+            # Delete the temporary file
+            os.unlink(tmp_file_path)
 
+            # Split the text into chunks
+            text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+            chunks = text_splitter.split_documents(documents)
 
+            # Generate embeddings and store them in a vector database
+            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+            st.session_state.vector_store = FAISS.from_documents(chunks, embeddings)
+
+        st.success("Document processed successfully!")
+    except Exception as e:
+        st.error(f"Error processing the document: {e}")
+        st.stop()
+
+# Step 2: Ask questions about the document
+if st.session_state.vector_store:
+    st.markdown("### 2. Chat with your document")
+    user_query = st.text_input("Type your question here:")
+
+    if user_query:
+        try:
+            # Set up the Retrieval + Generation (RAG) pipeline with DeepSeek
+            retriever = st.session_state.vector_store.as_retriever()
+            llm = ChatOpenAI(
+                model="deepseek-chat",
+                openai_api_key=api_key,
+                openai_api_base="https://api.deepseek.com/v1",
+                temperature=0.85,
+                max_tokens=1000  # Adjust this value as needed
+            )
+            qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
 
+            # Get the answer
+            with st.spinner("Generating the answer..."):
+                response = qa_chain.run(user_query)
+            st.write(f"**Answer:** {response}")
         except Exception as e:
-            st.error(f"Error
-            st.stop()
-
-# Step 3: Ask Questions About the Document
-if st.session_state.vector_store:
-    st.subheader("💬 Chat with Your Document")
-    user_query = st.text_input("Ask a question:")
-
-    if user_query:
-        try:
-            # Set up the RAG pipeline with DeepSeek LLM
-            retriever = st.session_state.vector_store.as_retriever()
-            llm = ChatOpenAI(
-                model="deepseek-chat",
-                openai_api_key=api_key,
-                openai_api_base="https://api.deepseek.com/v1",
-                temperature=0.85,
-                max_tokens=1000  # Adjust token limit for safety
-            )
-            qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
-
-            # Generate response
-            with st.spinner("Generating response..."):
-                response = qa_chain.run(user_query)
-            st.write(f"**Answer:** {response}")
-        except Exception as e:
-            st.error(f"Error generating response: {e}")
+            st.error(f"Error generating the answer: {e}")
 else:
-    st.
+    st.info("Please upload your PDF to get started.")