jeysshon committed
Commit 11dc91a · verified · 1 Parent(s): 1af833b

Update app.py

Files changed (1)
  1. app.py +81 -60
app.py CHANGED
@@ -2,80 +2,101 @@ import os
  import tempfile
  import streamlit as st
  from langchain_community.document_loaders import PyPDFLoader
- from langchain.text_splitter import RecursiveCharacterTextSplitter  # Correct import
  from langchain_community.embeddings import HuggingFaceEmbeddings
  from langchain_community.vectorstores import FAISS
- from langchain.chains import RetrievalQA  # Correct import
  from langchain_community.chat_models import ChatOpenAI

- # Streamlit App Title
- st.title("📄 DeepSeek-Powered RAG Chatbot")

- # Step 1: Input API Key
- api_key = st.text_input("🔑 Enter your DeepSeek API Key:", type="password")

- if api_key:
-     # Set the API key as an environment variable (optional)
      os.environ["DEEPSEEK_API_KEY"] = api_key

-     # Step 2: Upload PDF Document
-     uploaded_file = st.file_uploader("📂 Upload a PDF document", type=["pdf"])

-     # Use session state to persist the vector_store
-     if "vector_store" not in st.session_state:
-         st.session_state.vector_store = None

-     if uploaded_file and st.session_state.vector_store is None:
-         try:
-             with st.spinner("Processing document..."):
-                 # Save the uploaded file temporarily
-                 with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
-                     tmp_file.write(uploaded_file.getvalue())
-                     tmp_file_path = tmp_file.name

-                 # Use the temporary file path with PyPDFLoader
-                 loader = PyPDFLoader(tmp_file_path)
-                 documents = loader.load()

-                 # Remove the temporary file
-                 os.unlink(tmp_file_path)

-                 # Split the document into chunks
-                 text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
-                 chunks = text_splitter.split_documents(documents)

-                 # Generate embeddings and store them in a vector database
-                 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-                 st.session_state.vector_store = FAISS.from_documents(chunks, embeddings)

-             st.success("Document processed successfully!")
          except Exception as e:
-             st.error(f"Error processing document: {e}")
-             st.stop()
-
-     # Step 3: Ask Questions About the Document
-     if st.session_state.vector_store:
-         st.subheader("💬 Chat with Your Document")
-         user_query = st.text_input("Ask a question:")
-
-         if user_query:
-             try:
-                 # Set up the RAG pipeline with DeepSeek LLM
-                 retriever = st.session_state.vector_store.as_retriever()
-                 llm = ChatOpenAI(
-                     model="deepseek-chat",
-                     openai_api_key=api_key,
-                     openai_api_base="https://api.deepseek.com/v1",
-                     temperature=0.85,
-                     max_tokens=1000  # Adjust token limit for safety
-                 )
-                 qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
-
-                 # Generate response
-                 with st.spinner("Generating response..."):
-                     response = qa_chain.run(user_query)
-                     st.write(f"**Answer:** {response}")
-             except Exception as e:
-                 st.error(f"Error generating response: {e}")
  else:
-     st.warning("Please enter your DeepSeek API key to proceed.")
 
  import tempfile
  import streamlit as st
  from langchain_community.document_loaders import PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain_community.embeddings import HuggingFaceEmbeddings
  from langchain_community.vectorstores import FAISS
+ from langchain.chains import RetrievalQA
  from langchain_community.chat_models import ChatOpenAI

+ # Application title
+ st.title(" DataKlug ")

+ st.markdown(
+     """
+     <style>
+     .title {
+         text-align: center;
+         color: #FF4B4B;
+     }
+     </style>
+     """,
+     unsafe_allow_html=True
+ )

+ st.markdown("### Welcome to your DataKlug assistant")
+ st.markdown("Upload your PDF and chat with it using artificial intelligence. Enjoy the experience!")
+
+ # Get the API key from the Streamlit secrets
+ api_key = st.secrets.get("DEEPSEEK_API_KEY", None)
+
+ # Check that the API key exists in the secrets
+ if not api_key:
+     st.error("The `DEEPSEEK_API_KEY` variable was not found in the Streamlit secrets. Please configure it before continuing.")
+     st.stop()
+ else:
+     # Store the API key in the environment variables (optional)
      os.environ["DEEPSEEK_API_KEY"] = api_key

+ # Step 1: Upload the PDF document
+ st.markdown("### 1. Upload a PDF document to analyze")
+ uploaded_file = st.file_uploader("Drag or click to upload a PDF", type=["pdf"])

+ # Use session state to persist the vector_store
+ if "vector_store" not in st.session_state:
+     st.session_state.vector_store = None

+ # Process the PDF on upload (only if the vector_store has not been created before)
+ if uploaded_file and st.session_state.vector_store is None:
+     try:
+         with st.spinner("Processing your document, please wait..."):
+             # Save the file temporarily
+             with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+                 tmp_file.write(uploaded_file.getvalue())
+                 tmp_file_path = tmp_file.name

+             # Load the PDF with PyPDFLoader
+             loader = PyPDFLoader(tmp_file_path)
+             documents = loader.load()

+             # Delete the temporary file
+             os.unlink(tmp_file_path)

+             # Split the text into chunks
+             text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+             chunks = text_splitter.split_documents(documents)

+             # Generate embeddings and store them in a vector database
+             embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+             st.session_state.vector_store = FAISS.from_documents(chunks, embeddings)
+
+         st.success("Document processed successfully!")
+     except Exception as e:
+         st.error(f"Error processing the document: {e}")
+         st.stop()
+
+ # Step 2: Ask questions about the document
+ if st.session_state.vector_store:
+     st.markdown("### 2. Chat with your document")
+     user_query = st.text_input("Type your question here:")
+
+     if user_query:
+         try:
+             # Set up the Retrieval + Generation (RAG) pipeline with DeepSeek
+             retriever = st.session_state.vector_store.as_retriever()
+             llm = ChatOpenAI(
+                 model="deepseek-chat",
+                 openai_api_key=api_key,
+                 openai_api_base="https://api.deepseek.com/v1",
+                 temperature=0.85,
+                 max_tokens=1000  # Adjust this value to your needs
+             )
+             qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

+             # Get the answer
+             with st.spinner("Generating answer..."):
+                 response = qa_chain.run(user_query)
+                 st.write(f"**Answer:** {response}")
          except Exception as e:
+             st.error(f"Error generating the answer: {e}")
  else:
+     st.info("Please upload your PDF to begin.")
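
With this change the app no longer asks for the key in the UI: it reads `DEEPSEEK_API_KEY` from Streamlit secrets and stops with an error if the key is missing. A minimal sketch of the required configuration, assuming a local run with Streamlit's standard secrets file (a hosted deployment would supply the same key through its platform's secrets settings instead):

    # .streamlit/secrets.toml — read by st.secrets at startup; keep it out of version control
    DEEPSEEK_API_KEY = "your-deepseek-api-key"  # placeholder, not a real key

With the secret in place, the app starts as usual with `streamlit run app.py`.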