jeysshon committed
Commit 11dc91a · verified · 1 Parent(s): 1af833b

Update app.py

Files changed (1)
  1. app.py +81 -60
app.py CHANGED
@@ -2,80 +2,101 @@ import os
  import tempfile
  import streamlit as st
  from langchain_community.document_loaders import PyPDFLoader
- from langchain.text_splitter import RecursiveCharacterTextSplitter  # Correct import
  from langchain_community.embeddings import HuggingFaceEmbeddings
  from langchain_community.vectorstores import FAISS
- from langchain.chains import RetrievalQA  # Correct import
  from langchain_community.chat_models import ChatOpenAI

- # Streamlit App Title
- st.title("📄 DeepSeek-Powered RAG Chatbot")

- # Step 1: Input API Key
- api_key = st.text_input("🔑 Enter your DeepSeek API Key:", type="password")

- if api_key:
-     # Set the API key as an environment variable (optional)
      os.environ["DEEPSEEK_API_KEY"] = api_key

-     # Step 2: Upload PDF Document
-     uploaded_file = st.file_uploader("📂 Upload a PDF document", type=["pdf"])

-     # Use session state to persist the vector_store
-     if "vector_store" not in st.session_state:
-         st.session_state.vector_store = None

-     if uploaded_file and st.session_state.vector_store is None:
-         try:
-             with st.spinner("Processing document..."):
-                 # Save the uploaded file temporarily
-                 with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
-                     tmp_file.write(uploaded_file.getvalue())
-                     tmp_file_path = tmp_file.name

-                 # Use the temporary file path with PyPDFLoader
-                 loader = PyPDFLoader(tmp_file_path)
-                 documents = loader.load()

-                 # Remove the temporary file
-                 os.unlink(tmp_file_path)

-                 # Split the document into chunks
-                 text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
-                 chunks = text_splitter.split_documents(documents)

-                 # Generate embeddings and store them in a vector database
-                 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-                 st.session_state.vector_store = FAISS.from_documents(chunks, embeddings)

-             st.success("Document processed successfully!")
          except Exception as e:
-             st.error(f"Error processing document: {e}")
-             st.stop()
-
-     # Step 3: Ask Questions About the Document
-     if st.session_state.vector_store:
-         st.subheader("💬 Chat with Your Document")
-         user_query = st.text_input("Ask a question:")
-
-         if user_query:
-             try:
-                 # Set up the RAG pipeline with DeepSeek LLM
-                 retriever = st.session_state.vector_store.as_retriever()
-                 llm = ChatOpenAI(
-                     model="deepseek-chat",
-                     openai_api_key=api_key,
-                     openai_api_base="https://api.deepseek.com/v1",
-                     temperature=0.85,
-                     max_tokens=1000  # Adjust token limit for safety
-                 )
-                 qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
-
-                 # Generate response
-                 with st.spinner("Generating response..."):
-                     response = qa_chain.run(user_query)
-                     st.write(f"**Answer:** {response}")
-             except Exception as e:
-                 st.error(f"Error generating response: {e}")
  else:
-     st.warning("Please enter your DeepSeek API key to proceed.")
 
  import tempfile
  import streamlit as st
  from langchain_community.document_loaders import PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain_community.embeddings import HuggingFaceEmbeddings
  from langchain_community.vectorstores import FAISS
+ from langchain.chains import RetrievalQA
  from langchain_community.chat_models import ChatOpenAI

+ # Application title
+ st.title(" DataKlug ")

+ st.markdown(
+     """
+     <style>
+     .title {
+         text-align: center;
+         color: #FF4B4B;
+     }
+     </style>
+     """,
+     unsafe_allow_html=True
+ )

+ st.markdown("### Welcome to your DataKlug assistant")
+ st.markdown("Upload your PDF and chat with it using artificial intelligence. Enjoy the experience!")
+
+ # Get the API key from the Streamlit secrets
+ api_key = st.secrets.get("DEEPSEEK_API_KEY", None)
+
+ # Check that the API key exists in the secrets
+ if not api_key:
+     st.error("The `DEEPSEEK_API_KEY` variable was not found in the Streamlit secrets. Please configure it before continuing.")
+     st.stop()
+ else:
+     # Store the API key in the environment variables (optional)
      os.environ["DEEPSEEK_API_KEY"] = api_key

+ # Step 1: Upload the PDF document
+ st.markdown("### 1. Upload a PDF document to analyze")
+ uploaded_file = st.file_uploader("Drag or click to upload a PDF", type=["pdf"])

+ # Use session state to persist the vector_store
+ if "vector_store" not in st.session_state:
+     st.session_state.vector_store = None

+ # Process the PDF on upload (only if the vector_store has not been created before)
+ if uploaded_file and st.session_state.vector_store is None:
+     try:
+         with st.spinner("Processing your document, please wait..."):
+             # Save the file temporarily
+             with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+                 tmp_file.write(uploaded_file.getvalue())
+                 tmp_file_path = tmp_file.name

+             # Load the PDF with PyPDFLoader
+             loader = PyPDFLoader(tmp_file_path)
+             documents = loader.load()

+             # Delete the temporary file
+             os.unlink(tmp_file_path)

+             # Split the text into chunks
+             text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+             chunks = text_splitter.split_documents(documents)

+             # Generate embeddings and store them in a vector database
+             embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+             st.session_state.vector_store = FAISS.from_documents(chunks, embeddings)
+
+         st.success("Document processed successfully!")
+     except Exception as e:
+         st.error(f"Error processing the document: {e}")
+         st.stop()
+
+ # Step 2: Ask questions about the document
+ if st.session_state.vector_store:
+     st.markdown("### 2. Chat with your document")
+     user_query = st.text_input("Type your question here:")
+
+     if user_query:
+         try:
+             # Set up the Retrieval + Generation (RAG) pipeline with DeepSeek
+             retriever = st.session_state.vector_store.as_retriever()
+             llm = ChatOpenAI(
+                 model="deepseek-chat",
+                 openai_api_key=api_key,
+                 openai_api_base="https://api.deepseek.com/v1",
+                 temperature=0.85,
+                 max_tokens=1000  # Adjust this value to your needs
+             )
+             qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

+             # Get the answer
+             with st.spinner("Generating answer..."):
+                 response = qa_chain.run(user_query)
+                 st.write(f"**Answer:** {response}")
          except Exception as e:
+             st.error(f"Error generating the answer: {e}")
  else:
+     st.info("Please upload your PDF to begin.")
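
With this change the app no longer asks for the key in the UI: it reads `DEEPSEEK_API_KEY` from Streamlit secrets and stops with an error if the key is missing. A minimal sketch of the required configuration, assuming a local run with Streamlit's standard secrets file (a hosted deployment would supply the same key through its platform's secrets settings instead):

    # .streamlit/secrets.toml — read by st.secrets at startup; keep it out of version control
    DEEPSEEK_API_KEY = "your-deepseek-api-key"  # placeholder, not a real key

With the secret in place, the app starts as usual with `streamlit run app.py`.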