Spaces:

jeysshon
/

deepseek_chat_pdf

Runtime error

App Files Files Community

deepseek_chat_pdf / app.py

jeysshon

Update app.py

11dc91a verified 6 months ago

raw

history blame contribute delete

3.95 kB

	import os
	import tempfile
	import streamlit as st
	from langchain_community.document_loaders import PyPDFLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores import FAISS
	from langchain.chains import RetrievalQA
	from langchain_community.chat_models import ChatOpenAI

	# Page heading for the application.
	st.title("✨ DataKlug ")

	# Inject a small stylesheet; unsafe_allow_html is required for the raw
	# <style> tag to be emitted instead of escaped.
	# NOTE(review): nothing in this section attaches the `.title` class to an
	# element — confirm the rule actually has a visible effect.
	title_css = """
	<style>
	.title {
	text-align: center;
	color: #FF4B4B;
	}
	</style>
	"""
	st.markdown(title_css, unsafe_allow_html=True)

	# Welcome copy rendered below the heading, in order.
	for intro_line in (
	    "### Bienvenido a tu asistente DataKlug",
	    "Sube tu PDF y conversa con él utilizando inteligencia artificial. ¡Disfruta de la experiencia!",
	):
	    st.markdown(intro_line)

	# Read the DeepSeek API key from Streamlit's secrets store.
	# Accessing st.secrets raises FileNotFoundError when no secrets.toml file
	# exists at all; treat that the same as a missing key so the user gets the
	# friendly message below instead of an unhandled traceback.
	try:
	    api_key = st.secrets.get("DEEPSEEK_API_KEY", None)
	except FileNotFoundError:
	    api_key = None

	# Without a key the model cannot be called, so report it and halt this run.
	if not api_key:
	    st.error("No se encontró la variable `DEEPSEEK_API_KEY` en los secretos de Streamlit. Por favor, configúrala antes de continuar.")
	    st.stop()

	# st.stop() ends the script run, so reaching this line means a key exists.
	# Mirror it into the process environment (optional).
	os.environ["DEEPSEEK_API_KEY"] = api_key

	# Session state is used to hold on to the vector store; create the slot
	# (empty) the first time it is missing.
	if "vector_store" not in st.session_state:
	    st.session_state["vector_store"] = None

	# Step 1: upload the PDF document.
	st.markdown("### 1. Sube un documento PDF para analizar")
	uploaded_file = st.file_uploader(
	    label="Arrastra o haz clic para subir un PDF",
	    type=["pdf"],
	)

	# Index the uploaded PDF when nothing is indexed yet, or when the user has
	# swapped in a different file than the one currently indexed. Previously a
	# second upload was ignored and the chat kept answering from the old document.
	if uploaded_file is not None:
	    # Name + size identifies the upload cheaply across script reruns.
	    upload_key = (uploaded_file.name, uploaded_file.size)
	    needs_indexing = (
	        st.session_state.get("vector_store") is None
	        or st.session_state.get("indexed_upload_key") != upload_key
	    )

	    if needs_indexing:
	        try:
	            with st.spinner("Procesando tu documento, por favor espera..."):
	                # PyPDFLoader is given a filesystem path, so spill the
	                # upload to a temporary file first. delete=False keeps the
	                # file on disk after the handle is closed.
	                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
	                    tmp_file.write(uploaded_file.getvalue())
	                    tmp_file_path = tmp_file.name

	                # Always remove the temporary file, even if parsing fails.
	                try:
	                    documents = PyPDFLoader(tmp_file_path).load()
	                finally:
	                    os.unlink(tmp_file_path)

	                # Split the text into overlapping fragments.
	                text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
	                chunks = text_splitter.split_documents(documents)

	                # An empty chunk list would otherwise fail inside the vector
	                # store with an opaque error; report it explicitly.
	                if not chunks:
	                    raise ValueError("el PDF no contiene texto extraíble")

	                # Embed the fragments and store them in a FAISS index.
	                embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
	                st.session_state.vector_store = FAISS.from_documents(chunks, embeddings)
	                # Record which upload the index belongs to only after success.
	                st.session_state.indexed_upload_key = upload_key

	            st.success("¡Documento procesado con éxito!")
	        except Exception as e:
	            # Top-level UI boundary: show the failure and halt this run.
	            st.error(f"Error al procesar el documento: {e}")
	            st.stop()

	# Step 2: ask questions about the indexed document.
	# Compare against None explicitly rather than relying on the truthiness of
	# the vector store object.
	if st.session_state.get("vector_store") is not None:
	    st.markdown("### 2. Chatea con tu documento")
	    # Strip so a whitespace-only entry is not sent to the model.
	    user_query = st.text_input("Escribe tu pregunta aquí:").strip()

	    if user_query:
	        try:
	            # Set up retrieval + generation (RAG) against the DeepSeek endpoint.
	            retriever = st.session_state.vector_store.as_retriever()
	            llm = ChatOpenAI(
	                model="deepseek-chat",
	                openai_api_key=api_key,
	                openai_api_base="https://api.deepseek.com/v1",
	                temperature=0.85,
	                max_tokens=1000,  # Adjust this value to your needs
	            )
	            qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

	            with st.spinner("Generando respuesta..."):
	                # Chain.run is deprecated; invoke() takes the question under
	                # "query" and returns a dict with the answer under "result".
	                response = qa_chain.invoke({"query": user_query})["result"]
	            st.write(f"Respuesta: {response}")
	        except Exception as e:
	            # Top-level UI boundary: surface the failure to the user.
	            st.error(f"Error al generar la respuesta: {e}")
	else:
	    # Nothing indexed yet: prompt the user to upload a document.
	    st.info("Por favor, sube tu PDF para comenzar.")