Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import os | |
| from app.pdf_to_image import convert_pdf_to_images | |
| from app.rag import query_rag | |
| from app.embed_and_store import embed_all_images | |
# Project root: the directory one level above the folder containing this file.
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Uploaded PDFs and their rendered page images live under <root>/data.
data_root = os.path.join(base_dir, "data")
pdf_dir = os.path.join(data_root, "pdfs")
image_dir = os.path.join(data_root, "images")
for data_dir in (pdf_dir, image_dir):
    os.makedirs(data_dir, exist_ok=True)

st.title("🧠 Image-based PDF RAG")

# Seed the per-session state on the first run only; later reruns keep
# whatever values are already stored.
for state_key, make_default in (("processed_files", set), ("image_paths", list)):
    if state_key not in st.session_state:
        st.session_state[state_key] = make_default()
# Upload handling: save the PDF, convert its pages to images, embed them, and
# expose the resulting page image paths as `image_paths` for the query UI.
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file:
    # Page images are tracked per uploaded file name. A single shared list is
    # overwritten by the most recent upload, so re-selecting an earlier PDF
    # would otherwise display and query a different document's pages.
    if "image_paths_by_file" not in st.session_state:
        st.session_state.image_paths_by_file = {}
    paths_by_file = st.session_state.image_paths_by_file

    already_processed = (
        uploaded_file.name in st.session_state.processed_files
        and uploaded_file.name in paths_by_file
    )

    if not already_processed:
        # The file name is supplied by the client; keep only its final path
        # component so it cannot point outside pdf_dir.
        pdf_path = os.path.join(pdf_dir, os.path.basename(uploaded_file.name))

        with st.spinner("Processing PDF and creating embeddings..."):
            with open(pdf_path, "wb") as f:
                # getvalue() returns the whole buffer regardless of the
                # current read position, unlike read().
                f.write(uploaded_file.getvalue())

            image_paths = convert_pdf_to_images(pdf_path, image_dir)

            # NOTE(review): embed_all_images is invoked once per page image;
            # confirm it expects a single image path rather than a directory.
            for image_path in image_paths:
                embed_all_images(image_path)

        # Record the result only after every step succeeded, so a failed run
        # is retried on the next rerun instead of being marked as processed.
        paths_by_file[uploaded_file.name] = image_paths
        st.session_state.processed_files.add(uploaded_file.name)
        st.session_state.image_paths = image_paths
        st.success(
            f"PDF processed! {len(image_paths)} pages converted and embedded for search."
        )
    else:
        st.success("PDF already processed. Ready to query.")
        image_paths = paths_by_file[uploaded_file.name]
        # Keep the legacy key pointing at the pages of the active document.
        st.session_state.image_paths = image_paths
else:
    image_paths = []
# Query UI: pick a page of the processed PDF, preview it, and ask a question
# about it. Shown only once page images are available.
if image_paths:
    st.subheader("Query PDF Content")

    # Select by index and format the label, instead of building a label list
    # and searching it again to recover the index.
    page_idx = st.selectbox(
        "Select a page to query:",
        range(len(image_paths)),
        format_func=lambda i: f"Page {i + 1}",
    )
    selected_image = image_paths[page_idx]

    with st.expander("📄 View selected page"):
        st.image(selected_image, caption=f"Page {page_idx + 1}")

    query = st.text_input("🔍 Ask a question about this page:")

    # Render the button unconditionally. Testing `query and st.button(...)`
    # short-circuits, which hides the button until a question is committed.
    search_clicked = st.button("Search")
    if search_clicked:
        if query:
            with st.spinner("Searching with Gemini..."):
                result = query_rag(selected_image, query)
            st.markdown("### 💬 Answer")
            st.write(result)
        else:
            st.warning("Please enter a question first.")