from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
from flask_cors import CORS
import os
import logging
from typing import List
import gc

# LangChain imports
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import google.generativeai as genai

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Optimize for low-resource environments
os.environ["HF_HOME"] = "/tmp"
os.environ["XDG_CACHE_HOME"] = "/tmp"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp"

app = Flask(__name__)
CORS(app)

UPLOAD_FOLDER = "/tmp/uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Global model variables
embeddings_model = None
gemini_model = None


def initialize_models():
    """Initialize the embedding model and the Gemini API."""
    global embeddings_model, gemini_model
    try:
        logger.info("Initializing models...")

        # Get the Gemini API key from the environment
        gemini_api_key = os.environ.get("GEMINI_API_KEY")
        if not gemini_api_key:
            logger.error("GEMINI_API_KEY not found in environment variables!")
            return False

        # Configure Gemini
        genai.configure(api_key=gemini_api_key)
        gemini_model = genai.GenerativeModel('gemini-2.0-flash-exp')
        logger.info("Gemini API configured successfully")

        # Load the embeddings model (small: ~22M parameters)
        logger.info("Loading all-MiniLM-L6-v2...")
        embeddings_model = SentenceTransformer(
            "all-MiniLM-L6-v2",
            device="cpu",
            cache_folder="/tmp"
        )

        logger.info("Models initialized successfully")
        return True
    except Exception as e:
        logger.error(f"Error initializing models: {str(e)}")
        import traceback
        traceback.print_exc()
        return False


def load_pdf(filepath: str) -> List[str]:
    """Load a PDF with LangChain and return the text of each non-empty page."""
    try:
        logger.info(f"Loading PDF: {filepath}")
        loader = PyPDFLoader(filepath)
        pages = loader.load()
        if not pages:
            logger.warning("No pages extracted from PDF")
            return []
        docs = [page.page_content for page in pages if page.page_content.strip()]
        logger.info(f"Loaded {len(docs)} non-empty pages (of {len(pages)} total)")
        return docs
    except Exception as e:
        logger.error(f"Error loading PDF: {str(e)}")
        import traceback
        traceback.print_exc()
        return []


def create_faiss_index(chunks: List[str]):
    """Create a FAISS index over the chunk embeddings for similarity search."""
    try:
        logger.info(f"Creating FAISS index for {len(chunks)} chunks")

        # Encode in small batches to keep peak memory low
        batch_size = 32
        embeddings_list = []
        for i in range(0, len(chunks), batch_size):
            batch = chunks[i:i + batch_size]
            batch_embeddings = embeddings_model.encode(batch, show_progress_bar=False)
            embeddings_list.append(batch_embeddings)

        embeddings = np.vstack(embeddings_list).astype('float32')
        dim = embeddings.shape[1]
        index = faiss.IndexFlatL2(dim)
        index.add(embeddings)
        logger.info(f"FAISS index created with dimension {dim}")

        del embeddings_list
        gc.collect()
        return index, embeddings
    except Exception as e:
        logger.error(f"Error creating FAISS index: {str(e)}")
        import traceback
        traceback.print_exc()
        raise


def retrieve_context(question: str, chunks: List[str], index, k: int = 5) -> str:
    """Retrieve the k chunks most relevant to the question."""
    try:
        q_embedding = embeddings_model.encode([question])
        q_embedding = np.array(q_embedding).astype('float32')
        distances, indices = index.search(q_embedding, k)

        relevant_chunks = []
        for i in indices[0]:
            # FAISS returns -1 for missing neighbors when k exceeds the
            # index size, so guard the lower bound as well
            if 0 <= i < len(chunks):
                relevant_chunks.append(chunks[i])

        context = "\n\n".join(relevant_chunks)
        logger.info(f"Retrieved {len(relevant_chunks)} relevant chunks")
        return context
    except Exception as e:
        logger.error(f"Error retrieving context: {str(e)}")
        return ""


def generate_answer_with_gemini(question: str, context: str) -> str:
    """Generate an answer using the Gemini API."""
    try:
        logger.info(f"Generating answer with Gemini for: {question}")

        prompt = f"""You are a helpful AI assistant that answers questions based on the provided context from a PDF document.

Context from PDF:
{context}

Question: {question}

Instructions:
- Answer the question clearly and concisely based ONLY on the context provided
- If the context doesn't contain enough information to answer, say so
- Provide a well-structured, informative answer
- If asked to summarize, provide a comprehensive summary

Answer:"""

        response = gemini_model.generate_content(prompt)
        answer = response.text.strip()
        logger.info(f"Generated answer: {answer[:100]}...")
        return answer
    except Exception as e:
        logger.error(f"Error generating answer with Gemini: {str(e)}")
        import traceback
        traceback.print_exc()
        return "Sorry, I couldn't generate an answer. Please try again."


def cleanup_temp_files(filepath):
    """Clean up temporary files."""
    try:
        if os.path.exists(filepath):
            os.remove(filepath)
            logger.info(f"Removed temporary file: {filepath}")
    except Exception as e:
        logger.warning(f"Failed to clean up file {filepath}: {str(e)}")


@app.route('/')
def home():
    return jsonify({
        "message": "PDF QA API with Gemini 2.0 Flash is running!",
        "status": "healthy",
        "model": "Google Gemini 2.0 Flash",
        "embeddings": "all-MiniLM-L6-v2"
    })


@app.route('/health')
def health():
    return jsonify({"status": "healthy"}), 200


@app.route('/ask', methods=['POST'])
def ask():
    file = request.files.get("pdf")
    question = request.form.get("question", "")
    filepath = None

    if not file or not question:
        return jsonify({"error": "Both PDF file and question are required"}), 400

    try:
        # secure_filename can return an empty string for unsafe names,
        # so fall back to a safe default
        filename = secure_filename(file.filename) or "upload.pdf"
        filepath = os.path.join(UPLOAD_FOLDER, filename)
        file.save(filepath)
        logger.info(f"Processing file: {filename}, Question: '{question}'")

        # Load PDF
        docs = load_pdf(filepath)
        if not docs:
            return jsonify({"error": "Could not extract text from the PDF"}), 400

        # Split into chunks
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=100,
            separators=["\n\n", "\n", ". ", " ", ""]
        )
        chunks = []
        for doc in docs:
            chunks.extend(splitter.split_text(doc))
        logger.info(f"Created {len(chunks)} chunks")

        if not chunks:
            return jsonify({"error": "PDF content couldn't be processed"}), 400

        # Create FAISS index
        index, embeddings = create_faiss_index(chunks)

        # Retrieve context (fetch more chunks for Gemini, since its context
        # window can handle them)
        context = retrieve_context(question, chunks, index, k=7)
        if not context:
            return jsonify({"error": "Failed to retrieve context from PDF"}), 500

        # Generate answer with Gemini
        answer = generate_answer_with_gemini(question, context)
        if not answer or len(answer.strip()) < 10:
            return jsonify({"error": "Failed to generate answer from PDF content"}), 500

        # Clean up memory
        del index
        del embeddings
        gc.collect()

        return jsonify({
            "answer": answer,
            "model": "gemini-2.0-flash-exp"
        })
    except Exception as e:
        logger.error(f"Error processing request: {str(e)}")
        import traceback
        traceback.print_exc()
        return jsonify({"error": f"An error occurred: {str(e)}"}), 500
    finally:
        if filepath:
            cleanup_temp_files(filepath)
        gc.collect()


if __name__ == "__main__":
    try:
        if initialize_models():
            logger.info("Starting Flask application on port 7860")
            app.run(host="0.0.0.0", port=7860, threaded=True, debug=False)
        else:
            logger.error("Failed to initialize models")
    except Exception as e:
        logger.critical(f"Failed to start application: {str(e)}")
        import traceback
        traceback.print_exc()