from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
from flask_cors import CORS
import os
import logging
from typing import List
import gc

# LangChain imports
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import google.generativeai as genai

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Optimize for low resource environments
os.environ["HF_HOME"] = "/tmp"
os.environ["XDG_CACHE_HOME"] = "/tmp"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp"

app = Flask(__name__)
CORS(app)

UPLOAD_FOLDER = "/tmp/uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Global model variables
embeddings_model = None
gemini_model = None

def initialize_models():
    """Initialize embedding model and Gemini API"""
    global embeddings_model, gemini_model
    try:
        logger.info("Initializing models...")
        
        # Get Gemini API key from environment
        gemini_api_key = os.environ.get("GEMINI_API_KEY")
        if not gemini_api_key:
            logger.error("GEMINI_API_KEY not found in environment variables!")
            return False
        
        # Configure Gemini
        genai.configure(api_key=gemini_api_key)
        gemini_model = genai.GenerativeModel('gemini-2.0-flash-exp')
        logger.info("Gemini API configured successfully")
        
        # Load embeddings model (only 22MB!)
        logger.info("Loading all-MiniLM-L6-v2...")
        embeddings_model = SentenceTransformer(
            "all-MiniLM-L6-v2",
            device="cpu",
            cache_folder="/tmp"
        )
        
        logger.info("Models initialized successfully")
        return True
        
    except Exception as e:
        logger.error(f"Error initializing models: {str(e)}")
        import traceback
        traceback.print_exc()
        return False

def load_pdf(filepath: str) -> List[str]:
    """Load PDF using LangChain"""
    try:
        logger.info(f"Loading PDF: {filepath}")
        loader = PyPDFLoader(filepath)
        pages = loader.load()
        
        if not pages:
            logger.warning("No pages extracted from PDF")
            return []
        
        docs = [page.page_content for page in pages if page.page_content.strip()]
        logger.info(f"Loaded {len(pages)} pages")
        return docs
        
    except Exception as e:
        logger.error(f"Error loading PDF: {str(e)}")
        import traceback
        traceback.print_exc()
        return []

def create_faiss_index(chunks: List[str]):
    """Create FAISS index for similarity search"""
    try:
        logger.info(f"Creating FAISS index for {len(chunks)} chunks")
        
        batch_size = 32
        embeddings_list = []
        
        for i in range(0, len(chunks), batch_size):
            batch = chunks[i:i+batch_size]
            batch_embeddings = embeddings_model.encode(batch, show_progress_bar=False)
            embeddings_list.append(batch_embeddings)
        
        embeddings = np.vstack(embeddings_list).astype('float32')
        
        dim = embeddings.shape[1]
        index = faiss.IndexFlatL2(dim)
        index.add(embeddings)
        
        logger.info(f"FAISS index created with dimension {dim}")
        
        del embeddings_list
        gc.collect()
        
        return index, embeddings
        
    except Exception as e:
        logger.error(f"Error creating FAISS index: {str(e)}")
        import traceback
        traceback.print_exc()
        raise

def retrieve_context(question: str, chunks: List[str], index, k: int = 5) -> str:
    """Retrieve relevant context for question"""
    try:
        q_embedding = embeddings_model.encode([question])
        q_embedding = np.array(q_embedding).astype('float32')
        
        distances, indices = index.search(q_embedding, k)
        
        relevant_chunks = []
        for i in indices[0]:
            if i < len(chunks):
                relevant_chunks.append(chunks[i])
        
        context = "\n\n".join(relevant_chunks)
        
        logger.info(f"Retrieved {len(relevant_chunks)} relevant chunks")
        return context
        
    except Exception as e:
        logger.error(f"Error retrieving context: {str(e)}")
        return ""

def generate_answer_with_gemini(question: str, context: str) -> str:
    """Generate answer using Gemini API"""
    try:
        logger.info(f"Generating answer with Gemini for: {question}")
        
        prompt = f"""You are a helpful AI assistant that answers questions based on the provided context from a PDF document.

Context from PDF:
{context}

Question: {question}

Instructions:
- Answer the question clearly and concisely based ONLY on the context provided
- If the context doesn't contain enough information to answer, say so
- Provide a well-structured, informative answer
- If asked to summarize, provide a comprehensive summary

Answer:"""
        
        response = gemini_model.generate_content(prompt)
        answer = response.text.strip()
        
        logger.info(f"Generated answer: {answer[:100]}...")
        return answer
        
    except Exception as e:
        logger.error(f"Error generating answer with Gemini: {str(e)}")
        import traceback
        traceback.print_exc()
        return "Sorry, I couldn't generate an answer. Please try again."

def cleanup_temp_files(filepath):
    """Clean up temporary files"""
    try:
        if os.path.exists(filepath):
            os.remove(filepath)
            logger.info(f"Removed temporary file: {filepath}")
    except Exception as e:
        logger.warning(f"Failed to clean up file {filepath}: {str(e)}")

@app.route('/')
def home():
    return jsonify({
        "message": "PDF QA API with Gemini 2.0 Flash is running!",
        "status": "healthy",
        "model": "Google Gemini 2.0 Flash",
        "embeddings": "all-MiniLM-L6-v2"
    })

@app.route('/health')
def health():
    return jsonify({"status": "healthy"}), 200

@app.route('/ask', methods=['POST'])
def ask():
    file = request.files.get("pdf")
    question = request.form.get("question", "")
    filepath = None

    if not file or not question:
        return jsonify({"error": "Both PDF file and question are required"}), 400

    try:
        filename = secure_filename(file.filename)
        filepath = os.path.join(UPLOAD_FOLDER, filename)
        file.save(filepath)
        
        logger.info(f"Processing file: {filename}, Question: '{question}'")
        
        # Load PDF
        docs = load_pdf(filepath)
        if not docs:
            return jsonify({"error": "Could not extract text from the PDF"}), 400
        
        # Split into chunks
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=100,
            separators=["\n\n", "\n", ". ", " ", ""]
        )
        
        chunks = []
        for doc in docs:
            chunks.extend(splitter.split_text(doc))
        
        logger.info(f"Created {len(chunks)} chunks")
        
        if not chunks:
            return jsonify({"error": "PDF content couldn't be processed"}), 400
        
        # Create FAISS index
        index, embeddings = create_faiss_index(chunks)
        
        # Retrieve context (get more chunks for Gemini since it can handle it)
        context = retrieve_context(question, chunks, index, k=7)
        
        if not context:
            return jsonify({"error": "Failed to retrieve context from PDF"}), 500
        
        # Generate answer with Gemini
        answer = generate_answer_with_gemini(question, context)
        
        if not answer or len(answer.strip()) < 10:
            return jsonify({"error": "Failed to generate answer from PDF content"}), 500
        
        # Clean up memory
        del index
        del embeddings
        gc.collect()
        
        return jsonify({
            "answer": answer,
            "model": "gemini-2.0-flash-exp"
        })

    except Exception as e:
        logger.error(f"Error processing request: {str(e)}")
        import traceback
        traceback.print_exc()
        return jsonify({"error": f"An error occurred: {str(e)}"}), 500
    finally:
        if filepath:
            cleanup_temp_files(filepath)
        gc.collect()

if __name__ == "__main__":
    try:
        if initialize_models():
            logger.info("Starting Flask application on port 7860")
            app.run(host="0.0.0.0", port=7860, threaded=True, debug=False)
        else:
            logger.error("Failed to initialize models")
    except Exception as e:
        logger.critical(f"Failed to start application: {str(e)}")
        import traceback
        traceback.print_exc()