priyanshu23456 committed on
Commit d8a5f00 · verified · 1 Parent(s): 4ab0eb9

Update app.py

Files changed (1)
  1. app.py +175 -326
app.py CHANGED
@@ -2,30 +2,32 @@ from flask import Flask, request, jsonify
 from werkzeug.utils import secure_filename
 from flask_cors import CORS
 import os
-import torch
-import fitz  # PyMuPDF
-import pytesseract
-from pdf2image import convert_from_path
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
-from sentence_transformers import SentenceTransformer
-import faiss
-import numpy as np
 import tempfile
-from PIL import Image

-import logging

 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

-# Fix caching issue on Hugging Face Spaces
 os.environ["TRANSFORMERS_CACHE"] = "/tmp"
 os.environ["HF_HOME"] = "/tmp"
 os.environ["XDG_CACHE_HOME"] = "/tmp"

 app = Flask(__name__)
-CORS(app)  # Enable CORS for all routes

 UPLOAD_FOLDER = "/tmp/uploads"
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
@@ -34,334 +36,192 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {device}")

 # Global model variables
-embedder = None
-qa_pipeline = None
-tokenizer = None
-model = None

-# Initialize models once on startup
 def initialize_models():
-    global embedder, qa_pipeline, tokenizer, model
     try:
-        logger.info("Loading SentenceTransformer model...")
-        embedder = SentenceTransformer("all-MiniLM-L6-v2")

-        logger.info("Loading QA pipeline...")
-        qa_pipeline = pipeline(
-            "question-answering",
-            model="distilbert-base-cased-distilled-squad",
-            tokenizer="distilbert-base-cased",
-            device=-1  # Force CPU
         )

-        logger.info("Loading language model...")
-        model_name = "Qwen/Qwen2.5-1.5B-Instruct"
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
-            torch_dtype=torch.float16,  # Use float16 for lower memory on CPU
-            device_map="cpu",  # Explicitly set to CPU
-            low_cpu_mem_usage=True  # Optimize memory loading
         )

-        if tokenizer.pad_token is None:
-            tokenizer.pad_token = tokenizer.eos_token
-            model.config.pad_token_id = model.config.eos_token_id
-
         logger.info("Models initialized successfully")
     except Exception as e:
         logger.error(f"Error initializing models: {str(e)}")
-        raise

-# Generation-based answering
-def answer_with_generation(index, embeddings, chunks, question):
     try:
-        logger.info(f"Answering with generation model: '{question}'")
-        global tokenizer, model
-
-        if tokenizer is None or model is None:
-            logger.info("Generation models not initialized, creating now...")
-            model_name = "Qwen/Qwen2.5-1.5B-Instruct"
-            tokenizer = AutoTokenizer.from_pretrained(model_name)
-            model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                torch_dtype=torch.float16,
-                device_map="cpu",
-                low_cpu_mem_usage=True
-            )
-
-            if tokenizer.pad_token is None:
-                tokenizer.pad_token = tokenizer.eos_token
-                model.config.pad_token_id = model.config.eos_token_id

-        # Get embeddings for question
-        q_embedding = embedder.encode([question])

-        # Find relevant chunks
-        _, top_k_indices = index.search(q_embedding, k=3)
-        relevant_chunks = [chunks[i] for i in top_k_indices[0]]
-        context = " ".join(relevant_chunks)

-        # Limit context size
-        if len(context) > 2000:
-            context = context[:2000]

-        # Create prompt
-        prompt = f"""<|im_start|>system
-You are a helpful assistant answering questions based on provided PDF content. Use the information below to give a clear, concise, and accurate answer. Avoid speculation and focus on the context.
-<|im_end|>
-<|im_start|>user
-**Context**: {context}
-**Question**: {question}
-**Instruction**: Provide a detailed and accurate answer based on the context. If the context doesn't contain enough information, say so clearly. <|im_end|>"""
-
-        # Handle inputs
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
-
-        # Move inputs to CPU
-        inputs = {k: v.to('cpu') for k, v in inputs.items()}

-        # Generate answer
-        output = model.generate(
-            **inputs,
-            max_new_tokens=300,
-            temperature=0.7,
-            top_p=0.9,
-            do_sample=True,
-            num_beams=2,
-            no_repeat_ngram_size=2
         )

-        # Decode and format answer
-        answer = tokenizer.decode(output[0], skip_special_tokens=True)
-        if "<|im_end|>" in answer:
-            answer = answer.split("<|im_end|>")[1].strip()
-        elif "Instruction" in answer:
-            answer = answer.split("Instruction")[1].strip()

-        logger.info(f"Generation answer: '{answer[:50]}...' (length: {len(answer)})")
-        return answer.strip()
-    except Exception as e:
-        logger.error(f"Generation error: {str(e)}")
-        return "I couldn't generate a good answer based on the PDF content."
-


-
-# Cleanup function for temporary files
-def cleanup_temp_files(filepath):
-    try:
-        if os.path.exists(filepath):
-            os.remove(filepath)
-            logger.info(f"Removed temporary file: {filepath}")
-    except Exception as e:
-        logger.warning(f"Failed to clean up file {filepath}: {str(e)}")
-
-# Improved OCR function
-def ocr_pdf(pdf_path):
-    try:
-        logger.info(f"Starting OCR for {pdf_path}")
-        # Use a higher DPI for better quality
-        images = convert_from_path(
-            pdf_path,
-            dpi=300,  # Higher DPI for better quality
-            grayscale=False,  # Color might help with some PDFs
-            thread_count=2,  # Use multiple threads
-            use_pdftocairo=True  # pdftocairo often gives better results
         )

-        text = ""
-        for i, img in enumerate(images):
-            logger.info(f"Processing page {i+1} of {len(images)}")
-            # Preprocess the image for better OCR results
-            preprocessed = preprocess_image_for_ocr(img)
-            # Use tesseract with more options
-            page_text = pytesseract.image_to_string(
-                preprocessed,
-                config='--psm 1 --oem 3 -l eng'  # Page segmentation mode 1 (auto), OCR Engine mode 3 (default)
-            )
-            text += page_text
-        logger.info(f"OCR completed with {len(text)} characters extracted")
-        return text
     except Exception as e:
-        logger.error(f"OCR error: {str(e)}")
-        return ""
-
-# Image preprocessing function for better OCR
-def preprocess_image_for_ocr(img):
-    # Convert to grayscale
-    gray = img.convert('L')
-
-    # Optional: You could add more preprocessing here like:
-    # - Thresholding
-    # - Noise removal
-    # - Contrast enhancement
-
-    return gray

-# Improved extract_text function with better text detection
-def extract_text(pdf_path):
     try:
-        logger.info(f"Extracting text from {pdf_path}")
-        doc = fitz.open(pdf_path)
-        text = ""
-        for page_num, page in enumerate(doc):
-            page_text = page.get_text()
-            text += page_text
-            logger.info(f"Extracted {len(page_text)} characters from page {page_num+1}")

-        # Check if the text is meaningful (more sophisticated check)
-        words = text.split()
-        unique_words = set(word.lower() for word in words if len(word) > 2)

-        logger.info(f"PDF text extraction: {len(text)} chars, {len(words)} words, {len(unique_words)} unique words")

-        # If we don't have enough meaningful text, try OCR
-        if len(unique_words) < 20 or len(text.strip()) < 100:
-            logger.info("Text extraction yielded insufficient results, trying OCR...")
-            ocr_text = ocr_pdf(pdf_path)
-            # If OCR gave us more text, use it
-            if len(ocr_text.strip()) > len(text.strip()):
-                logger.info(f"Using OCR result: {len(ocr_text)} chars (better than {len(text)} chars)")
-                text = ocr_text

-        return text
     except Exception as e:
-        logger.error(f"Text extraction error: {str(e)}")
-        return ""

-# Split into chunks
-def split_into_chunks(text, max_tokens=300, overlap=50):
-    logger.info(f"Splitting text into chunks (max_tokens={max_tokens}, overlap={overlap})")
-    sentences = text.split('.')
-    chunks, current = [], ''
-    for sentence in sentences:
-        sentence = sentence.strip() + '.'
-        if len(current) + len(sentence) < max_tokens:
-            current += sentence
-        else:
-            chunks.append(current.strip())
-            words = current.split()
-            if len(words) > overlap:
-                current = ' '.join(words[-overlap:]) + ' ' + sentence
-            else:
-                current = sentence
-    if current:
-        chunks.append(current.strip())
-    logger.info(f"Split text into {len(chunks)} chunks")
-    return chunks

-# Setup FAISS
-def setup_faiss(chunks):
-    try:
-        logger.info("Setting up FAISS index")
-        global embedder
-        if embedder is None:
-            embedder = SentenceTransformer("all-MiniLM-L6-v2")
-
-        embeddings = embedder.encode(chunks)
-        dim = embeddings.shape[1]
-        index = faiss.IndexFlatL2(dim)
-        index.add(embeddings)
-        logger.info(f"FAISS index created with {len(chunks)} chunks and dimension {dim}")
-        return index, embeddings, chunks
-    except Exception as e:
-        logger.error(f"FAISS setup error: {str(e)}")
-        raise

-# QA pipeline
-def answer_with_qa_pipeline(chunks, question):
-    try:
-        logger.info(f"Answering with QA pipeline: '{question}'")
-        global qa_pipeline
-        if qa_pipeline is None:
-            logger.info("QA pipeline not initialized, creating now...")
-            qa_pipeline = pipeline(
-                "question-answering",
-                model="distilbert-base-cased-distilled-squad",
-                tokenizer="distilbert-base-cased",
-                device=0 if device == "cuda" else -1
-            )
-
-        # Limit context size to avoid token length issues
-        context = " ".join(chunks[:5])
-        if len(context) > 5000:  # Approx token limit
-            context = context[:5000]
-
-        result = qa_pipeline(question=question, context=context)
-        logger.info(f"QA pipeline answer: '{result['answer']}' (score: {result['score']})")
-        return result["answer"]
-    except Exception as e:
-        logger.error(f"QA pipeline error: {str(e)}")
-        return ""

-# Generation-based answering
-def answer_with_generation(index, embeddings, chunks, question):
     try:
-        logger.info(f"Answering with generation model: '{question}'")
-        global tokenizer, model
-
-        if tokenizer is None or model is None:
-            logger.info("Generation models not initialized, creating now...")
-            tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
-            model = AutoModelForCausalLM.from_pretrained(
-                "distilgpt2",
-                device_map="auto",
-                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
-            )
-
-            if tokenizer.pad_token is None:
-                tokenizer.pad_token = tokenizer.eos_token
-                model.config.pad_token_id = model.config.eos_token_id
-
-        # Get embeddings for question
-        q_embedding = embedder.encode([question])
-
-        # Find relevant chunks
-        _, top_k_indices = index.search(q_embedding, k=3)
-        relevant_chunks = [chunks[i] for i in top_k_indices[0]]
-        context = " ".join(relevant_chunks)
-
-        # Limit context size to avoid token length issues
-        if len(context) > 4000:
-            context = context[:4000]
-
-        # Create prompt
-        prompt = f"Answer the following question based on this information:\n\nInformation: {context}\n\nQuestion: {question}\n\nDetailed answer:"
-
-        # Handle inputs
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
-
-        # Move inputs to the right device if needed
-        if torch.cuda.is_available():
-            inputs = {k: v.to('cuda') for k, v in inputs.items()}
-
-        # Generate answer
-        output = model.generate(
-            **inputs,
-            max_new_tokens=300,
-            temperature=0.7,
-            top_p=0.9,
-            do_sample=True,
-            num_beams=3,
-            no_repeat_ngram_size=2
-        )
-
-        # Decode and format answer
-        answer = tokenizer.decode(output[0], skip_special_tokens=True)
-        if "Detailed answer:" in answer:
-            answer = answer.split("Detailed answer:")[-1].strip()
-
-        logger.info(f"Generation answer: '{answer[:50]}...' (length: {len(answer)})")
-        return answer.strip()
     except Exception as e:
-        logger.error(f"Generation error: {str(e)}")
-        return "I couldn't generate a good answer based on the PDF content."

-# API route
 @app.route('/')
 def home():
-    return jsonify({"message": "PDF QA API is running!"})

 @app.route('/ask', methods=['POST'])
 def ask():
@@ -379,46 +239,35 @@ def ask():

         logger.info(f"Processing file: {filename}, Question: '{question}'")

-        # Process PDF and generate answer
-        text = extract_text(filepath)
-        if not text.strip():
             return jsonify({"error": "Could not extract text from the PDF"}), 400
-
-        chunks = split_into_chunks(text)
-        if not chunks:
-            return jsonify({"error": "PDF content couldn't be processed"}), 400
-
-        try:
-            answer = answer_with_qa_pipeline(chunks, question)
-        except Exception as e:
-            logger.warning(f"QA pipeline failed: {str(e)}")
-            answer = ""
-
-        # If QA pipeline didn't give a good answer, try generation
-        if not answer or len(answer.strip()) < 20:
-            try:
-                logger.info("QA pipeline answer insufficient, trying generation...")
-                index, embeddings, chunks = setup_faiss(chunks)
-                answer = answer_with_generation(index, embeddings, chunks, question)
-            except Exception as e:
-                logger.error(f"Generation fallback failed: {str(e)}")
-                return jsonify({"error": "Failed to generate answer from PDF content"}), 500
-
         return jsonify({"answer": answer})

     except Exception as e:
         logger.error(f"Error processing request: {str(e)}")
-        return jsonify({"error": f"An error occurred processing your request: {str(e)}"}), 500
     finally:
-        # Always clean up, even if errors occur
         if filepath:
             cleanup_temp_files(filepath)

 if __name__ == "__main__":
     try:
-        # Initialize models at startup
-        initialize_models()
-        logger.info("Starting Flask application")
-        app.run(host="0.0.0.0", port=7860)
     except Exception as e:
         logger.critical(f"Failed to start application: {str(e)}")
 
 from werkzeug.utils import secure_filename
 from flask_cors import CORS
 import os
+import logging
+from typing import List
 import tempfile

+# LangChain imports
+from langchain.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.llms import HuggingFacePipeline
+from langchain.chains import RetrievalQA
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import torch

 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

+# Optimize for low resource environments
 os.environ["TRANSFORMERS_CACHE"] = "/tmp"
 os.environ["HF_HOME"] = "/tmp"
 os.environ["XDG_CACHE_HOME"] = "/tmp"
+os.environ["TOKENIZERS_PARALLELISM"] = "false"

 app = Flask(__name__)
+CORS(app)

 UPLOAD_FOLDER = "/tmp/uploads"
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)

 logger.info(f"Using device: {device}")

 # Global model variables
+embeddings_model = None
+llm_chain = None

 def initialize_models():
+    """Initialize lightweight models optimized for resource constraints"""
+    global embeddings_model, llm_chain
     try:
+        logger.info("Initializing lightweight models...")

+        # Use lightweight embeddings model
+        logger.info("Loading embeddings model (all-MiniLM-L6-v2)...")
+        embeddings_model = HuggingFaceEmbeddings(
+            model_name="all-MiniLM-L6-v2",
+            model_kwargs={
+                "device": device,
+                "trust_remote_code": True
+            },
+            encode_kwargs={
+                "normalize_embeddings": True,
+                "batch_size": 8
+            }
+        )
+
+        # Use a very lightweight LLM (phi-2 or similar)
+        logger.info("Loading lightweight LLM (phi-2)...")
+        model_name = "microsoft/phi-2"
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            trust_remote_code=True,
+            cache_dir="/tmp"
         )

         model = AutoModelForCausalLM.from_pretrained(
             model_name,
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+            device_map="auto" if device == "cuda" else "cpu",
+            trust_remote_code=True,
+            cache_dir="/tmp",
+            low_cpu_mem_usage=True,
+            load_in_8bit=False  # Disable if memory is very limited
+        )
+
+        # Create text generation pipeline
+        text_gen_pipeline = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=256,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True,
+            device=0 if device == "cuda" else -1,
+            return_full_text=False
+        )
+
+        # Create LLM wrapper
+        llm_chain = HuggingFacePipeline(
+            pipeline=text_gen_pipeline,
+            model_kwargs={
+                "temperature": 0.7,
+                "max_length": 512
+            }
         )

         logger.info("Models initialized successfully")
+        return True
+
     except Exception as e:
         logger.error(f"Error initializing models: {str(e)}")
+        return False

+def load_pdf(filepath: str) -> List[str]:
+    """Load PDF using LangChain with fallback"""
     try:
+        logger.info(f"Loading PDF: {filepath}")
+        loader = PyPDFLoader(filepath)
+        pages = loader.load()

+        if not pages:
+            logger.warning("No pages extracted from PDF")
+            return []

+        # Combine page content
+        docs = [page.page_content for page in pages if page.page_content.strip()]
+        logger.info(f"Loaded {len(pages)} pages with {sum(len(d) for d in docs)} characters")
+        return docs

+    except Exception as e:
+        logger.error(f"Error loading PDF: {str(e)}")
+        return []

+def create_retriever(docs: List[str]):
+    """Create FAISS retriever for efficient similarity search"""
+    try:
+        logger.info("Creating retriever from documents")

+        # Split documents into smaller chunks
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=400,
+            chunk_overlap=50,
+            separators=["\n\n", "\n", " ", ""]
         )

+        # Process all docs and split
+        all_chunks = []
+        for doc in docs:
+            chunks = text_splitter.split_text(doc)
+            all_chunks.extend(chunks)

+        logger.info(f"Created {len(all_chunks)} chunks")

+        # Create FAISS vectorstore
+        vectorstore = FAISS.from_texts(
+            all_chunks,
+            embeddings_model
+        )

+        # Return retriever with limited top_k for speed
+        retriever = vectorstore.as_retriever(
+            search_kwargs={"k": 3}  # Only get top 3 most relevant chunks
         )

+        logger.info("Retriever created successfully")
+        return retriever
+
     except Exception as e:
+        logger.error(f"Error creating retriever: {str(e)}")
+        raise

+def answer_question(retriever, question: str) -> str:
+    """Answer question using RAG chain"""
     try:
+        logger.info(f"Answering question: {question}")

+        # Create RAG chain
+        qa_chain = RetrievalQA.from_chain_type(
+            llm=llm_chain,
+            chain_type="stuff",  # Use "stuff" for simplicity
+            retriever=retriever,
+            return_source_documents=False,
+            chain_type_kwargs={
+                "prompt": _get_qa_prompt()
+            }
+        )

+        result = qa_chain({"query": question})
+        answer = result.get("result", "No answer generated")

+        logger.info(f"Answer generated: {answer[:100]}...")
+        return answer.strip()

     except Exception as e:
+        logger.error(f"Error answering question: {str(e)}")
+        return "Sorry, I couldn't generate an answer from the PDF content."

+def _get_qa_prompt():
+    """Create a simple QA prompt template"""
+    from langchain.prompts import PromptTemplate
+
+    template = """Use the following pieces of context to answer the question at the end.
+    If you don't know the answer, just say you don't know.
+
+    Context:
+    {context}
+
+    Question: {question}
+    Answer:"""
+
+    return PromptTemplate(
+        template=template,
+        input_variables=["context", "question"]
+    )

+def cleanup_temp_files(filepath):
+    """Clean up temporary files"""
     try:
+        if os.path.exists(filepath):
+            os.remove(filepath)
+            logger.info(f"Removed temporary file: {filepath}")
     except Exception as e:
+        logger.warning(f"Failed to clean up file {filepath}: {str(e)}")

 @app.route('/')
 def home():
+    return jsonify({"message": "PDF QA API is running with LangChain!"})

 @app.route('/ask', methods=['POST'])
 def ask():

         logger.info(f"Processing file: {filename}, Question: '{question}'")

+        # Load PDF
+        docs = load_pdf(filepath)
+        if not docs:
             return jsonify({"error": "Could not extract text from the PDF"}), 400
+
+        # Create retriever
+        retriever = create_retriever(docs)
+
+        # Get answer
+        answer = answer_question(retriever, question)
+
+        if not answer or len(answer.strip()) < 5:
+            return jsonify({"error": "Failed to generate answer from PDF content"}), 500
+
         return jsonify({"answer": answer})

     except Exception as e:
         logger.error(f"Error processing request: {str(e)}")
+        return jsonify({"error": f"An error occurred: {str(e)}"}), 500
     finally:
         if filepath:
             cleanup_temp_files(filepath)

 if __name__ == "__main__":
     try:
+        if initialize_models():
+            logger.info("Starting Flask application")
+            app.run(host="0.0.0.0", port=7860, threaded=False)
+        else:
+            logger.error("Failed to initialize models")
     except Exception as e:
         logger.critical(f"Failed to start application: {str(e)}")