priyanshu23456 committed
Commit 4ab0eb9 · verified · 1 Parent(s): e9905be

Update app.py

Files changed (1)
  1. app.py +124 -164
app.py CHANGED
@@ -1,4 +1,4 @@
- from flask import Flask, request, jsonify, Response
+ from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
from flask_cors import CORS
import os
@@ -6,14 +6,12 @@ import torch
import fitz # PyMuPDF
import pytesseract
from pdf2image import convert_from_path
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import tempfile
from PIL import Image
- import threading
- import json

import logging

@@ -75,6 +73,81 @@ def initialize_models():
logger.error(f"Error initializing models: {str(e)}")
raise

+ # Generation-based answering
+ def answer_with_generation(index, embeddings, chunks, question):
+ try:
+ logger.info(f"Answering with generation model: '{question}'")
+ global tokenizer, model
+
+ if tokenizer is None or model is None:
+ logger.info("Generation models not initialized, creating now...")
+ model_name = "Qwen/Qwen2.5-1.5B-Instruct"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(
+ model_name,
+ torch_dtype=torch.float16,
+ device_map="cpu",
+ low_cpu_mem_usage=True
+ )
+
+ if tokenizer.pad_token is None:
+ tokenizer.pad_token = tokenizer.eos_token
+ model.config.pad_token_id = model.config.eos_token_id
+
+ # Get embeddings for question
+ q_embedding = embedder.encode([question])
+
+ # Find relevant chunks
+ _, top_k_indices = index.search(q_embedding, k=3)
+ relevant_chunks = [chunks[i] for i in top_k_indices[0]]
+ context = " ".join(relevant_chunks)
+
+ # Limit context size
+ if len(context) > 2000:
+ context = context[:2000]
+
+ # Create prompt
+ prompt = f"""<|im_start|>system
+ You are a helpful assistant answering questions based on provided PDF content. Use the information below to give a clear, concise, and accurate answer. Avoid speculation and focus on the context.
+ <|im_end|>
+ <|im_start|>user
+ **Context**: {context}
+ **Question**: {question}
+ **Instruction**: Provide a detailed and accurate answer based on the context. If the context doesn't contain enough information, say so clearly. <|im_end|>"""
+
+ # Handle inputs
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
+
+ # Move inputs to CPU
+ inputs = {k: v.to('cpu') for k, v in inputs.items()}
+
+ # Generate answer
+ output = model.generate(
+ **inputs,
+ max_new_tokens=300,
+ temperature=0.7,
+ top_p=0.9,
+ do_sample=True,
+ num_beams=2,
+ no_repeat_ngram_size=2
+ )
+
+ # Decode and format answer
+ answer = tokenizer.decode(output[0], skip_special_tokens=True)
+ if "<|im_end|>" in answer:
+ answer = answer.split("<|im_end|>")[1].strip()
+ elif "Instruction" in answer:
+ answer = answer.split("Instruction")[1].strip()
+
+ logger.info(f"Generation answer: '{answer[:50]}...' (length: {len(answer)})")
+ return answer.strip()
+ except Exception as e:
+ logger.error(f"Generation error: {str(e)}")
+ return "I couldn't generate a good answer based on the PDF content."
+
+
+
+
# Cleanup function for temporary files
def cleanup_temp_files(filepath):
try:
@@ -222,21 +295,19 @@ def answer_with_qa_pipeline(chunks, question):
logger.error(f"QA pipeline error: {str(e)}")
return ""

- # Generation-based answering with streaming support
- def generate_streaming_answer(index, embeddings, chunks, question, streamer):
+ # Generation-based answering
+ def answer_with_generation(index, embeddings, chunks, question):
try:
- logger.info(f"Generating streaming answer for: '{question}'")
+ logger.info(f"Answering with generation model: '{question}'")
global tokenizer, model

if tokenizer is None or model is None:
logger.info("Generation models not initialized, creating now...")
- model_name = "Qwen/Qwen2.5-1.5B-Instruct"
- tokenizer = AutoTokenizer.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained(
- model_name,
- torch_dtype=torch.float16,
- device_map="cpu",
- low_cpu_mem_usage=True
+ "distilgpt2",
+ device_map="auto",
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
)

if tokenizer.pad_token is None:
@@ -251,47 +322,41 @@ def generate_streaming_answer(index, embeddings, chunks, question, streamer):
relevant_chunks = [chunks[i] for i in top_k_indices[0]]
context = " ".join(relevant_chunks)

- # Limit context size
- if len(context) > 2000:
- context = context[:2000]
+ # Limit context size to avoid token length issues
+ if len(context) > 4000:
+ context = context[:4000]

# Create prompt
- prompt = f"""<|im_start|>system
- You are a helpful assistant answering questions based on provided PDF content. Use the information below to give a clear, concise, and accurate answer. Avoid speculation and focus on the context.
- <|im_end|>
- <|im_start|>user
- **Context**: {context}
- **Question**: {question}
- **Instruction**: Provide a detailed and accurate answer based on the context. If the context doesn't contain enough information, say so clearly. <|im_end|>"""
+ prompt = f"Answer the following question based on this information:\n\nInformation: {context}\n\nQuestion: {question}\n\nDetailed answer:"

# Handle inputs
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

- # Move inputs to CPU
- inputs = {k: v.to('cpu') for k, v in inputs.items()}
+ # Move inputs to the right device if needed
+ if torch.cuda.is_available():
+ inputs = {k: v.to('cuda') for k, v in inputs.items()}

- # Generate answer using the streamer
- generate_kwargs = dict(
+ # Generate answer
+ output = model.generate(
**inputs,
- streamer=streamer,
max_new_tokens=300,
temperature=0.7,
top_p=0.9,
do_sample=True,
- num_beams=1, # FIXED: Changed from 2 to 1 to be compatible with streaming
+ num_beams=3,
no_repeat_ngram_size=2
)

- # Generate the answer (this will stream through the streamer)
- model.generate(**generate_kwargs)
+ # Decode and format answer
+ answer = tokenizer.decode(output[0], skip_special_tokens=True)
+ if "Detailed answer:" in answer:
+ answer = answer.split("Detailed answer:")[-1].strip()

+ logger.info(f"Generation answer: '{answer[:50]}...' (length: {len(answer)})")
+ return answer.strip()
except Exception as e:
- logger.error(f"Streaming generation error: {str(e)}")
- # If an error occurs during streaming, push an error message to the streamer
- try:
- streamer.put("I encountered an error while generating the response.")
- except:
- pass
+ logger.error(f"Generation error: {str(e)}")
+ return "I couldn't generate a good answer based on the PDF content."

# API route
@app.route('/')
@@ -302,7 +367,6 @@ def home():
def ask():
file = request.files.get("pdf")
question = request.form.get("question", "")
- streaming = request.form.get("streaming", "true").lower() == "true"
filepath = None

if not file or not question:
@@ -313,9 +377,9 @@
filepath = os.path.join(UPLOAD_FOLDER, filename)
file.save(filepath)

- logger.info(f"Processing file: {filename}, Question: '{question}', Streaming: {streaming}")
+ logger.info(f"Processing file: {filename}, Question: '{question}'")

- # Process PDF and extract text
+ # Process PDF and generate answer
text = extract_text(filepath)
if not text.strip():
return jsonify({"error": "Could not extract text from the PDF"}), 400
@@ -323,137 +387,33 @@
chunks = split_into_chunks(text)
if not chunks:
return jsonify({"error": "PDF content couldn't be processed"}), 400
-
- # Set up FAISS for semantic search
- index, embeddings, chunks = setup_faiss(chunks)
-
- # For non-streaming responses, use the regular approach
- if not streaming:
- try:
- answer = answer_with_qa_pipeline(chunks, question)
- if not answer or len(answer.strip()) < 20:
- answer = answer_with_generation(index, embeddings, chunks, question)
- return jsonify({"answer": answer})
- except Exception as e:
- logger.error(f"Error generating answer: {str(e)}")
- return jsonify({"error": f"An error occurred: {str(e)}"}), 500
-
- # For streaming responses, use SSE
- else:
+
+ try:
+ answer = answer_with_qa_pipeline(chunks, question)
+ except Exception as e:
+ logger.warning(f"QA pipeline failed: {str(e)}")
+ answer = ""
+
+ # If QA pipeline didn't give a good answer, try generation
+ if not answer or len(answer.strip()) < 20:
try:
- # Create a streamer for the text generation
- streamer = TextIteratorStreamer(
- tokenizer, skip_prompt=True, skip_special_tokens=True
- )
-
- # Start generation in a separate thread
- thread = threading.Thread(
- target=generate_streaming_answer,
- args=(index, embeddings, chunks, question, streamer)
- )
- thread.start()
-
- # Stream responses as Server-Sent Events (SSE)
- def generate():
- for new_text in streamer:
- yield f"data: {json.dumps({'response': new_text})}\n\n"
- yield "data: [DONE]\n\n"
-
- # Cleanup will happen in a separate thread after the response is complete
- cleanup_thread = threading.Thread(
- target=cleanup_temp_files,
- args=(filepath,)
- )
- cleanup_thread.daemon = True
- cleanup_thread.start()
-
- return Response(generate(), mimetype="text/event-stream")
-
+ logger.info("QA pipeline answer insufficient, trying generation...")
+ index, embeddings, chunks = setup_faiss(chunks)
+ answer = answer_with_generation(index, embeddings, chunks, question)
except Exception as e:
- logger.error(f"Error in streaming setup: {str(e)}")
- return jsonify({"error": f"An error occurred: {str(e)}"}), 500
-
+ logger.error(f"Generation fallback failed: {str(e)}")
+ return jsonify({"error": "Failed to generate answer from PDF content"}), 500
+
+ return jsonify({"answer": answer})
+
except Exception as e:
logger.error(f"Error processing request: {str(e)}")
return jsonify({"error": f"An error occurred processing your request: {str(e)}"}), 500
finally:
- # For non-streaming responses, clean up immediately
- # For streaming, we clean up in a separate thread
- if filepath and not streaming:
+ # Always clean up, even if errors occur
+ if filepath:
cleanup_temp_files(filepath)

- # Original generation function kept for non-streaming use
- def answer_with_generation(index, embeddings, chunks, question):
- try:
- logger.info(f"Answering with generation model: '{question}'")
- global tokenizer, model
-
- if tokenizer is None or model is None:
- logger.info("Generation models not initialized, creating now...")
- model_name = "Qwen/Qwen2.5-1.5B-Instruct"
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(
- model_name,
- torch_dtype=torch.float16,
- device_map="cpu",
- low_cpu_mem_usage=True
- )
-
- if tokenizer.pad_token is None:
- tokenizer.pad_token = tokenizer.eos_token
- model.config.pad_token_id = model.config.eos_token_id
-
- # Get embeddings for question
- q_embedding = embedder.encode([question])
-
- # Find relevant chunks
- _, top_k_indices = index.search(q_embedding, k=3)
- relevant_chunks = [chunks[i] for i in top_k_indices[0]]
- context = " ".join(relevant_chunks)
-
- # Limit context size
- if len(context) > 2000:
- context = context[:2000]
-
- # Create prompt
- prompt = f"""<|im_start|>system
- You are a helpful assistant answering questions based on provided PDF content. Use the information below to give a clear, concise, and accurate answer. Avoid speculation and focus on the context.
- <|im_end|>
- <|im_start|>user
- **Context**: {context}
- **Question**: {question}
- **Instruction**: Provide a detailed and accurate answer based on the context. If the context doesn't contain enough information, say so clearly. <|im_end|>"""
-
- # Handle inputs
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
-
- # Move inputs to CPU
- inputs = {k: v.to('cpu') for k, v in inputs.items()}
-
- # Generate answer
- output = model.generate(
- **inputs,
- max_new_tokens=300,
- temperature=0.7,
- top_p=0.9,
- do_sample=True,
- num_beams=2, # This is fine since non-streaming doesn't use a streamer
- no_repeat_ngram_size=2
- )
-
- # Decode and format answer
- answer = tokenizer.decode(output[0], skip_special_tokens=True)
- if "<|im_end|>" in answer:
- answer = answer.split("<|im_end|>")[1].strip()
- elif "Instruction" in answer:
- answer = answer.split("Instruction")[1].strip()
-
- logger.info(f"Generation answer: '{answer[:50]}...' (length: {len(answer)})")
- return answer.strip()
- except Exception as e:
- logger.error(f"Generation error: {str(e)}")
- return "I couldn't generate a good answer based on the PDF content."
-
if __name__ == "__main__":
try:
# Initialize models at startup
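
A note on calling the updated endpoint: with streaming removed, ask() returns a single JSON object per request instead of a text/event-stream response. Below is a minimal client sketch. The route path (/ask) and port (7860) are assumptions for illustration; neither appears in this diff, so adjust them to match the deployed app.

import requests

# Hypothetical client for the updated ask() view; route path and port are assumed.
URL = "http://localhost:7860/ask"

with open("sample.pdf", "rb") as f:
    response = requests.post(
        URL,
        files={"pdf": ("sample.pdf", f, "application/pdf")},  # read via request.files.get("pdf")
        data={"question": "What is this document about?"},    # read via request.form.get("question", "")
        timeout=300,                                           # CPU-only generation can take a while
    )

payload = response.json()
# The handler returns {"answer": ...} on success, or {"error": ...} with a 4xx/5xx status.
print(payload.get("answer") or payload.get("error"))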