Hasnain-Ali committed on
Commit
eb44aac
·
verified ·
1 Parent(s): ce2b990

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -16,12 +16,17 @@ def extract_text_from_pdf(pdf):
16
 
17
  # Function to summarize text
18
  def summarize_text(text):
19
- # Limit input to 1024 characters (Bart-large-cnn model limit)
20
- max_input_length = 1024
21
- text = text[:max_input_length] # Truncate text to avoid errors
22
-
23
- summary = summarizer(text, max_length=200, min_length=50, do_sample=False)
24
- return summary[0]['summary_text']
 
 
 
 
 
25
 
26
 
27
  # Function for Q&A
 
16
 
17
  # Function to summarize text
18
  def summarize_text(text):
19
+ max_input_length = 1024 # BART model limit
20
+ chunk_size = 1000 # Slightly less to avoid errors
21
+ chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)] # Split text
22
+
23
+ summaries = []
24
+ for chunk in chunks:
25
+ summary = summarizer(chunk, max_length=200, min_length=50, do_sample=False)
26
+ summaries.append(summary[0]['summary_text'])
27
+
28
+ return " ".join(summaries) # Combine summaries into final output
29
+
30
 
31
 
32
  # Function for Q&A