raannakasturi committed
Commit ef651bf · verified · 1 Parent(s): 9e075cc

Upload 4 files

Files changed (2)
  1. app.py +27 -3
  2. main.py +6 -8
app.py CHANGED
@@ -17,8 +17,20 @@ theme = gr.themes.Soft(
     ],
 )
 
+def clear_everything(pdf_file, summary_output, info):
+    pdf_file = None
+    summary_output = None
+    info = None
+    return pdf_file, summary_output, info
+
 print("Checking for LLM model...")
 while not os.path.exists("Llama-3.2-1B-Instruct-Q8_0.gguf"):
+    if not os.path.exists("/usr/bin/curl"):
+        print("Curl not found. Installing curl...")
+        subprocess.run(["apt-get update"])
+        subprocess.run(["apt-get install curl"])
+    else:
+        print("Curl found. Installing curl...")
     print("Downloading LLM model...")
     subprocess.run(["curl -o Llama-3.2-1B-Instruct-Q8_0.gguf https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf?download=true"])
     print("LLM model downloaded successfully.")
@@ -29,14 +41,24 @@ print("Building app...")
 
 summarize_with_llm = partial(summarize, llm)
 
-with gr.Blocks(theme=theme, title="PDF Summarizer", fill_height=True) as app:
+with gr.Blocks(theme=theme, title="Hybrid PDF Summarizer", fill_height=True) as app:
+    gr.HTML(
+        value='''
+        <h1 style="text-align: center;">Hybrid PDF Summarizer</h1>
+        <p style="text-align: center;">This app uses a hybrid approach to summarize PDF documents completely based on CPU.</p>
+        <p style="text-align: center;">The app uses traditional methodologies such as TextRank, LSA, Luhn algorithms as well as quantized large language model (LLM) to generate summaries of the PDF document.</p>
+        <p style="text-align: center;">The summarization process can take some time depending on the size of the PDF document and the complexity of the content.</p>
+        ''')
     with gr.Column():
         with gr.Row():
             pdf_file = gr.File(label="Upload PDF", file_types=['.pdf'])
             with gr.Column():
-                summarize_btn = gr.Button(value="Summarize")
+                with gr.Row():
+                    summarize_btn = gr.Button(value="Summarize")
+                    clear_btn = gr.Button(value="Clear")
                 info = gr.Textbox(label="Summarization Info", placeholder="Details regarding summarization will be shown here", interactive=False)
-                summary_output = gr.TextArea(label="PDF Summary", interactive=False, show_copy_button=True)
+                summary_output = gr.TextArea(label="PDF Summary", placeholder="The summary will be displayed here", interactive=False, show_copy_button=True)
+
     summarize_btn.click(
         summarize_with_llm,
         inputs=pdf_file,
@@ -47,5 +69,7 @@ with gr.Blocks(theme=theme, title="PDF Summarizer", fill_height=True) as app:
         show_progress="full",
         max_batch_size=10,
     )
+    clear_btn.click(clear_everything, inputs=[pdf_file, summary_output, info], outputs=[pdf_file, summary_output, info], show_api=False)
+
 print("Build Successful. Launching app...")
 app.queue(default_concurrency_limit=5).launch(show_api=True)
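
Note on the download step: subprocess.run takes the command as a list of separate arguments unless shell=True is passed, so single-string calls such as subprocess.run(["apt-get update"]) or the curl invocation above will not execute as written. A minimal sketch of the same download step with an explicit argument list (the filename and URL come from the diff; the -L flag and check=True are assumptions added for illustration):

import os
import subprocess

MODEL_FILE = "Llama-3.2-1B-Instruct-Q8_0.gguf"
MODEL_URL = (
    "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/"
    "resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf?download=true"
)

if not os.path.exists(MODEL_FILE):
    # Each argument is its own list element; -L follows HTTP redirects,
    # and check=True raises if curl exits with a non-zero status.
    subprocess.run(["curl", "-L", "-o", MODEL_FILE, MODEL_URL], check=True)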
main.py CHANGED
@@ -22,6 +22,7 @@ def generate_summary(llm, file):
     return summary, length_of_research_paper
 
 def summarize(llm, file):
+    import time
     start_time = time.time()
     response, length_of_research_paper = generate_summary(llm, file)
     if "**" in response:
@@ -29,17 +30,14 @@ def summarize(llm, file):
         response = response.replace("**", "")
         response = response.replace("\n\n", "\n")
         response = response.replace("\\n\\n", "\\n")
-    else:
-        pass
     summary = ""
-    for line in response:
+    for line in response.splitlines():
         if line.startswith("###"):
-            summary += "\\n\\n" +line
+            summary += "\n\n" + line
         else:
-            summary += line
+            summary += "\n" + line
     end_time = time.time()
     total_time_taken = end_time - start_time
-    total_time_taken_minutes = total_time_taken / 60
-    total_time_taken_minutes = round(total_time_taken_minutes, 3)
+    total_time_taken_minutes = round(total_time_taken / 60, 3)
     info = f"The research paper of {length_of_research_paper} characters long was summarized in {total_time_taken_minutes} minutes."
-    return summary, info
+    return summary.strip(), info
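
The rewritten loop walks response.splitlines() and rebuilds the text so that lines starting with "###" open a new paragraph while other lines are joined with single newlines. A small self-contained sketch of that post-processing step (the sample input is made up for illustration):

def format_summary(response: str) -> str:
    # Headings marked with "###" start a new paragraph; other lines are
    # appended on their own line, then surrounding whitespace is stripped.
    summary = ""
    for line in response.splitlines():
        if line.startswith("###"):
            summary += "\n\n" + line
        else:
            summary += "\n" + line
    return summary.strip()

sample = "### Abstract\nThis paper studies X.\n### Results\nMethod X improves Y."
print(format_summary(sample))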