Upload 4 files
Browse files
app.py
CHANGED
@@ -17,8 +17,20 @@ theme = gr.themes.Soft(
|
|
17 |
],
|
18 |
)
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
print("Checking for LLM model...")
|
21 |
while not os.path.exists("Llama-3.2-1B-Instruct-Q8_0.gguf"):
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
print("Downloading LLM model...")
|
23 |
subprocess.run(["curl -o Llama-3.2-1B-Instruct-Q8_0.gguf https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf?download=true"])
|
24 |
print("LLM model downloaded successfully.")
|
@@ -29,14 +41,24 @@ print("Building app...")
|
|
29 |
|
30 |
summarize_with_llm = partial(summarize, llm)
|
31 |
|
32 |
-
with gr.Blocks(theme=theme, title="PDF Summarizer", fill_height=True) as app:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
with gr.Column():
|
34 |
with gr.Row():
|
35 |
pdf_file = gr.File(label="Upload PDF", file_types=['.pdf'])
|
36 |
with gr.Column():
|
37 |
-
|
|
|
|
|
38 |
info = gr.Textbox(label="Summarization Info", placeholder="Details regarding summarization will be shown here", interactive=False)
|
39 |
-
summary_output = gr.TextArea(label="PDF Summary", interactive=False, show_copy_button=True)
|
|
|
40 |
summarize_btn.click(
|
41 |
summarize_with_llm,
|
42 |
inputs=pdf_file,
|
@@ -47,5 +69,7 @@ with gr.Blocks(theme=theme, title="PDF Summarizer", fill_height=True) as app:
|
|
47 |
show_progress="full",
|
48 |
max_batch_size=10,
|
49 |
)
|
|
|
|
|
50 |
print("Build Successful. Launching app...")
|
51 |
app.queue(default_concurrency_limit=5).launch(show_api=True)
|
|
|
17 |
],
|
18 |
)
|
19 |
|
20 |
+
def clear_everything(pdf_file, summary_output, info):
    """Reset the PDF upload, the summary output, and the info textbox.

    Used as the Clear button callback: Gradio replaces each output
    component's value with the corresponding returned None.

    Args:
        pdf_file: Current value of the PDF file component (ignored).
        summary_output: Current value of the summary textarea (ignored).
        info: Current value of the info textbox (ignored).

    Returns:
        tuple: (None, None, None) — one None per output component.
    """
    # The incoming values are irrelevant; "clearing" just means handing
    # Gradio a None for every wired output component.
    return None, None, None
|
25 |
+
|
26 |
print("Checking for LLM model...")
while not os.path.exists("Llama-3.2-1B-Instruct-Q8_0.gguf"):
    # curl is needed to fetch the model; install it on first run if absent.
    # NOTE(review): checking /usr/bin/curl is fragile — shutil.which("curl")
    # would be more robust; kept as a path check to avoid new imports.
    if not os.path.exists("/usr/bin/curl"):
        print("Curl not found. Installing curl...")
        # subprocess.run takes one list element PER argument. The original
        # ["apt-get update"] passes the whole command line as the executable
        # name and raises FileNotFoundError.
        subprocess.run(["apt-get", "update"])
        # -y answers the install prompt so the script does not hang.
        subprocess.run(["apt-get", "install", "-y", "curl"])
    else:
        # Original message said "Installing curl..." here by copy-paste error.
        print("Curl found.")
    print("Downloading LLM model...")
    # -L follows the HuggingFace "resolve/..." redirect (without it curl
    # saves the redirect response, not the model). check=True fails loudly
    # instead of silently looping on a broken download command.
    subprocess.run(
        [
            "curl",
            "-L",
            "-o",
            "Llama-3.2-1B-Instruct-Q8_0.gguf",
            "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf?download=true",
        ],
        check=True,
    )
    print("LLM model downloaded successfully.")
|
|
|
41 |
|
42 |
# Bind the already-loaded model into the summarize callback so Gradio only
# has to pass the uploaded file at click time.
summarize_with_llm = partial(summarize, llm)

# Declarative UI: header HTML, then a column holding the file input, the
# Summarize/Clear buttons, the status textbox, and the summary output.
with gr.Blocks(theme=theme, title="Hybrid PDF Summarizer", fill_height=True) as app:
    gr.HTML(
        value ='''
        <h1 style="text-align: center;">Hybrid PDF Summarizer</h1>
        <p style="text-align: center;">This app uses a hybrid approach to summarize PDF documents completely based on CPU.</p>
        <p style="text-align: center;">The app uses traditional methodologies such as TextRank, LSA, Luhn algorithms as well as quantized large language model (LLM) to generate summaries of the PDF document.</p>
        <p style="text-align: center;">The summarization process can take some time depending on the size of the PDF document and the complexity of the content.</p>
        ''')
    with gr.Column():
        with gr.Row():
            # Left: PDF upload. Right: action buttons stacked over the
            # read-only status textbox.
            pdf_file = gr.File(label="Upload PDF", file_types=['.pdf'])
            with gr.Column():
                with gr.Row():
                    summarize_btn = gr.Button(value="Summarize")
                    clear_btn = gr.Button(value="Clear")
                # Read-only: populated by the summarize callback's info string.
                info = gr.Textbox(label="Summarization Info", placeholder="Details regarding summarization will be shown here", interactive=False)
        # Read-only result area; show_copy_button lets users copy the summary.
        summary_output = gr.TextArea(label="PDF Summary", placeholder="The summary will be displayed here", interactive=False, show_copy_button=True)
|
61 |
+
|
62 |
summarize_btn.click(
|
63 |
summarize_with_llm,
|
64 |
inputs=pdf_file,
|
|
|
69 |
show_progress="full",
|
70 |
max_batch_size=10,
|
71 |
)
|
72 |
+
    # Clear button resets all three components; hidden from the public API
    # surface since it is UI-only housekeeping.
    clear_btn.click(clear_everything, inputs=[pdf_file, summary_output, info], outputs=[pdf_file, summary_output, info], show_api=False)

print("Build Successful. Launching app...")
# Queue requests with at most 5 concurrent summarizations; expose the API docs.
app.queue(default_concurrency_limit=5).launch(show_api=True)
|
main.py
CHANGED
@@ -22,6 +22,7 @@ def generate_summary(llm, file):
|
|
22 |
return summary, length_of_research_paper
|
23 |
|
24 |
def summarize(llm, file):
|
|
|
25 |
start_time = time.time()
|
26 |
response, length_of_research_paper = generate_summary(llm, file)
|
27 |
if "**" in response:
|
@@ -29,17 +30,14 @@ def summarize(llm, file):
|
|
29 |
response = response.replace("**", "")
|
30 |
response = response.replace("\n\n", "\n")
|
31 |
response = response.replace("\\n\\n", "\\n")
|
32 |
-
else:
|
33 |
-
pass
|
34 |
summary = ""
|
35 |
-
for line in response:
|
36 |
if line.startswith("###"):
|
37 |
-
summary += "
|
38 |
else:
|
39 |
-
summary += line
|
40 |
end_time = time.time()
|
41 |
total_time_taken = end_time - start_time
|
42 |
-
total_time_taken_minutes = total_time_taken / 60
|
43 |
-
total_time_taken_minutes = round(total_time_taken_minutes, 3)
|
44 |
info = f"The research paper of {length_of_research_paper} characters long was summarized in {total_time_taken_minutes} minutes."
|
45 |
-
return summary, info
|
|
|
22 |
return summary, length_of_research_paper
|
23 |
|
24 |
def summarize(llm, file):
    """Summarize *file* with *llm* and report how long it took.

    Args:
        llm: Loaded language model, passed through to generate_summary.
        file: Uploaded PDF file object to summarize.

    Returns:
        tuple[str, str]: (cleaned summary text, human-readable timing info).
    """
    # Local import kept deliberately: module-level imports are not visible
    # here, and the original relied on this function-scope import.
    import time

    start_time = time.time()
    response, length_of_research_paper = generate_summary(llm, file)
    # Strip markdown bold markers and collapse doubled newlines (both
    # literal and backslash-escaped) that the model tends to emit.
    if "**" in response:
        response = response.replace("**", "")
        response = response.replace("\n\n", "\n")
        response = response.replace("\\n\\n", "\\n")
    # Rebuild line by line: a blank line before each "###" heading, every
    # other line on its own line. Accumulate in a list and join once —
    # repeated `summary += ...` is quadratic in the worst case.
    parts = []
    for line in response.splitlines():
        if line.startswith("###"):
            parts.append("\n\n" + line)
        else:
            parts.append("\n" + line)
    summary = "".join(parts)
    total_time_taken_minutes = round((time.time() - start_time) / 60, 3)
    info = f"The research paper of {length_of_research_paper} characters long was summarized in {total_time_taken_minutes} minutes."
    # strip() removes the leading newline added before the first line.
    return summary.strip(), info
|