Upload 4 files
Browse files
app.py
CHANGED
@@ -17,8 +17,20 @@ theme = gr.themes.Soft(
|
|
17 |
],
|
18 |
)
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
print("Checking for LLM model...")
|
21 |
while not os.path.exists("Llama-3.2-1B-Instruct-Q8_0.gguf"):
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
print("Downloading LLM model...")
|
23 |
subprocess.run(["curl -o Llama-3.2-1B-Instruct-Q8_0.gguf https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf?download=true"])
|
24 |
print("LLM model downloaded successfully.")
|
@@ -29,14 +41,24 @@ print("Building app...")
|
|
29 |
|
30 |
summarize_with_llm = partial(summarize, llm)
|
31 |
|
32 |
-
with gr.Blocks(theme=theme, title="PDF Summarizer", fill_height=True) as app:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
with gr.Column():
|
34 |
with gr.Row():
|
35 |
pdf_file = gr.File(label="Upload PDF", file_types=['.pdf'])
|
36 |
with gr.Column():
|
37 |
-
|
|
|
|
|
38 |
info = gr.Textbox(label="Summarization Info", placeholder="Details regarding summarization will be shown here", interactive=False)
|
39 |
-
summary_output = gr.TextArea(label="PDF Summary", interactive=False, show_copy_button=True)
|
|
|
40 |
summarize_btn.click(
|
41 |
summarize_with_llm,
|
42 |
inputs=pdf_file,
|
@@ -47,5 +69,7 @@ with gr.Blocks(theme=theme, title="PDF Summarizer", fill_height=True) as app:
|
|
47 |
show_progress="full",
|
48 |
max_batch_size=10,
|
49 |
)
|
|
|
|
|
50 |
print("Build Successful. Launching app...")
|
51 |
app.queue(default_concurrency_limit=5).launch(show_api=True)
|
|
|
17 |
],
|
18 |
)
|
19 |
|
20 |
+
def clear_everything(pdf_file, summary_output, info):
    """Reset the PDF upload, the summary output, and the info textbox.

    Used as the Clear button callback: Gradio replaces each output
    component's value with the corresponding returned None.

    Args:
        pdf_file: Current value of the PDF file component (ignored).
        summary_output: Current value of the summary textarea (ignored).
        info: Current value of the info textbox (ignored).

    Returns:
        tuple: (None, None, None) — one None per output component.
    """
    # The incoming values are irrelevant; "clearing" just means handing
    # Gradio a None for every wired output component.
    return None, None, None
|
25 |
+
|
26 |
print("Checking for LLM model...")
while not os.path.exists("Llama-3.2-1B-Instruct-Q8_0.gguf"):
    # curl is needed to fetch the model; install it on first run if absent.
    # NOTE(review): checking /usr/bin/curl is fragile — shutil.which("curl")
    # would be more robust; kept as a path check to avoid new imports.
    if not os.path.exists("/usr/bin/curl"):
        print("Curl not found. Installing curl...")
        # subprocess.run takes one list element PER argument. The original
        # ["apt-get update"] passes the whole command line as the executable
        # name and raises FileNotFoundError.
        subprocess.run(["apt-get", "update"])
        # -y answers the install prompt so the script does not hang.
        subprocess.run(["apt-get", "install", "-y", "curl"])
    else:
        # Original message said "Installing curl..." here by copy-paste error.
        print("Curl found.")
    print("Downloading LLM model...")
    # -L follows the HuggingFace "resolve/..." redirect (without it curl
    # saves the redirect response, not the model). check=True fails loudly
    # instead of silently looping on a broken download command.
    subprocess.run(
        [
            "curl",
            "-L",
            "-o",
            "Llama-3.2-1B-Instruct-Q8_0.gguf",
            "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf?download=true",
        ],
        check=True,
    )
    print("LLM model downloaded successfully.")
|
|
|
41 |
|
42 |
# Bind the already-loaded model into the summarize callback so Gradio only
# has to pass the uploaded file at click time.
summarize_with_llm = partial(summarize, llm)

# Declarative UI: header HTML, then a column holding the file input, the
# Summarize/Clear buttons, the status textbox, and the summary output.
with gr.Blocks(theme=theme, title="Hybrid PDF Summarizer", fill_height=True) as app:
    gr.HTML(
        value ='''
        <h1 style="text-align: center;">Hybrid PDF Summarizer</h1>
        <p style="text-align: center;">This app uses a hybrid approach to summarize PDF documents completely based on CPU.</p>
        <p style="text-align: center;">The app uses traditional methodologies such as TextRank, LSA, Luhn algorithms as well as quantized large language model (LLM) to generate summaries of the PDF document.</p>
        <p style="text-align: center;">The summarization process can take some time depending on the size of the PDF document and the complexity of the content.</p>
        ''')
    with gr.Column():
        with gr.Row():
            # Left: PDF upload. Right: action buttons stacked over the
            # read-only status textbox.
            pdf_file = gr.File(label="Upload PDF", file_types=['.pdf'])
            with gr.Column():
                with gr.Row():
                    summarize_btn = gr.Button(value="Summarize")
                    clear_btn = gr.Button(value="Clear")
                # Read-only: populated by the summarize callback's info string.
                info = gr.Textbox(label="Summarization Info", placeholder="Details regarding summarization will be shown here", interactive=False)
        # Read-only result area; show_copy_button lets users copy the summary.
        summary_output = gr.TextArea(label="PDF Summary", placeholder="The summary will be displayed here", interactive=False, show_copy_button=True)
|
61 |
+
|
62 |
summarize_btn.click(
|
63 |
summarize_with_llm,
|
64 |
inputs=pdf_file,
|
|
|
69 |
show_progress="full",
|
70 |
max_batch_size=10,
|
71 |
)
|
72 |
+
    # Clear button resets all three components; hidden from the public API
    # surface since it is UI-only housekeeping.
    clear_btn.click(clear_everything, inputs=[pdf_file, summary_output, info], outputs=[pdf_file, summary_output, info], show_api=False)

print("Build Successful. Launching app...")
# Queue requests with at most 5 concurrent summarizations; expose the API docs.
app.queue(default_concurrency_limit=5).launch(show_api=True)
|
main.py
CHANGED
@@ -22,6 +22,7 @@ def generate_summary(llm, file):
|
|
22 |
return summary, length_of_research_paper
|
23 |
|
24 |
def summarize(llm, file):
|
|
|
25 |
start_time = time.time()
|
26 |
response, length_of_research_paper = generate_summary(llm, file)
|
27 |
if "**" in response:
|
@@ -29,17 +30,14 @@ def summarize(llm, file):
|
|
29 |
response = response.replace("**", "")
|
30 |
response = response.replace("\n\n", "\n")
|
31 |
response = response.replace("\\n\\n", "\\n")
|
32 |
-
else:
|
33 |
-
pass
|
34 |
summary = ""
|
35 |
-
for line in response:
|
36 |
if line.startswith("###"):
|
37 |
-
summary += "
|
38 |
else:
|
39 |
-
summary += line
|
40 |
end_time = time.time()
|
41 |
total_time_taken = end_time - start_time
|
42 |
-
total_time_taken_minutes = total_time_taken / 60
|
43 |
-
total_time_taken_minutes = round(total_time_taken_minutes, 3)
|
44 |
info = f"The research paper of {length_of_research_paper} characters long was summarized in {total_time_taken_minutes} minutes."
|
45 |
-
return summary, info
|
|
|
22 |
return summary, length_of_research_paper
|
23 |
|
24 |
def summarize(llm, file):
    """Summarize *file* with *llm* and report how long it took.

    Args:
        llm: Loaded language model, passed through to generate_summary.
        file: Uploaded PDF file object to summarize.

    Returns:
        tuple[str, str]: (cleaned summary text, human-readable timing info).
    """
    # Local import kept deliberately: module-level imports are not visible
    # here, and the original relied on this function-scope import.
    import time

    start_time = time.time()
    response, length_of_research_paper = generate_summary(llm, file)
    # Strip markdown bold markers and collapse doubled newlines (both
    # literal and backslash-escaped) that the model tends to emit.
    if "**" in response:
        response = response.replace("**", "")
        response = response.replace("\n\n", "\n")
        response = response.replace("\\n\\n", "\\n")
    # Rebuild line by line: a blank line before each "###" heading, every
    # other line on its own line. Accumulate in a list and join once —
    # repeated `summary += ...` is quadratic in the worst case.
    parts = []
    for line in response.splitlines():
        if line.startswith("###"):
            parts.append("\n\n" + line)
        else:
            parts.append("\n" + line)
    summary = "".join(parts)
    total_time_taken_minutes = round((time.time() - start_time) / 60, 3)
    info = f"The research paper of {length_of_research_paper} characters long was summarized in {total_time_taken_minutes} minutes."
    # strip() removes the leading newline added before the first line.
    return summary.strip(), info
|