Create app.py
app.py ADDED
@@ -0,0 +1,186 @@
"""
PDF → Summary → Audio → Talk to PDF → Diagram
All Hugging Face APIs.
"""

import os
import tempfile
from typing import List, Optional

import fitz  # PyMuPDF
import requests
import gradio as gr

# ================== Config ==================
CHUNK_CHARS = 20000
HF_SUMMARY_MODEL = "Groq/Llama-3-Groq-8B-Tool-Use"  # text-generation
HF_TTS_MODEL = "espnet/kan-bayashi_ljspeech_vits"
HF_IMAGE_MODEL = "runwayml/stable-diffusion-v1-5"

# Module-level store shared by every session; fine for a single-user demo,
# but concurrent users of a public Space will overwrite each other's PDF.
pdf_text_storage = {"text": "", "processed": False}

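# NOTE: hosted Inference API availability changes over time, so any of the
# three model IDs above may stop serving; if so, swap in another
# text-generation / text-to-speech / text-to-image checkpoint your token
# can access.
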
# ================== Utils ==================
def extract_text_from_pdf(file_path: str) -> str:
    doc = fitz.open(file_path)
    text = "\n\n".join(page.get_text("text") for page in doc)
    doc.close()
    return text.strip()

def chunk_text(text: str, max_chars: int) -> List[str]:
    """Split text into chunks of at most max_chars, preferring to break at
    the last newline (or, failing that, the last space) in each window."""
    if not text:
        return []
    parts, start, L = [], 0, len(text)
    while start < L:
        end = min(start + max_chars, L)
        if end < L:
            back = text.rfind("\n", start, end)
            if back == -1:
                back = text.rfind(" ", start, end)
            if back != -1 and back > start:
                end = back
        parts.append(text[start:end].strip())
        start = end
    return parts

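# Worked example: for a 45,000-char string containing no whitespace,
# chunk_text(s, 20000) cannot snap to a boundary and yields chunks of
# exactly 20000, 20000, and 5000 characters.
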
# ================== Hugging Face Summarization ==================
def summarize_chunk_hf(chunk: str, hf_token: str) -> str:
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {
        "inputs": f"Summarize the following text into a concise paragraph (~180 words max):\n\n{chunk}",
        "parameters": {"max_new_tokens": 800, "temperature": 0.2}
    }
    resp = requests.post(f"https://api-inference.huggingface.co/models/{HF_SUMMARY_MODEL}", headers=headers, json=payload, timeout=120)
    resp.raise_for_status()
    output = resp.json()
    if isinstance(output, list) and output and isinstance(output[0], dict) and "generated_text" in output[0]:
        return output[0]["generated_text"]
    return str(output)

def summarize_document(extracted_text: str, hf_token: str) -> str:
    # Short documents fit in one request; longer ones are summarized chunk
    # by chunk, then the partial summaries are merged in a final pass.
    if len(extracted_text) <= CHUNK_CHARS:
        return summarize_chunk_hf(extracted_text, hf_token)
    chunks = chunk_text(extracted_text, CHUNK_CHARS)
    summaries = []
    for ch in chunks:
        try:
            summaries.append(summarize_chunk_hf(ch, hf_token))
        except Exception as e:
            summaries.append(f"(error summarizing chunk: {str(e)})")
    final_prompt = "Combine and refine the following summaries into a single clear summary (200-300 words):\n\n" + " ".join(summaries)
    return summarize_chunk_hf(final_prompt, hf_token)

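# For a 50,000-char document with CHUNK_CHARS = 20000 this makes four API
# calls in total: three partial summaries (roughly 20k + 20k + 10k chars)
# plus one merge pass.
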
# ================== Hugging Face TTS ==================
def hf_tts(summary_text: str, hf_token: str, model: str = HF_TTS_MODEL) -> str:
    url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": summary_text}
    resp = requests.post(url, headers=headers, json=payload, timeout=120)
    resp.raise_for_status()
    # The endpoint returns raw audio bytes; write them to a temp file so
    # Gradio can serve the path.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    tmp.write(resp.content)
    tmp.close()
    return tmp.name

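# NOTE: a cold model on the Inference API may answer 503 while it loads;
# the API supports {"options": {"wait_for_model": True}} in the JSON payload
# to block until the model is ready (left out here to keep the request
# shape minimal).
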
# ================== Talk to PDF ==================
def ask_pdf_question(question: str, hf_token: str) -> str:
    if not pdf_text_storage["processed"]:
        return "❌ Please process a PDF first!"
    if not question.strip():
        return "❌ Please enter a question!"
    # Truncate the stored PDF text to keep the prompt inside the model's context window.
    prompt = f"Here is the PDF content:\n\n{pdf_text_storage['text'][:15000]}\n\nUser Question: {question}\nAnswer strictly based on PDF content."
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 500, "temperature": 0}}
    resp = requests.post(f"https://api-inference.huggingface.co/models/{HF_SUMMARY_MODEL}", headers=headers, json=payload, timeout=120)
    resp.raise_for_status()
    output = resp.json()
    if isinstance(output, list) and output and isinstance(output[0], dict) and "generated_text" in output[0]:
        return f"🤖 {output[0]['generated_text'].strip()}"
    return str(output)

# ================== Diagram via HF ==================
def generate_diagram(summary: str, hf_token: str) -> Optional[str]:
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": f"detailed diagram, clean illustration of: {summary[:500]}"}
    resp = requests.post(f"https://api-inference.huggingface.co/models/{HF_IMAGE_MODEL}", headers=headers, json=payload, timeout=60)
    if resp.status_code == 200 and len(resp.content) > 1000:
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
        tmp.write(resp.content)
        tmp.close()
        return tmp.name
    # Fallback: gr.Image cannot render a text file, so return None and let
    # the Image output stay empty when generation fails.
    return None

# ================== Main Pipeline ==================
def process_pdf_pipeline(pdf_file, hf_token):
    try:
        if not hf_token.strip():
            return "❌ Missing Hugging Face token!", None, None, "Process a PDF first!"
        if pdf_file is None:
            return "❌ Please upload a PDF!", None, None, "Process a PDF first!"

        pdf_path = pdf_file.name if hasattr(pdf_file, "name") else str(pdf_file)
        text = extract_text_from_pdf(pdf_path)
        if not text.strip():
            return "❌ PDF contains no extractable text!", None, None, "Process a PDF first!"

        pdf_text_storage["text"] = text
        pdf_text_storage["processed"] = True

        summary = summarize_document(text, hf_token)
        audio_path = hf_tts(summary, hf_token)
        diagram_path = generate_diagram(summary, hf_token)

        return summary, audio_path, diagram_path, "✅ PDF processed!"
    except Exception as e:
        pdf_text_storage["processed"] = False
        return f"❌ Error: {str(e)}", None, None, "Process a PDF first!"

# ================== Gradio UI ==================
def build_ui():
    hf_token_env = os.environ.get("HF_TOKEN", "")

    with gr.Blocks(title="🔥 PDF AI Pipeline") as demo:
        gr.Markdown("## 🔥 Hugging Face PDF Processor")

        with gr.Row():
            with gr.Column(scale=1):
                pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
                hf_token = gr.Textbox(label="HF Token", value=hf_token_env, type="password")
                process_btn = gr.Button("🚀 PROCESS PDF")
                # Declared in the layout (not inline in outputs=) so the
                # status box has a stable place in the UI.
                status_output = gr.Textbox(label="Status")
            with gr.Column(scale=2):
                summary_output = gr.Textbox(label="Summary", lines=12)
                audio_output = gr.Audio(label="Audio", type="filepath")
                diagram_output = gr.Image(label="Diagram", interactive=False)

        gr.Markdown("## 💬 Chat with PDF")
        question_input = gr.Textbox(label="Your Question")
        ask_btn = gr.Button("📨 ASK")
        chat_output = gr.Textbox(label="Response", lines=8)

        process_btn.click(
            fn=process_pdf_pipeline,
            inputs=[pdf_input, hf_token],
            outputs=[summary_output, audio_output, diagram_output, status_output]
        )

        ask_btn.click(
            fn=ask_pdf_question,
            inputs=[question_input, hf_token],
            outputs=[chat_output]
        )

        question_input.submit(
            fn=ask_pdf_question,
            inputs=[question_input, hf_token],
            outputs=[chat_output]
        )

    return demo

if __name__ == "__main__":
    demo = build_ui()
    demo.launch(share=True, debug=True)
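
# Local run (assumed setup): pip install gradio requests PyMuPDF, then
# `python app.py`; set HF_TOKEN in the environment to pre-fill the token box.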