# pdf-wow / app.py
"""
PDF → Summary → Audio → Talk to PDF → Diagram
All Hugging Face APIs.
"""
import os
import tempfile
from typing import List, Optional

import fitz  # PyMuPDF
import requests
import gradio as gr
# ================== Config ==================
CHUNK_CHARS = 20000
HF_SUMMARY_MODEL = "Groq/Llama-3-Groq-8B-Tool-Use" # text-generation
HF_TTS_MODEL = "espnet/kan-bayashi_ljspeech_vits"
HF_IMAGE_MODEL = "runwayml/stable-diffusion-v1-5"
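# In-memory store for the most recently processed PDF. Note that this is
# module-level state, so it is shared by every user of the running app rather
# than scoped to a single session.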
pdf_text_storage = {"text": "", "processed": False}
# ================== Utils ==================
def extract_text_from_pdf(file_path: str) -> str:
    doc = fitz.open(file_path)
    text = "\n\n".join(page.get_text("text") for page in doc)
    doc.close()
    return text.strip()

def chunk_text(text: str, max_chars: int) -> List[str]:
    """Split text into chunks of at most max_chars, preferring to break on a newline or space."""
    if not text:
        return []
    parts, start, L = [], 0, len(text)
    while start < L:
        end = min(start + max_chars, L)
        if end < L:
            back = text.rfind("\n", start, end)
            if back == -1:
                back = text.rfind(" ", start, end)
            if back != -1 and back > start:
                end = back
        parts.append(text[start:end].strip())
        start = end
    return parts

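# For example, chunk_text("a" * 45000, 20000) yields three chunks of
# 20000, 20000, and 5000 characters, since there is no newline or space to break on.
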
# ================== Hugging Face Summarization ==================
def summarize_chunk_hf(text_chunk: str, hf_token: str) -> str:
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {
        "inputs": f"Summarize the following text into a concise paragraph (~180 words max):\n\n{text_chunk}",
        # return_full_text=False asks the API to return only the completion, not the prompt.
        "parameters": {"max_new_tokens": 800, "temperature": 0.2, "return_full_text": False},
    }
    resp = requests.post(
        f"https://api-inference.huggingface.co/models/{HF_SUMMARY_MODEL}",
        headers=headers, json=payload, timeout=120,
    )
    resp.raise_for_status()
    output = resp.json()
    if isinstance(output, list) and output and isinstance(output[0], dict) and "generated_text" in output[0]:
        return output[0]["generated_text"].strip()
    return str(output)

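# Note: the serverless Inference API can return HTTP 503 while a model is
# loading (cold start); raise_for_status() surfaces that as an exception, and a
# more robust client might retry with a short backoff before giving up.
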
def summarize_document(extracted_text: str, hf_token: str) -> str:
    """Summarize the document, chunking it first if it exceeds CHUNK_CHARS."""
    if len(extracted_text) <= CHUNK_CHARS:
        return summarize_chunk_hf(extracted_text, hf_token)
    chunks = chunk_text(extracted_text, CHUNK_CHARS)
    summaries = []
    for ch in chunks:
        try:
            summaries.append(summarize_chunk_hf(ch, hf_token))
        except Exception as e:
            summaries.append(f"(error summarizing chunk: {str(e)})")
    final_prompt = "Combine and refine the following summaries into a single clear summary (200-300 words):\n\n" + " ".join(summaries)
    return summarize_chunk_hf(final_prompt, hf_token)

# ================== Hugging Face TTS ==================
def hf_tts(summary_text: str, hf_token: str, model: str = HF_TTS_MODEL) -> str:
    url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": summary_text}
    resp = requests.post(url, headers=headers, json=payload, timeout=120)
    resp.raise_for_status()
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    tmp.write(resp.content)
    tmp.close()
    return tmp.name

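# The TTS endpoint responds with raw audio bytes, which are written to a
# temporary file so gr.Audio(type="filepath") can serve them. Temp files are
# not cleaned up here.
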
# ================== Talk to PDF ==================
def ask_pdf_question(question: str, hf_token: str) -> str:
    if not pdf_text_storage["processed"]:
        return "❌ Please process a PDF first!"
    if not question.strip():
        return "❌ Please enter a question!"
    prompt = (
        f"Here is the PDF content:\n\n{pdf_text_storage['text'][:15000]}\n\n"
        f"User Question: {question}\nAnswer strictly based on the PDF content."
    )
    headers = {"Authorization": f"Bearer {hf_token}"}
    # Some text-generation backends reject temperature=0, so use a small positive value.
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 500, "temperature": 0.1, "return_full_text": False}}
    resp = requests.post(
        f"https://api-inference.huggingface.co/models/{HF_SUMMARY_MODEL}",
        headers=headers, json=payload, timeout=120,
    )
    resp.raise_for_status()
    output = resp.json()
    if isinstance(output, list) and output and isinstance(output[0], dict) and "generated_text" in output[0]:
        return f"🤖 {output[0]['generated_text'].strip()}"
    return str(output)

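# Only the first 15000 characters of the extracted text are included in the
# prompt, so answers about very long PDFs may miss material from later pages.
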
# ================== Diagram via HF ==================
def generate_diagram(summary: str, hf_token: str) -> Optional[str]:
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": f"detailed diagram, clean illustration of: {summary[:500]}"}
    resp = requests.post(
        f"https://api-inference.huggingface.co/models/{HF_IMAGE_MODEL}",
        headers=headers, json=payload, timeout=60,
    )
    if resp.status_code == 200 and len(resp.content) > 1000:
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
        tmp.write(resp.content)
        tmp.close()
        return tmp.name
    # Fallback: image generation failed; return None so the gr.Image output
    # simply stays empty (a text placeholder cannot be rendered by an image component).
    return None
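# Note: hosted text-to-image models can be slow or temporarily unavailable on
# the serverless Inference API; HF_IMAGE_MODEL may need to be swapped for
# another hosted diffusion checkpoint if this call keeps failing.
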
# ================== Main Pipeline ==================
def process_pdf_pipeline(pdf_file, hf_token):
    """Full pipeline: extract text, summarize, synthesize audio, and generate a diagram."""
    try:
        if not hf_token.strip():
            return "❌ Missing Hugging Face token!", None, None, "Process a PDF first!"
        if pdf_file is None:
            return "❌ Please upload a PDF!", None, None, "Process a PDF first!"
        pdf_path = pdf_file.name if hasattr(pdf_file, "name") else str(pdf_file)
        text = extract_text_from_pdf(pdf_path)
        if not text.strip():
            return "❌ PDF contains no extractable text!", None, None, "Process a PDF first!"
        pdf_text_storage["text"] = text
        pdf_text_storage["processed"] = True
        summary = summarize_document(text, hf_token)
        audio_path = hf_tts(summary, hf_token)
        diagram_path = generate_diagram(summary, hf_token)
        return summary, audio_path, diagram_path, "✅ PDF processed!"
    except Exception as e:
        pdf_text_storage["processed"] = False
        return f"❌ Error: {str(e)}", None, None, "Process a PDF first!"

# ================== Gradio UI ==================
def build_ui():
    hf_token_env = os.environ.get("HF_TOKEN", "")
    with gr.Blocks(title="🔥 PDF AI Pipeline") as demo:
        gr.Markdown("## 🔥 Hugging Face PDF Processor")
        with gr.Row():
            with gr.Column(scale=1):
                pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
                hf_token = gr.Textbox(label="HF Token", value=hf_token_env, type="password")
                process_btn = gr.Button("🚀 PROCESS PDF")
                status_output = gr.Textbox(label="Status", interactive=False)
            with gr.Column(scale=2):
                summary_output = gr.Textbox(label="Summary", lines=12)
                audio_output = gr.Audio(label="Audio", type="filepath")
                diagram_output = gr.Image(label="Diagram", interactive=False)
        gr.Markdown("## 💬 Chat with PDF")
        question_input = gr.Textbox(label="Your Question")
        ask_btn = gr.Button("📨 ASK")
        chat_output = gr.Textbox(label="Response", lines=8)
        process_btn.click(
            fn=process_pdf_pipeline,
            inputs=[pdf_input, hf_token],
            # Use the status textbox defined in the layout instead of creating a
            # new, unplaced component inside the event wiring.
            outputs=[summary_output, audio_output, diagram_output, status_output],
        )
        ask_btn.click(
            fn=ask_pdf_question,
            inputs=[question_input, hf_token],
            outputs=[chat_output],
        )
        question_input.submit(
            fn=ask_pdf_question,
            inputs=[question_input, hf_token],
            outputs=[chat_output],
        )
    return demo

if __name__ == "__main__":
    demo = build_ui()
    demo.launch(share=True, debug=True)