"""
PDF → Summary → Audio → Talk to PDF → Diagram

All Hugging Face APIs.

Pipeline overview:
    1. Extract text from an uploaded PDF (PyMuPDF).
    2. Summarize it with a hosted text-generation model.
    3. Turn the summary into speech and an illustrative image.
    4. Answer follow-up questions against the extracted text.
"""

import os
import tempfile
import time
from typing import Any, Dict, List, Optional

import fitz  # PyMuPDF
import requests
import gradio as gr

# ================== Config ==================
CHUNK_CHARS = 20000
HF_SUMMARY_MODEL = "Groq/Llama-3-Groq-8B-Tool-Use"  # text-generation
HF_TTS_MODEL = "espnet/kan-bayashi_ljspeech_vits"
HF_IMAGE_MODEL = "runwayml/stable-diffusion-v1-5"
HF_API_BASE = "https://api-inference.huggingface.co/models"

# Maps the Content-Type of a TTS response to a matching file suffix so the
# saved file's extension agrees with its actual encoding.
_AUDIO_SUFFIXES = {
    "audio/flac": ".flac",
    "audio/x-flac": ".flac",
    "audio/mpeg": ".mp3",
    "audio/ogg": ".ogg",
    "audio/wav": ".wav",
    "audio/wave": ".wav",
    "audio/x-wav": ".wav",
}

# NOTE(review): this is process-wide state, so every visitor of the app shares
# the most recently processed PDF. Moving it to per-session state (gr.State)
# would change the signature of ask_pdf_question, so it is left as-is here.
pdf_text_storage = {"text": "", "processed": False}


# ================== Utils ==================
def _auth_headers(hf_token: str) -> Dict[str, str]:
    """Build the bearer-token header used by every Hugging Face request."""
    return {"Authorization": f"Bearer {hf_token}"}


def _resolve_token(hf_token: Optional[str]) -> str:
    """Return the user-supplied token, else the server's HF_TOKEN, else ""."""
    token = (hf_token or "").strip()
    return token or os.environ.get("HF_TOKEN", "").strip()


def _post_text_generation(prompt: str, hf_token: str, parameters: Dict[str, Any]) -> Any:
    """POST a prompt to the text-generation model and return the decoded JSON.

    Raises:
        requests.RequestException: on network failure, a non-2xx status, or a
            response body that is not valid JSON.
    """
    payload = {
        "inputs": prompt,
        # Without return_full_text=False the API echoes the whole prompt in
        # front of the completion.
        "parameters": {**parameters, "return_full_text": False},
    }
    resp = requests.post(
        f"{HF_API_BASE}/{HF_SUMMARY_MODEL}",
        headers=_auth_headers(hf_token),
        json=payload,
        timeout=120,
    )
    resp.raise_for_status()
    return resp.json()


def _extract_generated_text(output: Any) -> Optional[str]:
    """Return output[0]["generated_text"] if the response has that shape, else None."""
    if isinstance(output, list) and output:
        first = output[0]
        if isinstance(first, dict) and "generated_text" in first:
            return first["generated_text"]
    return None


def extract_text_from_pdf(file_path: str) -> str:
    """Return the plain text of every page in the PDF, pages separated by blank lines."""
    doc = fitz.open(file_path)
    try:
        text = "\n\n".join(page.get_text("text") for page in doc)
    finally:
        # Close the document even if a page fails to extract.
        doc.close()
    return text.strip()


def chunk_text(text: str, max_chars: int) -> List[str]:
    """Split text into stripped, non-empty chunks of at most max_chars characters.

    Chunks are cut at the last newline inside the window when there is one,
    otherwise at the last space, otherwise at exactly max_chars.

    Raises:
        ValueError: if text is non-empty and max_chars is not positive.
    """
    if not text:
        return []
    if max_chars <= 0:
        raise ValueError("max_chars must be a positive integer")

    parts: List[str] = []
    start, length = 0, len(text)
    while start < length:
        end = min(start + max_chars, length)
        if end < length:
            # Search from start + 1 so a separator sitting exactly at `start`
            # is ignored and every iteration is guaranteed to advance.
            cut = text.rfind("\n", start + 1, end)
            if cut == -1:
                cut = text.rfind(" ", start + 1, end)
            if cut != -1:
                end = cut
        piece = text[start:end].strip()
        if piece:
            parts.append(piece)
        start = end
    return parts


# ================== Hugging Face Summarization ==================
def summarize_chunk_hf(chunk_text: str, hf_token: str) -> str:
    """Summarize one piece of text with the hosted text-generation model.

    The first parameter keeps its original name for keyword-argument callers;
    it shadows the module-level chunk_text() only inside this function.

    Returns:
        The generated summary, or str() of the raw response when the response
        does not have the expected shape.

    Raises:
        requests.RequestException: on network, HTTP, or JSON-decoding failure.
    """
    prompt = (
        "Summarize the following text into a concise paragraph (~180 words max):\n\n"
        f"{chunk_text}"
    )
    output = _post_text_generation(
        prompt, hf_token, {"max_new_tokens": 800, "temperature": 0.2}
    )
    generated = _extract_generated_text(output)
    return generated.strip() if generated is not None else str(output)


def summarize_document(extracted_text: str, hf_token: str) -> str:
    """Summarize a whole document, chunking it first when it exceeds CHUNK_CHARS.

    Chunks that fail to summarize are skipped instead of having their error
    message fed into the final prompt as if it were document content.

    Raises:
        RuntimeError: if the document needed chunking and no chunk could be
            summarized.
        requests.RequestException: if the single-shot or final combining
            request fails.
    """
    if len(extracted_text) <= CHUNK_CHARS:
        return summarize_chunk_hf(extracted_text, hf_token)

    summaries: List[str] = []
    errors: List[str] = []
    for piece in chunk_text(extracted_text, CHUNK_CHARS):
        try:
            summaries.append(summarize_chunk_hf(piece, hf_token))
        except (requests.RequestException, ValueError) as exc:
            errors.append(str(exc))

    if not summaries:
        detail = errors[-1] if errors else "document contained no text chunks"
        raise RuntimeError(f"Could not summarize any chunk: {detail}")
    if len(summaries) == 1:
        # Nothing to combine; avoid a redundant model call.
        return summaries[0]

    final_prompt = (
        "Combine and refine the following summaries into a single clear summary "
        "(200-300 words):\n\n" + " ".join(summaries)
    )
    return summarize_chunk_hf(final_prompt, hf_token)


# ================== Hugging Face TTS ==================
def hf_tts(summary_text: str, hf_token: str, model: str = HF_TTS_MODEL) -> str:
    """Synthesize speech for summary_text and return the path of the saved audio file.

    The file suffix follows the response Content-Type when it is a known audio
    type and falls back to ".wav" otherwise. The caller owns the temp file.

    Raises:
        requests.RequestException: on network failure or a non-2xx status.
    """
    resp = requests.post(
        f"{HF_API_BASE}/{model}",
        headers=_auth_headers(hf_token),
        json={"inputs": summary_text},
        timeout=120,
    )
    resp.raise_for_status()

    content_type = resp.headers.get("Content-Type", "").split(";")[0].strip().lower()
    suffix = _AUDIO_SUFFIXES.get(content_type, ".wav")
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(resp.content)
    return tmp.name


# ================== Talk to PDF ==================
def ask_pdf_question(question: str, hf_token: str) -> str:
    """Answer a question using the first 15000 characters of the stored PDF text.

    Always returns a display string: the answer prefixed with "🤖", or a "❌"
    message when no PDF is loaded, the question or token is missing, or the
    request fails.
    """
    if not pdf_text_storage["processed"]:
        return "❌ Please process a PDF first!"
    if not question or not question.strip():
        return "❌ Please enter a question!"
    token = _resolve_token(hf_token)
    if not token:
        return "❌ Missing Hugging Face token!"

    prompt = (
        f"Here is the PDF content:\n\n{pdf_text_storage['text'][:15000]}\n\n"
        f"User Question: {question}\nAnswer strictly based on PDF content."
    )
    try:
        # Greedy decoding via do_sample=False: text-generation backends reject
        # temperature=0 because temperature must be strictly positive.
        output = _post_text_generation(
            prompt, token, {"max_new_tokens": 500, "do_sample": False}
        )
    except (requests.RequestException, ValueError) as exc:
        return f"❌ Error: {exc}"

    answer = _extract_generated_text(output)
    if answer is not None:
        return f"🤖 {answer.strip()}"
    return str(output)


# ================== Diagram via HF ==================
def generate_diagram(summary: str, hf_token: str) -> str:
    """Generate an illustration for the summary and return the saved file's path.

    On success the path ends in ".png". If the request fails or the response
    is too small to be an image, a ".txt" placeholder describing the failure
    is written instead and its path is returned.
    """
    payload = {"inputs": f"detailed diagram, clean illustration of: {summary[:500]}"}
    image_bytes: Optional[bytes] = None
    try:
        resp = requests.post(
            f"{HF_API_BASE}/{HF_IMAGE_MODEL}",
            headers=_auth_headers(hf_token),
            json=payload,
            timeout=60,
        )
        # Very small bodies are error payloads rather than images.
        if resp.status_code == 200 and len(resp.content) > 1000:
            image_bytes = resp.content
    except requests.RequestException:
        # A network failure is handled like any other failed generation.
        image_bytes = None

    if image_bytes is not None:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            tmp.write(image_bytes)
        return tmp.name

    # fallback: text placeholder
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp:
        tmp.write(f"Diagram generation failed. Summary: {summary[:200]}...".encode())
    return tmp.name


# ================== Main Pipeline ==================
def process_pdf_pipeline(pdf_file, hf_token):
    """Run extract → summarize → TTS → diagram for an uploaded PDF.

    Returns:
        A 4-tuple (summary_or_error, audio_path_or_None, image_path_or_None,
        status) matching the Gradio outputs wired up in build_ui().

    Extraction and summarization are mandatory: if either fails, the error is
    returned in the first slot. Audio and diagram are best-effort: if either
    fails, the summary is still returned and the status names what failed.
    """
    idle_status = "Process a PDF first!"
    try:
        token = _resolve_token(hf_token)
        if not token:
            return "❌ Missing Hugging Face token!", None, None, idle_status
        if pdf_file is None:
            return "❌ Please upload a PDF!", None, None, idle_status

        pdf_path = pdf_file.name if hasattr(pdf_file, "name") else str(pdf_file)
        text = extract_text_from_pdf(pdf_path)
        if not text.strip():
            return "❌ PDF contains no extractable text!", None, None, idle_status

        pdf_text_storage["text"] = text
        pdf_text_storage["processed"] = True

        summary = summarize_document(text, token)
    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing the app.
        pdf_text_storage["processed"] = False
        return f"❌ Error: {str(e)}", None, None, idle_status

    problems: List[str] = []

    try:
        audio_path = hf_tts(summary, token)
    except (requests.RequestException, OSError) as exc:
        audio_path = None
        problems.append(f"audio failed: {exc}")

    try:
        diagram_path = generate_diagram(summary, token)
    except OSError as exc:
        diagram_path = None
        problems.append(f"diagram failed: {exc}")
    else:
        # generate_diagram() returns a ".txt" placeholder on failure, which the
        # image component cannot display.
        if not diagram_path.lower().endswith(".png"):
            diagram_path = None
            problems.append("diagram generation failed")

    if problems:
        status = "⚠️ PDF processed, but " + "; ".join(problems)
    else:
        status = "✅ PDF processed!"
    return summary, audio_path, diagram_path, status


# ================== Gradio UI ==================
def build_ui():
    """Build and return the Gradio Blocks app (not yet launched)."""
    # The server's HF_TOKEN is deliberately NOT used as the textbox value:
    # component values are sent to the browser, which would expose the token
    # to anyone who can open the (publicly shared) page. An empty field falls
    # back to the environment token on the server side instead.
    if os.environ.get("HF_TOKEN"):
        token_hint = "Leave blank to use the server's HF_TOKEN"
    else:
        token_hint = "hf_..."

    with gr.Blocks(title="🔥 PDF AI Pipeline") as demo:
        gr.Markdown("## 🔥 Hugging Face PDF Processor")
        with gr.Row():
            with gr.Column(scale=1):
                pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
                hf_token = gr.Textbox(
                    label="HF Token", type="password", placeholder=token_hint
                )
                process_btn = gr.Button("🚀 PROCESS PDF")
                status_output = gr.Textbox(label="Status", interactive=False)
            with gr.Column(scale=2):
                summary_output = gr.Textbox(label="Summary", lines=12)
                audio_output = gr.Audio(label="Audio", type="filepath")
                diagram_output = gr.Image(label="Diagram", interactive=False)

        gr.Markdown("## 💬 Chat with PDF")
        question_input = gr.Textbox(label="Your Question")
        ask_btn = gr.Button("📨 ASK")
        chat_output = gr.Textbox(label="Response", lines=8)

        process_btn.click(
            fn=process_pdf_pipeline,
            inputs=[pdf_input, hf_token],
            outputs=[summary_output, audio_output, diagram_output, status_output],
        )
        ask_btn.click(
            fn=ask_pdf_question,
            inputs=[question_input, hf_token],
            outputs=[chat_output],
        )
        question_input.submit(
            fn=ask_pdf_question,
            inputs=[question_input, hf_token],
            outputs=[chat_output],
        )
    return demo


if __name__ == "__main__":
    demo = build_ui()
    demo.launch(share=True, debug=True)