Spaces:

vharika31
/

wolf

Sleeping

App Files Files Community

vharika31 committed on Apr 20

Commit

da0b26a

verified ·

1 Parent(s): 1c4093a

Create app.py

Browse files

Files changed (1) hide show

app.py +133 -0

app.py ADDED Viewed

	@@ -0,0 +1,133 @@

+# ✅ Install dependencies first: pip install pymupdf requests gradio
+# 📚 Imports
+import fitz  # PyMuPDF
+import requests
+import gradio as gr
+import tempfile
+import os
+import io
+# 🔑 Enter your OpenRouter API key here
+OPENROUTER_API_KEY = "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"
+# Global variable to store the extracted text
+pdf_text = ""
+# 📄 Extract text from PDF
+def extract_text_from_pdf(file_obj):
+    global pdf_text
+    if file_obj is None:
+        return "Please upload a PDF file first."
+    try:
+        # Get the file path from the file object
+        # In Gradio, the file object has a name attribute that contains the path
+        file_path = file_obj.name
+        # Now open the file with PyMuPDF
+        doc = fitz.open(file_path)
+        text = ""
+        for page in doc:
+            text += page.get_text()
+        doc.close()
+        # Store the text for later use
+        pdf_text = text
+        # Return preview of the extracted text
+        preview = text[:500] + "..." if len(text) > 500 else text
+        return f"✅ PDF uploaded and processed successfully. Preview:\n\n{preview}"
+    except Exception as e:
+        return f"❌ Error processing PDF: {str(e)}"
+# 💬 Ask the open-source LLM (Mistral-7B via OpenRouter)
+def ask_open_source_llm(question, model_choice="nvidia/llama-3.1-nemotron-nano-8b-v1:free"):
+    global pdf_text
+    if not pdf_text:
+        return "⚠️ Please upload a PDF document first."
+    # Limit text to prevent token overflow
+    limited_text = pdf_text[:3000]  # First 3000 characters
+    # Create prompt based on question
+    if not question:
+        prompt = f"Summarize the following document:\n\n{limited_text}"
+    else:
+        prompt = f"The document says:\n\n{limited_text}\n\nNow answer this: {question}"
+    # Call the API
+    url = "https://openrouter.ai/api/v1/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    data = {
+        "model": model_choice,
+        "messages": [{"role": "user", "content": prompt}]
+    }
+    try:
+        response = requests.post(url, headers=headers, json=data)
+        if response.status_code == 200:
+            return response.json()["choices"][0]["message"]["content"]
+        else:
+            return f"❌ Error: {response.text}"
+    except Exception as e:
+        return f"❌ An error occurred: {str(e)}"
+# Gradio app function
+def process_query(pdf_file, question, model_choice):
+    # First extract text if a PDF is uploaded
+    if pdf_file is not None:
+        result = extract_text_from_pdf(pdf_file)
+        if result.startswith("❌ Error"):
+            return result
+    # Then process the question
+    if question:
+        return ask_open_source_llm(question, model_choice)
+    else:
+        return ask_open_source_llm("Please summarize this document.", model_choice)
+# Create Gradio interface
+with gr.Blocks(title="PDF Document Analysis") as app:
+    gr.Markdown("# 📚 PDF Document Analysis with LLM")
+    gr.Markdown("Upload a PDF document and ask questions about its content.")
+    with gr.Row():
+        with gr.Column(scale=1):
+            pdf_input = gr.File(label="Upload PDF Document", file_types=[".pdf"])
+            model_choice = gr.Dropdown(
+                choices=[
+                    "nvidia/llama-3.1-nemotron-nano-8b-v1:free",
+                    "mistralai/mistral-7b-instruct-v0.1:free",
+                    "meta-llama/llama-2-13b-chat:free"
+                ],
+                label="LLM Model",
+                value="nvidia/llama-3.1-nemotron-nano-8b-v1:free"
+            )
+            question_input = gr.Textbox(label="Ask a question (or leave empty for summary)", lines=2)
+            submit_btn = gr.Button("Process", variant="primary")
+        with gr.Column(scale=2):
+            output = gr.Textbox(label="Response", lines=15)
+    # Set up event handlers
+    submit_btn.click(
+        fn=process_query,
+        inputs=[pdf_input, question_input, model_choice],
+        outputs=output
+    )
+    gr.Markdown("### 📝 Notes")
+    gr.Markdown("- For large documents, only the first 3000 characters are analyzed")
+    gr.Markdown("- You can change the LLM model from the dropdown menu")
+    gr.Markdown("- Leave the question field empty to get a general summary")
+# Launch the app
+app.launch(debug=True, share=True)