# apimineru / app.py (Hugging Face Space by marcosremar2)
# Commit 8d53d80: Simplified app to use in-memory storage instead of S3
import gradio as gr
from typing import Dict
import os
import base64
from magic_pdf.user_api import parse_union_pdf
from magic_pdf.rw import BaseReaderWriter
from loguru import logger
from io import BytesIO
class InMemoryReaderWriter(BaseReaderWriter):
    """Reader/writer that keeps everything it is given in a plain dict.

    Nothing is persisted: stored content lives only as long as the
    instance does.
    """

    def __init__(self):
        # Maps the path passed to ``write`` to the content stored under it.
        self.storage = {}

    def write(self, content, path, content_type=None):
        """Store ``content`` under ``path`` and return its ``memory://`` URI.

        Writing to a path that already exists replaces the previous
        content. ``content_type`` is accepted but not used.
        """
        self.storage[path] = content
        return f"memory://{path}"

    def read(self, path):
        """Return the content previously stored under ``path``.

        ``path`` may be the raw key or the ``memory://`` URI that ``write``
        returned; a leading ``memory://`` is stripped before the lookup.

        Raises:
            FileNotFoundError: nothing is stored under the resolved key.
        """
        scheme = "memory://"
        key = path[len(scheme):] if path.startswith(scheme) else path
        if key not in self.storage:
            raise FileNotFoundError(f"File not found in memory storage: {key}")
        return self.storage[key]
def get_storage():
    """Create the in-memory storage backend.

    Returns:
        A newly constructed ``InMemoryReaderWriter``.
    """
    storage_backend = InMemoryReaderWriter()
    return storage_backend
def inference(inputs: Dict):
    """Serverless API entry point: parse a base64-encoded PDF.

    Args:
        inputs: Request payload. ``pdf_bytes`` (required) carries the
            base64-encoded PDF. The optional keys ``lang`` (default
            ``"zh"``), ``layout_model``, ``formula_enable`` and
            ``table_enable`` (each default ``True``) are forwarded to
            ``parse_union_pdf``.

    Returns:
        ``{"status": "success", "data": <parse result>}`` when parsing
        succeeds, otherwise ``{"status": "error", "message": <text>}``.
        Any ``Exception`` raised along the way is logged and reported
        through the returned dict instead of propagating.
    """
    # Value used for each option the caller leaves out of the payload.
    option_defaults = {
        "lang": "zh",
        "layout_model": True,
        "formula_enable": True,
        "table_enable": True,
    }

    try:
        if "pdf_bytes" not in inputs:
            return {"status": "error", "message": "No PDF data provided"}

        # Decode separately so a bad payload gets its own error message.
        try:
            raw_pdf = base64.b64decode(inputs["pdf_bytes"])
        except Exception as decode_error:
            return {"status": "error", "message": f"Invalid PDF data: {str(decode_error)}"}

        writer = get_storage()

        options = {
            name: inputs.get(name, fallback)
            for name, fallback in option_defaults.items()
        }
        options["input_model_is_empty"] = True

        # An empty model list is passed together with
        # input_model_is_empty=True.
        parsed = parse_union_pdf(
            pdf_bytes=raw_pdf,
            pdf_models=[],
            imageWriter=writer,
            **options
        )
        return {"status": "success", "data": parsed}
    except Exception as error:
        logger.exception("Error processing PDF")
        return {"status": "error", "message": str(error)}
# Gradio click handler for the "UI Demo" tab
def process_pdf_ui(pdf_file, lang="zh", layout_model=True, formula_enable=True, table_enable=True):
    """Run ``inference`` on a PDF uploaded through the Gradio UI.

    Args:
        pdf_file: The value produced by the upload component. Raw bytes are
            used as-is; a path (``str`` / ``os.PathLike``) or an object
            exposing the path as ``.name`` is read from disk. Which of
            these Gradio delivers depends on the component's ``type``
            setting and the Gradio version.
        lang: Language code forwarded to ``inference``.
        layout_model: Forwarded to ``inference``.
        formula_enable: Forwarded to ``inference``.
        table_enable: Forwarded to ``inference``.

    Returns:
        The dict returned by ``inference``, or an
        ``{"status": "error", "message": ...}`` dict when no file was
        given, the upload value is of an unsupported type, or the file
        cannot be read.
    """
    if pdf_file is None:
        return {"status": "error", "message": "No PDF file provided"}

    if isinstance(pdf_file, (bytes, bytearray, memoryview)):
        pdf_bytes = bytes(pdf_file)
    else:
        # Not bytes: treat the value as a path, or as a temp-file wrapper
        # that carries its path in ``.name``.
        if isinstance(pdf_file, (str, os.PathLike)):
            source_path = pdf_file
        else:
            source_path = getattr(pdf_file, "name", None)

        if not isinstance(source_path, (str, os.PathLike)):
            return {
                "status": "error",
                "message": f"Unsupported upload type: {type(pdf_file).__name__}",
            }

        try:
            with open(source_path, "rb") as handle:
                pdf_bytes = handle.read()
        except OSError as exc:
            return {
                "status": "error",
                "message": f"Could not read uploaded file: {exc}",
            }

    # inference() expects the PDF as base64 text, so encode it here.
    encoded_pdf = base64.b64encode(pdf_bytes).decode('utf-8')

    return inference({
        "pdf_bytes": encoded_pdf,
        "lang": lang,
        "layout_model": layout_model,
        "formula_enable": formula_enable,
        "table_enable": table_enable,
    })
# Build the Gradio app: an interactive demo tab plus a usage-notes tab.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Processing API")

    with gr.Tab("UI Demo"):
        with gr.Row():
            with gr.Column():
                # type="binary" makes the component hand the upload to the
                # click handler as raw bytes, which is what process_pdf_ui
                # base64-encodes. The default type delivers a path or
                # temp-file object instead.
                pdf_input = gr.File(label="Upload PDF", type="binary")
                lang = gr.Dropdown(["zh", "en"], label="Language", value="zh")
                layout_model = gr.Checkbox(label="Use Layout Model", value=True)
                formula_enable = gr.Checkbox(label="Enable Formula Detection", value=True)
                table_enable = gr.Checkbox(label="Enable Table Detection", value=True)
                submit_btn = gr.Button("Process PDF")
            with gr.Column():
                output = gr.JSON(label="Result")

        # Input order must match process_pdf_ui's positional parameters.
        submit_btn.click(
            fn=process_pdf_ui,
            inputs=[pdf_input, lang, layout_model, formula_enable, table_enable],
            outputs=output
        )

    with gr.Tab("API Documentation"):
        # NOTE(review): the request format documented below matches the
        # dict accepted by inference(), but the only function wired to an
        # event in this block is process_pdf_ui. Confirm the endpoint and
        # payload against the deployed Space before relying on these notes.
        gr.Markdown("""
        ## API Usage
        ### Endpoint
        `POST https://marcosremar2-apimineru.hf.space/api/predict`
        ### Request Format
        ```json
        {
            "pdf_bytes": "base64 encoded PDF content",
            "lang": "zh", // Optional, default "zh"
            "layout_model": true, // Optional, default true
            "formula_enable": true, // Optional, default true
            "table_enable": true // Optional, default true
        }
        ```
        ### Python Example
        ```python
        from huggingface_hub import InferenceClient
        import base64
        def process_pdf(pdf_path: str, hf_token: str):
            # Create client
            client = InferenceClient(
                model="marcosremar2/apimineru",
                token=hf_token
            )
            # Read and encode PDF
            with open(pdf_path, 'rb') as f:
                pdf_bytes = base64.b64encode(f.read()).decode()
            # Send request
            response = client.post(json={
                "pdf_bytes": pdf_bytes,
                "lang": "zh",
                "layout_model": True,
                "formula_enable": True,
                "table_enable": True
            })
            return response
        ```
        """)

# This exposes both the UI and API endpoints
demo.queue().launch()