# smoldocling-api / app.py
# jobian's picture
# Fix model loading
# d8475f5
import os
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from typing import List
from smoldocling import cli
import shutil
import dotenv
# Point the Hugging Face caches at directories under /app/.cache.
# NOTE(review): these are assigned after `from smoldocling import cli` has
# already executed; if that import reads the cache variables at import time,
# the values set here may arrive too late — confirm.
os.environ.update(
    TRANSFORMERS_CACHE="/app/.cache/transformers",
    HF_HUB_CACHE="/app/.cache/hub",
)

# Load environment variables
dotenv.load_dotenv()

# The ASGI application object that the route decorators below attach to.
app = FastAPI()

# CORS is left wide open (any origin, method and header); optional, but
# convenient for development and testing.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Working directories: uploads are written to UPLOAD_DIR, pipeline output to
# OUTPUT_DIR. Both are created at import time if they do not exist yet.
UPLOAD_DIR = "/tmp/uploads"
OUTPUT_DIR = "/tmp/output"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)
def docling_process_files(file_list: List[str]) -> str:
    """Run the smoldocling pipeline over ``file_list`` and return stitched text.

    Every path in ``file_list`` is handed to ``cli.process_files`` with JSON
    output written to ``OUTPUT_DIR``. The overlay and the returned text are
    then produced for the FIRST entry of ``file_list`` only.

    Args:
        file_list: Paths of the files to process; must contain at least one
            entry, since ``file_list[0]`` is read unconditionally.

    Returns:
        Whatever ``cli.stitch_text_from_json`` returns for the first file's
        JSON output (called with ``gpt_fix=False``).
    """
    cli.process_files(file_list, OUTPUT_DIR, output_format='json')

    # Output names are derived from the first input's base name, with
    # backslashes normalised to forward slashes before splitting.
    first_input = file_list[0].replace('\\', '/')
    stem, _extension = os.path.splitext(os.path.basename(first_input))
    json_path = os.path.join(OUTPUT_DIR, f"{stem}.json")
    overlay_path = os.path.join(OUTPUT_DIR, f"{stem}_overlay.html")

    # Generate overlay (optional)
    cli.generate_docling_overlay(first_input, json_path, overlay_path)

    # Stitch the final cleaned text; GPT fixing is switched off here.
    return cli.stitch_text_from_json(json_path, gpt_fix=False)
@app.get("/")
def root():
    """Answer GET / with a fixed JSON message showing the app is reachable."""
    payload = {"message": "Root is working"}
    return JSONResponse(content=payload)
@app.get("/health")
def health_check():
    """Answer GET /health with a fixed ``{"status": "ok"}`` JSON body."""
    payload = {"status": "ok"}
    return JSONResponse(content=payload)
def _safe_upload_name(filename):
    """Reduce a client-supplied file name to a bare base name, or ``None``.

    The name comes straight from the request, so it may contain directory
    components (``../x``, ``/etc/x``, ``C:\\dir\\x``). Only the final path
    component is kept. Backslashes are treated as separators as well, matching
    the normalisation ``docling_process_files`` applies when it derives the
    output file names.

    Returns:
        The sanitised base name, or ``None`` when nothing usable remains
        (missing/empty name, ``.``/``..``, or an embedded NUL byte).
    """
    if not filename:
        return None
    candidate = os.path.basename(filename.replace("\\", "/"))
    if candidate in ("", ".", "..") or "\x00" in candidate:
        return None
    return candidate


@app.post("/parse")
async def parse_docling(file: UploadFile = File(...)):
    """Save an uploaded file and return the text extracted from it.

    The upload is written to ``UPLOAD_DIR`` under its sanitised base name and
    then passed to ``docling_process_files``.

    Args:
        file: The uploaded file from the multipart request.

    Returns:
        ``{"text": ...}`` on success, or a 500 response with
        ``{"error": ...}`` if processing raises.

    Raises:
        HTTPException: 400 when no file is supplied or when its file name is
            missing or cannot be reduced to a safe base name.
    """
    if not file:
        raise HTTPException(status_code=400, detail="No file uploaded.")

    # Never join the raw client-supplied name onto UPLOAD_DIR: a name such as
    # "../../x" or an absolute path would escape the upload directory.
    safe_name = _safe_upload_name(file.filename)
    if safe_name is None:
        raise HTTPException(status_code=400, detail="Invalid file name.")

    save_path = os.path.join(UPLOAD_DIR, safe_name)
    with open(save_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    # NOTE(review): docling_process_files is called synchronously inside this
    # async handler, so the event loop is occupied while it runs — confirm
    # whether that is acceptable for the expected load.
    try:
        text_output = docling_process_files([save_path])
        return JSONResponse(content={"text": text_output})
    except Exception as e:
        # Top-level boundary: report the failure to the client as JSON.
        return JSONResponse(status_code=500, content={"error": str(e)})