# NOTE: web-page residue captured along with this file ("Spaces: Sleeping");
# it is not part of the module.
from fastapi import FastAPI, HTTPException | |
from fastapi.middleware.cors import CORSMiddleware | |
from fastapi.responses import FileResponse, HTMLResponse | |
from pydantic import BaseModel | |
from typing import List, Dict, Any, Optional | |
from datetime import datetime | |
import uvicorn | |
import os | |
from pathlib import Path | |
import json | |
import logging | |
from contextlib import asynccontextmanager | |
# Import the analyzer | |
from analyze_data_quality import DataQualityAnalyzer | |
# Logging: configure the root logger at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Module-level state shared by the request handlers below.
# The analyzer instance; stays None until it has been constructed.
analyzer: Optional["DataQualityAnalyzer"] = None
# ISO-8601 timestamp string of the last completed analysis run, if any.
last_analysis_time: Optional[str] = None
# Report produced by the last analysis run (indexed by "summary" and
# "datasets" elsewhere in this module), if any.
analysis_results: Optional[Dict[str, Any]] = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run application startup and shutdown logic around the serving phase.

    Startup tries to construct the shared ``DataQualityAnalyzer``. A failure
    is logged (with traceback) instead of raised, so the server still starts
    and ``analyzer`` simply stays ``None``.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).

    Yields:
        None. Control returns to the framework while the app is serving.
    """
    global analyzer

    # Startup
    try:
        analyzer = DataQualityAnalyzer()
    except Exception as e:
        # Deliberately best-effort: keep the API up even without an analyzer.
        logger.exception("Error initializing analyzer: %s", e)
    else:
        logger.info("DataQualityAnalyzer initialized successfully")

    yield

    # Shutdown
    logger.info("Shutting down analysis API")
# The ASGI application object; startup/shutdown is delegated to `lifespan`.
app = FastAPI(
    title="Synthex Medical Text Analysis API",
    description="""
API for analyzing medical text data quality. This API provides endpoints for:
- Running data quality analysis
- Checking analysis status
- Accessing generated plots
- Listing available datasets
""",
    version="1.0.0",
    lifespan=lifespan,
)

# Add CORS middleware.
# Any origin, method and header is accepted. Credentials are disabled because
# wildcard origins must not be combined with credentialed requests: browsers
# reject that combination and the FastAPI CORS documentation forbids it.
# To support cookies/authorization headers, list explicit origins instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
class AnalysisResponse(BaseModel):
    """Response body returned after a full analysis run."""

    # The "summary" section of the generated report.
    summary: Dict[str, Any]
    # The "datasets" section of the generated report (a dict of dicts).
    datasets: Dict[str, Dict[str, Any]]
    # File names of the PNG plots found in the plots directory.
    plots_available: List[str]
    # ISO-8601 timestamp of when the analysis finished.
    timestamp: str
class StatusResponse(BaseModel):
    """Response body describing the most recent analysis run, if any."""

    # ISO-8601 timestamp of the last run; None when nothing has run yet.
    last_analysis: Optional[str]
    # The "summary" section of the last report; None when unavailable.
    summary: Optional[Dict[str, Any]]
    # True once at least one analysis run has completed.
    is_analyzed: bool
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the static HTML landing page listing the API's endpoints.

    Registered with ``response_class=HTMLResponse`` so the returned string is
    sent as ``text/html`` rather than JSON-encoded.

    Returns:
        The landing page markup as a string.
    """
    # NOTE(review): the Bootstrap version in the stylesheet URL was lost
    # (the link read "[email protected]"); pinned to a 5.x release here —
    # confirm the intended version.
    return """
    <html>
        <head>
            <title>Synthex Medical Text Analysis API</title>
            <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
            <style>
                body { padding: 20px; }
                .container { max-width: 800px; }
            </style>
        </head>
        <body>
            <div class="container">
                <h1>Synthex Medical Text Analysis API</h1>
                <p class="lead">API for analyzing medical text data quality</p>
                <h2>Available Endpoints</h2>
                <ul>
                    <li><strong>POST /analyze</strong> - Run full data analysis</li>
                    <li><strong>GET /analysis/status</strong> - Check analysis status</li>
                    <li><strong>GET /plots/{plot_name}</strong> - Get specific plot</li>
                    <li><strong>GET /datasets</strong> - List available datasets</li>
                </ul>
                <h2>API Documentation</h2>
                <p>For detailed API documentation, visit:</p>
                <ul>
                    <li><a href="/docs">Swagger UI</a></li>
                    <li><a href="/redoc">ReDoc</a></li>
                </ul>
                <h2>Status</h2>
                <p>API is running and ready to process requests.</p>
            </div>
        </body>
    </html>
    """
@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_data():
    """Run full data analysis and generate reports.

    Lazily creates the shared analyzer if startup did not, runs the analysis,
    report generation and plotting, and records the result in module state
    for the status endpoint.

    Returns:
        AnalysisResponse with the report's "summary" and "datasets" sections,
        the sorted PNG file names in the plots directory, and the completion
        timestamp.

    Raises:
        HTTPException: 500 if the analyzer cannot be created or any analysis
            step (including building the response) fails.
    """
    global analyzer, last_analysis_time, analysis_results

    if analyzer is None:
        try:
            analyzer = DataQualityAnalyzer()
        except Exception as e:
            logger.exception("Failed to initialize analyzer")
            raise HTTPException(
                status_code=500,
                detail=f"Failed to initialize analyzer: {str(e)}",
            ) from e

    try:
        # Run analysis
        analyzer.analyze_all_datasets()
        report = analyzer.generate_report()
        analyzer.plot_metrics()

        # Get list of generated plots (sorted so the output is deterministic)
        plots_dir = analyzer.data_dir.parent / "reports" / "plots"
        plots_available = sorted(f.name for f in plots_dir.glob("*.png"))

        finished_at = datetime.now().isoformat()
        response = AnalysisResponse(
            summary=report["summary"],
            datasets=report["datasets"],
            plots_available=plots_available,
            timestamp=finished_at,
        )
    except Exception as e:
        logger.exception("Analysis failed")
        raise HTTPException(
            status_code=500, detail=f"Analysis failed: {str(e)}"
        ) from e

    # Update global state only after the response was built successfully, so
    # a malformed report never leaves the status endpoint claiming "analyzed".
    last_analysis_time = finished_at
    analysis_results = report
    return response
@app.get("/analysis/status", response_model=StatusResponse)
async def get_analysis_status():
    """Get the status of the last analysis run.

    Returns:
        StatusResponse whose ``is_analyzed`` is False (with ``last_analysis``
        and ``summary`` set to None) when no run has completed, otherwise the
        last run's timestamp and the stored report's "summary" section.
    """
    is_analyzed = last_analysis_time is not None

    # The summary is only reported for a completed run with a stored report.
    summary = None
    if is_analyzed and analysis_results:
        summary = analysis_results["summary"]

    return StatusResponse(
        last_analysis=last_analysis_time,
        summary=summary,
        is_analyzed=is_analyzed,
    )
@app.get("/plots/{plot_name}")
async def get_plot(plot_name: str):
    """Get a specific plot by name.

    Args:
        plot_name: File name of the plot inside the plots directory.

    Returns:
        FileResponse streaming the requested plot file.

    Raises:
        HTTPException: 404 if the name does not refer to a regular file
            located inside the plots directory.
    """
    # Use the same directory the analysis endpoint lists plots from when an
    # analyzer is available; otherwise fall back to the default location.
    if analyzer is not None:
        plots_dir = analyzer.data_dir.parent / "reports" / "plots"
    else:
        plots_dir = Path("data/reports/plots")

    plots_root = plots_dir.resolve()
    plot_path = (plots_root / plot_name).resolve()

    # plot_name is client-supplied: refuse anything that resolves outside the
    # plots directory (e.g. ".."), and anything that is not a regular file.
    inside_plots_dir = plots_root in plot_path.parents
    if not inside_plots_dir or not plot_path.is_file():
        raise HTTPException(status_code=404, detail=f"Plot {plot_name} not found")

    return FileResponse(plot_path)
@app.get("/datasets")
async def get_datasets():
    """List all available datasets for analysis.

    Lazily creates the shared analyzer if it does not exist yet, then lists
    the ``*.json`` files in the analyzer's data directory.

    Returns:
        A dict ``{"datasets": [...]}`` where each entry has the file's stem
        ("name"), its path as a string ("path") and its size in bytes
        ("size"), sorted by path.

    Raises:
        HTTPException: 500 if the analyzer cannot be created or the directory
            listing fails.
    """
    # Without this declaration the assignment below makes `analyzer` local,
    # and the `is None` check raises UnboundLocalError on every call.
    global analyzer

    if analyzer is None:
        try:
            analyzer = DataQualityAnalyzer()
        except Exception as e:
            logger.exception("Failed to initialize analyzer")
            raise HTTPException(
                status_code=500,
                detail=f"Failed to initialize analyzer: {str(e)}",
            ) from e

    try:
        datasets = [
            {
                "name": file_path.stem,
                "path": str(file_path),
                "size": file_path.stat().st_size,
            }
            for file_path in sorted(analyzer.data_dir.glob("*.json"))
        ]
    except Exception as e:
        logger.exception("Failed to list datasets")
        raise HTTPException(
            status_code=500, detail=f"Failed to list datasets: {str(e)}"
        ) from e

    return {"datasets": datasets}
if __name__ == "__main__":
    # Script entry point: serve the `app` object from the `analysis_api`
    # module on all interfaces, port 8001, with auto-reload enabled.
    # NOTE(review): presumably this file is analysis_api.py — confirm.
    uvicorn.run(
        "analysis_api:app",
        host="0.0.0.0",
        port=8001,
        reload=True,
    )