Vx2-3y committed on
Commit
3fa9baf
·
0 Parent(s):

Initial project structure: FastAPI backend, Dockerfile, requirements, and PRD

Browse files
.gitignore ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ env/
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+
27
+ # PyInstaller
28
+ # Usually these files are written by a python script from a template
29
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
30
+ *.manifest
31
+ *.spec
32
+
33
+ # Installer logs
34
+ debug.log
35
+
36
+ # Unit test / coverage reports
37
+ htmlcov/
38
+ .tox/
39
+ .nox/
40
+ .coverage
41
+ .coverage.*
42
+ .cache
43
+ nosetests.xml
44
+ coverage.xml
45
+ *.cover
46
+ .hypothesis/
47
+ .pytest_cache/
48
+
49
+ # Environments
50
+ .env
51
+ .venv
52
+ ENV/
53
+ venv/
54
+
55
+ # VS Code
56
+ .vscode/
57
+
58
+ # Docker
59
+ *.log
60
+ docker-compose.override.yml
61
+
62
+ # Ignore local env files (.env is already listed above under "Environments")
+ .env.*
65
+
66
+ # macOS
67
+ .DS_Store
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use official Python slim image for smaller size
FROM python:3.11-slim

# Python runtime settings:
#   PYTHONDONTWRITEBYTECODE - don't emit .pyc files inside the container
#   PYTHONUNBUFFERED        - flush stdout/stderr immediately so logs stream
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Set work directory
WORKDIR /app

# Install system dependencies (build-essential for packages with C extensions).
# --no-install-recommends and the apt list cleanup keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer stays cached while only
# application code changes. --no-cache-dir avoids baking pip's cache into
# the image.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Expose port for FastAPI (7860 per the app's deployment target)
EXPOSE 7860

# Command to run the app with Uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from typing import Optional, Any
4
+
5
# FastAPI application instance; OpenAPI docs are auto-served at /docs
# and /openapi.json (noted at the bottom of this module).
app = FastAPI(
    title="NCOS Compliance LLM API",
    description="API contract for inference, health checks, and job queueing.",
    version="1.0.0"
)
10
+
11
+ # --- Pydantic models for request/response ---
12
+
13
class InferRequest(BaseModel):
    """Request body for POST /infer."""
    # Text to run inference on.
    input_text: str
    parameters: Optional[dict] = None  # e.g., temperature, max_tokens
16
+
17
class InferResponse(BaseModel):
    """Response body for POST /infer."""
    # Model output; the /infer handler returns "" on failure.
    result: str
    # "success" or "error" (the only values the /infer handler sets).
    status: str
    # Populated with the exception text when status == "error".
    error: Optional[str] = None
21
+
22
class QueueRequest(BaseModel):
    """Request body for POST /queue (job submission)."""
    # Text payload for the queued inference job.
    input_text: str
    # Optional generation parameters, same shape as InferRequest.parameters.
    parameters: Optional[dict] = None
25
+
26
class QueueResponse(BaseModel):
    """Response body for both POST and GET /queue."""
    # Identifier assigned on submission; used to poll for status.
    job_id: str
    # Job lifecycle state (handlers currently emit "queued" / "pending").
    status: str
    # Inference result once the job completes; None until then.
    result: Optional[str] = None
    # Error message if the job failed; None otherwise.
    error: Optional[str] = None
31
+
32
+ # --- Endpoints ---
33
+
34
+ @app.post("/infer", response_model=InferResponse)
35
+ def infer(request: InferRequest):
36
+ """
37
+ Run model inference on the input text.
38
+ """
39
+ # Placeholder logic for now
40
+ try:
41
+ # TODO: Call your model here
42
+ output = f"Echo: {request.input_text}"
43
+ return InferResponse(result=output, status="success")
44
+ except Exception as e:
45
+ return InferResponse(result="", status="error", error=str(e))
46
+
47
+ @app.get("/healthz")
48
+ def healthz():
49
+ """
50
+ Health check endpoint.
51
+ Returns 200 OK if the service is healthy.
52
+ """
53
+ return {"status": "ok"}
54
+
55
+ @app.post("/queue", response_model=QueueResponse)
56
+ def submit_job(request: QueueRequest):
57
+ """
58
+ Submit a job to the queue (e.g., Redis).
59
+ """
60
+ # TODO: Integrate with Redis queue
61
+ job_id = "job_123" # Placeholder
62
+ return QueueResponse(job_id=job_id, status="queued")
63
+
64
+ @app.get("/queue", response_model=QueueResponse)
65
+ def get_job_status(job_id: str):
66
+ """
67
+ Get the status/result of a queued job.
68
+ """
69
+ # TODO: Query Redis for job status/result
70
+ return QueueResponse(job_id=job_id, status="pending")
71
+
72
+ # --- End of API contract skeleton ---
73
+
74
+ # FastAPI will auto-generate OpenAPI docs at /docs and /openapi.json
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Python dependencies for the FastAPI backend.
# NOTE(review): the PRD (Step 3: "Pin all versions in requirements.txt")
# requires exact version pins; these entries are unpinned. Add ==X.Y.Z
# pins once working versions are validated.

# FastAPI web framework
fastapi

# ASGI server for running FastAPI
uvicorn[standard]

# Supabase Python client
supabase

# For loading environment variables from .env
python-dotenv

# Data validation and settings management
pydantic
scripts/prd.md ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Product Requirements Document (PRD)
2
+ # Project: NCOS_S1 (Large Compliance LLM Pipeline)
3
+
4
+ ## 1. Project Overview
5
+ Deploy a large compliance LLM (ACATECH/ncos, Llama-2-70B) on Hugging Face Spaces, with a Next.js frontend (Vercel), Supabase for test cases, and Redis for queueing. The backend is a FastAPI app running in a Docker container for full control (CUDA, dependencies, etc.).
6
+
7
+ ---
8
+
9
+ ## 2. Current State Analysis
10
+ - **Backend:**
11
+ - FastAPI app in Hugging Face Space, Dockerized.
12
+ - CUDA and torch set up for GPU inference.
13
+ - Permissions and cache issues resolved.
14
+ - Requirements are mostly correct and reproducible.
15
+ - **Frontend:**
16
+ - Next.js app on Vercel (not tightly integrated yet).
17
+ - **Test/Queue:**
18
+ - Supabase for test cases.
19
+ - Redis for queueing (not fully integrated).
20
+ - **Issues:**
21
+ - Dependency hell (CUDA, torch, flash-attn, numpy, etc.).
22
+ - File permission and cache issues.
23
+ - Model/tokenizer loading errors (corrupt/incompatible files).
24
+ - Manual syncing of requirements and Dockerfile.
25
+ - No robust, end-to-end pipeline from test case → queue → model → result → storage.
26
+ - No clear API contract between frontend, backend, and test/queue system.
27
+ - No health checks, monitoring, or error reporting.
28
+ - No automated deployment or CI/CD for the Space.
29
+ - Monolithic codebase, hard to debug.
30
+
31
+ ---
32
+
33
+ ## 3. Goals
34
+ - Modular, robust, and reproducible pipeline for LLM compliance testing.
35
+ - Clean separation of backend, frontend, and queue/storage.
36
+ - Automated, reliable deployment and monitoring.
37
+ - Clear API contract and documentation.
38
+
39
+ ---
40
+
41
+ ## 4. Recommended Architecture
42
+ ### A. Modular Structure
43
+ - **Backend (Hugging Face Space):**
44
+ - FastAPI app, Dockerized, REST API for inference.
45
+ - Handles model loading, inference, health checks.
46
+ - Connects to Redis for job queueing.
47
+ - Optionally connects to Supabase for test/result storage.
48
+ - **Frontend (Vercel/Next.js):**
49
+ - Calls backend API for inference.
50
+ - Displays results, test case status, health info.
51
+ - **Queue/Storage:**
52
+ - Redis for job queueing (decouples frontend/backend).
53
+ - Supabase for storing test cases/results.
54
+
55
+ ### B. Key Features
56
+ - Robust error handling and logging in backend.
57
+ - Health check endpoints (`/healthz`, `/readyz`).
58
+ - Clear API contract (OpenAPI/Swagger for FastAPI).
59
+ - Automated Docker build and deployment (version pinning).
60
+ - CI/CD pipeline for backend and frontend.
61
+ - Documentation for setup, usage, troubleshooting.
62
+
63
+ ---
64
+
65
+ ## 5. Action Plan
66
+ ### Step 1: Design the API Contract
67
+ - Define endpoints for:
68
+ - `/infer` (POST): Accepts input, returns model output.
69
+ - `/healthz` (GET): Returns service health.
70
+ - `/queue` (POST/GET): For job submission/status (if using Redis).
71
+ - Use FastAPI's OpenAPI docs for clarity.
72
+
73
+ ### Step 2: Clean Backend Implementation
74
+ - Start a new repo or clean branch.
75
+ - Write a minimal FastAPI app:
76
+ - Loads model/tokenizer (with robust error handling).
77
+ - Exposes `/infer` and `/healthz`.
78
+ - Logs errors and requests.
79
+ - Add Redis integration for queueing (optional, but recommended for scale).
80
+ - Add Supabase integration for test/result storage (optional, can be added after core works).
81
+
82
+ ### Step 3: Dockerize the Backend
83
+ - Use a clean, minimal Dockerfile:
84
+ - Start from `nvidia/cuda:12.1.0-devel-ubuntu22.04`.
85
+ - Install Python, torch, dependencies in correct order.
86
+ - Set up cache and permissions.
87
+ - Pin all versions in `requirements.txt`.
88
+ - Add a health check in Dockerfile (`HEALTHCHECK`).
89
+
90
+ ### Step 4: Model/Tokenizer Management
91
+ - Ensure model/tokenizer files are valid and compatible.
92
+ - Test loading locally before pushing to Hugging Face.
93
+ - Document the process for updating model files.
94
+
95
+ ### Step 5: Frontend Integration
96
+ - Update Next.js frontend to call the new backend API.
97
+ - Show job status, results, and health info.
98
+ - Add error handling and user feedback.
99
+
100
+ ### Step 6: Queue and Storage Integration
101
+ - Set up Redis for job queueing.
102
+ - Set up Supabase for test case/result storage.
103
+ - Ensure backend can pull jobs from Redis, process, and store results in Supabase.
104
+
105
+ ### Step 7: Monitoring and Health
106
+ - Add logging and error reporting (e.g., to stdout, or a logging service).
107
+ - Implement `/healthz` and `/readyz` endpoints.
108
+ - Optionally, add Prometheus/Grafana metrics.
109
+
110
+ ### Step 8: CI/CD and Documentation
111
+ - Add GitHub Actions or similar for automated build/test/deploy.
112
+ - Write clear README and API docs.
113
+
114
+ ---
115
+
116
+ ## 6. Success Criteria
117
+ - End-to-end pipeline works: test case → queue → model → result → storage.
118
+ - Robust error handling and health checks in place.
119
+ - Automated, reproducible builds and deployments.
120
+ - Clear, up-to-date documentation for all components.
tasks/task_001.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Task ID: 1
2
+ # Title: Design API Contract
3
+ # Status: pending
4
+ # Dependencies: None
5
+ # Priority: high
6
+ # Description: Define endpoints for inference, health checks, and job queueing. Use FastAPI's OpenAPI for documentation.
7
+ # Details:
8
+ Create endpoints for /infer (POST), /healthz (GET), and /queue (POST/GET if using Redis). Specify request/response schemas and document using OpenAPI.
9
+
10
+ # Test Strategy:
11
+ Manually test endpoints with sample requests. Verify OpenAPI docs are generated correctly.
12
+
13
+ # Subtasks:
14
+ ## 1. Define /infer endpoint [in-progress]
15
+ ### Dependencies: None
16
+ ### Description: Create a POST endpoint at /infer for making inference requests. Define request and response schemas.
17
+ ### Details:
18
+ The /infer endpoint should accept a JSON payload with the required input data. It should return the inference result or an appropriate error response. Use pydantic to define the request and response models. Document the endpoint using FastAPI's OpenAPI annotations.
19
+
20
+ ## 2. Implement /healthz endpoint [in-progress]
21
+ ### Dependencies: None
22
+ ### Description: Create a GET endpoint at /healthz for health checks. Return a 200 OK response if the service is healthy.
23
+ ### Details:
24
+ The /healthz endpoint should perform any necessary checks to determine if the service is healthy and able to handle requests. This can include checking database connections, verifying external service availability, etc. If all checks pass, return a 200 OK response. Use FastAPI's OpenAPI annotations to document the endpoint.
25
+
26
+ ## 3. Add /queue endpoint for job queueing [pending]
27
+ ### Dependencies: 1.1
28
+ ### Description: If using Redis for job queueing, create POST and GET endpoints at /queue for submitting and retrieving jobs.
29
+ ### Details:
30
+ The POST /queue endpoint should accept a job payload and enqueue it in Redis. The GET /queue endpoint should retrieve job status and results. If not using Redis, this subtask can be skipped. Ensure proper error handling and document the endpoints using FastAPI's OpenAPI annotations.
31
+
tasks/task_002.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Task ID: 2
2
+ # Title: Implement FastAPI Backend
3
+ # Status: pending
4
+ # Dependencies: 1
5
+ # Priority: high
6
+ # Description: Write a minimal FastAPI app that loads the model, exposes API endpoints, and handles errors and logging.
7
+ # Details:
8
+ Create a new clean codebase. Implement model loading with error handling, the /infer and /healthz endpoints, and request/error logging. Optionally integrate Redis for queueing.
9
+
10
+ # Test Strategy:
11
+ Unit test critical functionality. Integration test API endpoints. Verify logging and error handling.
tasks/task_003.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Task ID: 3
2
+ # Title: Dockerize Backend
3
+ # Status: pending
4
+ # Dependencies: 2
5
+ # Priority: high
6
+ # Description: Create a clean, minimal Dockerfile for the FastAPI backend. Ensure proper setup of dependencies, cache, and permissions.
7
+ # Details:
8
+ Base image: nvidia/cuda:12.1.0-devel-ubuntu22.04. Install Python, torch, and pinned dependencies in order. Set up cache and permissions. Add a HEALTHCHECK.
9
+
10
+ # Test Strategy:
11
+ Build and run Docker image. Verify API endpoints, model loading, and health check. Test on GPU machine.
tasks/task_004.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Task ID: 4
2
+ # Title: Validate Model and Tokenizer
3
+ # Status: pending
4
+ # Dependencies: None
5
+ # Priority: medium
6
+ # Description: Ensure the model and tokenizer files are valid, compatible, and ready for use in the backend.
7
+ # Details:
8
+ Test loading the model and tokenizer files locally before integrating into the backend. Verify versions and checksums. Document the update process.
9
+
10
+ # Test Strategy:
11
+ Manually test model/tokenizer loading. Validate model outputs. Automate checks if possible.
tasks/task_005.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Task ID: 5
2
+ # Title: Integrate Frontend
3
+ # Status: pending
4
+ # Dependencies: 2
5
+ # Priority: high
6
+ # Description: Update the Next.js frontend to use the new backend API. Display job status, results, and health. Handle errors.
7
+ # Details:
8
+ Modify frontend to make requests to /infer, /healthz, and /queue endpoints. Update UI to show job status, inference results, and backend health. Implement user-friendly error handling.
9
+
10
+ # Test Strategy:
11
+ Integration test frontend against a running backend instance. Verify UI updates and error display.
tasks/task_006.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Task ID: 6
2
+ # Title: Set Up Redis Queue
3
+ # Status: pending
4
+ # Dependencies: 2
5
+ # Priority: medium
6
+ # Description: Configure Redis for job queueing. Integrate Redis into the backend for job submission and status tracking.
7
+ # Details:
8
+ Provision a Redis instance. Implement a job queue using Redis lists or streams. Modify the backend to enqueue jobs on /queue POST and return status on GET. Process jobs asynchronously.
9
+
10
+ # Test Strategy:
11
+ Integration test queueing by submitting jobs and verifying processing. Validate job status updates.
tasks/task_007.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Task ID: 7
2
+ # Title: Set Up Supabase Storage
3
+ # Status: pending
4
+ # Dependencies: 2
5
+ # Priority: low
6
+ # Description: Configure Supabase for storing test cases and results. Integrate Supabase into the backend.
7
+ # Details:
8
+ Provision a Supabase instance. Design schemas for test cases and inference results. Modify the backend to store and retrieve data from Supabase tables. Consider access control.
9
+
10
+ # Test Strategy:
11
+ Integration test Supabase by storing and querying test data. Verify data integrity and security.
tasks/task_008.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Task ID: 8
2
+ # Title: Implement Monitoring and Health Checks
3
+ # Status: pending
4
+ # Dependencies: 2
5
+ # Priority: medium
6
+ # Description: Add logging, error reporting, and health check endpoints to the backend. Optionally integrate Prometheus/Grafana.
7
+ # Details:
8
+ Implement comprehensive logging to stdout or a logging service. Add /healthz and /readyz endpoints for liveness and readiness checks. Optionally expose Prometheus metrics and set up a Grafana dashboard.
9
+
10
+ # Test Strategy:
11
+ Verify health checks by running the backend and probing the endpoints. Trigger errors and validate reporting.
tasks/task_009.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Task ID: 9
2
+ # Title: Set Up CI/CD Pipeline
3
+ # Status: pending
4
+ # Dependencies: 3, 5
5
+ # Priority: high
6
+ # Description: Configure a CI/CD system for automated building, testing, and deployment of the backend and frontend.
7
+ # Details:
8
+ Use GitHub Actions or similar. Define workflows for build, test, and deploy stages. Trigger on pull requests and merges to main. Deploy backend to Hugging Face Spaces and frontend to Vercel.
9
+
10
+ # Test Strategy:
11
+ Manually trigger a full CI/CD run. Verify successful build, test passing, and deployment to production.
tasks/task_010.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Task ID: 10
2
+ # Title: Write Documentation
3
+ # Status: pending
4
+ # Dependencies: 1, 2, 5
5
+ # Priority: high
6
+ # Description: Create comprehensive documentation for the backend API, frontend usage, and overall system architecture.
7
+ # Details:
8
+ Write a README covering system overview, architecture, setup, and usage. Document the API endpoints, request/response formats, and error codes. Include examples and troubleshooting guides.
9
+
10
+ # Test Strategy:
11
+ Review documentation for clarity, accuracy, and completeness. Verify instructions by following them.
tasks/tasks.json ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tasks": [
3
+ {
4
+ "id": 1,
5
+ "title": "Design API Contract",
6
+ "description": "Define endpoints for inference, health checks, and job queueing. Use FastAPI's OpenAPI for documentation.",
7
+ "status": "pending",
8
+ "dependencies": [],
9
+ "priority": "high",
10
+ "details": "Create endpoints for /infer (POST), /healthz (GET), and /queue (POST/GET if using Redis). Specify request/response schemas and document using OpenAPI.",
11
+ "testStrategy": "Manually test endpoints with sample requests. Verify OpenAPI docs are generated correctly.",
12
+ "subtasks": [
13
+ {
14
+ "id": 1,
15
+ "title": "Define /infer endpoint",
16
+ "description": "Create a POST endpoint at /infer for making inference requests. Define request and response schemas.",
17
+ "dependencies": [],
18
+ "details": "The /infer endpoint should accept a JSON payload with the required input data. It should return the inference result or an appropriate error response. Use pydantic to define the request and response models. Document the endpoint using FastAPI's OpenAPI annotations.",
19
+ "status": "in-progress",
20
+ "parentTaskId": 1
21
+ },
22
+ {
23
+ "id": 2,
24
+ "title": "Implement /healthz endpoint",
25
+ "description": "Create a GET endpoint at /healthz for health checks. Return a 200 OK response if the service is healthy.",
26
+ "dependencies": [],
27
+ "details": "The /healthz endpoint should perform any necessary checks to determine if the service is healthy and able to handle requests. This can include checking database connections, verifying external service availability, etc. If all checks pass, return a 200 OK response. Use FastAPI's OpenAPI annotations to document the endpoint.",
28
+ "status": "in-progress",
29
+ "parentTaskId": 1
30
+ },
31
+ {
32
+ "id": 3,
33
+ "title": "Add /queue endpoint for job queueing",
34
+ "description": "If using Redis for job queueing, create POST and GET endpoints at /queue for submitting and retrieving jobs.",
35
+ "dependencies": [
36
+ 1
37
+ ],
38
+ "details": "The POST /queue endpoint should accept a job payload and enqueue it in Redis. The GET /queue endpoint should retrieve job status and results. If not using Redis, this subtask can be skipped. Ensure proper error handling and document the endpoints using FastAPI's OpenAPI annotations.",
39
+ "status": "pending",
40
+ "parentTaskId": 1
41
+ }
42
+ ]
43
+ },
44
+ {
45
+ "id": 2,
46
+ "title": "Implement FastAPI Backend",
47
+ "description": "Write a minimal FastAPI app that loads the model, exposes API endpoints, and handles errors and logging.",
48
+ "status": "pending",
49
+ "dependencies": [
50
+ 1
51
+ ],
52
+ "priority": "high",
53
+ "details": "Create a new clean codebase. Implement model loading with error handling, the /infer and /healthz endpoints, and request/error logging. Optionally integrate Redis for queueing.",
54
+ "testStrategy": "Unit test critical functionality. Integration test API endpoints. Verify logging and error handling."
55
+ },
56
+ {
57
+ "id": 3,
58
+ "title": "Dockerize Backend",
59
+ "description": "Create a clean, minimal Dockerfile for the FastAPI backend. Ensure proper setup of dependencies, cache, and permissions.",
60
+ "status": "pending",
61
+ "dependencies": [
62
+ 2
63
+ ],
64
+ "priority": "high",
65
+ "details": "Base image: nvidia/cuda:12.1.0-devel-ubuntu22.04. Install Python, torch, and pinned dependencies in order. Set up cache and permissions. Add a HEALTHCHECK.",
66
+ "testStrategy": "Build and run Docker image. Verify API endpoints, model loading, and health check. Test on GPU machine."
67
+ },
68
+ {
69
+ "id": 4,
70
+ "title": "Validate Model and Tokenizer",
71
+ "description": "Ensure the model and tokenizer files are valid, compatible, and ready for use in the backend.",
72
+ "status": "pending",
73
+ "dependencies": [],
74
+ "priority": "medium",
75
+ "details": "Test loading the model and tokenizer files locally before integrating into the backend. Verify versions and checksums. Document the update process.",
76
+ "testStrategy": "Manually test model/tokenizer loading. Validate model outputs. Automate checks if possible."
77
+ },
78
+ {
79
+ "id": 5,
80
+ "title": "Integrate Frontend",
81
+ "description": "Update the Next.js frontend to use the new backend API. Display job status, results, and health. Handle errors.",
82
+ "status": "pending",
83
+ "dependencies": [
84
+ 2
85
+ ],
86
+ "priority": "high",
87
+ "details": "Modify frontend to make requests to /infer, /healthz, and /queue endpoints. Update UI to show job status, inference results, and backend health. Implement user-friendly error handling.",
88
+ "testStrategy": "Integration test frontend against a running backend instance. Verify UI updates and error display."
89
+ },
90
+ {
91
+ "id": 6,
92
+ "title": "Set Up Redis Queue",
93
+ "description": "Configure Redis for job queueing. Integrate Redis into the backend for job submission and status tracking.",
94
+ "status": "pending",
95
+ "dependencies": [
96
+ 2
97
+ ],
98
+ "priority": "medium",
99
+ "details": "Provision a Redis instance. Implement a job queue using Redis lists or streams. Modify the backend to enqueue jobs on /queue POST and return status on GET. Process jobs asynchronously.",
100
+ "testStrategy": "Integration test queueing by submitting jobs and verifying processing. Validate job status updates."
101
+ },
102
+ {
103
+ "id": 7,
104
+ "title": "Set Up Supabase Storage",
105
+ "description": "Configure Supabase for storing test cases and results. Integrate Supabase into the backend.",
106
+ "status": "pending",
107
+ "dependencies": [
108
+ 2
109
+ ],
110
+ "priority": "low",
111
+ "details": "Provision a Supabase instance. Design schemas for test cases and inference results. Modify the backend to store and retrieve data from Supabase tables. Consider access control.",
112
+ "testStrategy": "Integration test Supabase by storing and querying test data. Verify data integrity and security."
113
+ },
114
+ {
115
+ "id": 8,
116
+ "title": "Implement Monitoring and Health Checks",
117
+ "description": "Add logging, error reporting, and health check endpoints to the backend. Optionally integrate Prometheus/Grafana.",
118
+ "status": "pending",
119
+ "dependencies": [
120
+ 2
121
+ ],
122
+ "priority": "medium",
123
+ "details": "Implement comprehensive logging to stdout or a logging service. Add /healthz and /readyz endpoints for liveness and readiness checks. Optionally expose Prometheus metrics and set up a Grafana dashboard.",
124
+ "testStrategy": "Verify health checks by running the backend and probing the endpoints. Trigger errors and validate reporting."
125
+ },
126
+ {
127
+ "id": 9,
128
+ "title": "Set Up CI/CD Pipeline",
129
+ "description": "Configure a CI/CD system for automated building, testing, and deployment of the backend and frontend.",
130
+ "status": "pending",
131
+ "dependencies": [
132
+ 3,
133
+ 5
134
+ ],
135
+ "priority": "high",
136
+ "details": "Use GitHub Actions or similar. Define workflows for build, test, and deploy stages. Trigger on pull requests and merges to main. Deploy backend to Hugging Face Spaces and frontend to Vercel.",
137
+ "testStrategy": "Manually trigger a full CI/CD run. Verify successful build, test passing, and deployment to production."
138
+ },
139
+ {
140
+ "id": 10,
141
+ "title": "Write Documentation",
142
+ "description": "Create comprehensive documentation for the backend API, frontend usage, and overall system architecture.",
143
+ "status": "pending",
144
+ "dependencies": [
145
+ 1,
146
+ 2,
147
+ 5
148
+ ],
149
+ "priority": "high",
150
+ "details": "Write a README covering system overview, architecture, setup, and usage. Document the API endpoints, request/response formats, and error codes. Include examples and troubleshooting guides.",
151
+ "testStrategy": "Review documentation for clarity, accuracy, and completeness. Verify instructions by following them."
152
+ }
153
+ ],
154
+ "metadata": {
155
+ "projectName": "NCOS_S1 (Large Compliance LLM Pipeline)",
156
+ "totalTasks": 10,
157
+ "sourceFile": "scripts/prd.md",
158
+ "generatedAt": "2023-06-21"
159
+ }
160
+ }
tasks/tasks.json.bak ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tasks": [
3
+ {
4
+ "id": 1,
5
+ "title": "Design API Contract",
6
+ "description": "Define endpoints for inference, health checks, and job queueing. Use FastAPI's OpenAPI for documentation.",
7
+ "status": "pending",
8
+ "dependencies": [],
9
+ "priority": "high",
10
+ "details": "Create endpoints for /infer (POST), /healthz (GET), and /queue (POST/GET if using Redis). Specify request/response schemas and document using OpenAPI.",
11
+ "testStrategy": "Manually test endpoints with sample requests. Verify OpenAPI docs are generated correctly."
12
+ },
13
+ {
14
+ "id": 2,
15
+ "title": "Implement FastAPI Backend",
16
+ "description": "Write a minimal FastAPI app that loads the model, exposes API endpoints, and handles errors and logging.",
17
+ "status": "pending",
18
+ "dependencies": [
19
+ 1
20
+ ],
21
+ "priority": "high",
22
+ "details": "Create a new clean codebase. Implement model loading with error handling, the /infer and /healthz endpoints, and request/error logging. Optionally integrate Redis for queueing.",
23
+ "testStrategy": "Unit test critical functionality. Integration test API endpoints. Verify logging and error handling."
24
+ },
25
+ {
26
+ "id": 3,
27
+ "title": "Dockerize Backend",
28
+ "description": "Create a clean, minimal Dockerfile for the FastAPI backend. Ensure proper setup of dependencies, cache, and permissions.",
29
+ "status": "pending",
30
+ "dependencies": [
31
+ 2
32
+ ],
33
+ "priority": "high",
34
+ "details": "Base image: nvidia/cuda:12.1.0-devel-ubuntu22.04. Install Python, torch, and pinned dependencies in order. Set up cache and permissions. Add a HEALTHCHECK.",
35
+ "testStrategy": "Build and run Docker image. Verify API endpoints, model loading, and health check. Test on GPU machine."
36
+ },
37
+ {
38
+ "id": 4,
39
+ "title": "Validate Model and Tokenizer",
40
+ "description": "Ensure the model and tokenizer files are valid, compatible, and ready for use in the backend.",
41
+ "status": "pending",
42
+ "dependencies": [],
43
+ "priority": "medium",
44
+ "details": "Test loading the model and tokenizer files locally before integrating into the backend. Verify versions and checksums. Document the update process.",
45
+ "testStrategy": "Manually test model/tokenizer loading. Validate model outputs. Automate checks if possible."
46
+ },
47
+ {
48
+ "id": 5,
49
+ "title": "Integrate Frontend",
50
+ "description": "Update the Next.js frontend to use the new backend API. Display job status, results, and health. Handle errors.",
51
+ "status": "pending",
52
+ "dependencies": [
53
+ 2
54
+ ],
55
+ "priority": "high",
56
+ "details": "Modify frontend to make requests to /infer, /healthz, and /queue endpoints. Update UI to show job status, inference results, and backend health. Implement user-friendly error handling.",
57
+ "testStrategy": "Integration test frontend against a running backend instance. Verify UI updates and error display."
58
+ },
59
+ {
60
+ "id": 6,
61
+ "title": "Set Up Redis Queue",
62
+ "description": "Configure Redis for job queueing. Integrate Redis into the backend for job submission and status tracking.",
63
+ "status": "pending",
64
+ "dependencies": [
65
+ 2
66
+ ],
67
+ "priority": "medium",
68
+ "details": "Provision a Redis instance. Implement a job queue using Redis lists or streams. Modify the backend to enqueue jobs on /queue POST and return status on GET. Process jobs asynchronously.",
69
+ "testStrategy": "Integration test queueing by submitting jobs and verifying processing. Validate job status updates."
70
+ },
71
+ {
72
+ "id": 7,
73
+ "title": "Set Up Supabase Storage",
74
+ "description": "Configure Supabase for storing test cases and results. Integrate Supabase into the backend.",
75
+ "status": "pending",
76
+ "dependencies": [
77
+ 2
78
+ ],
79
+ "priority": "low",
80
+ "details": "Provision a Supabase instance. Design schemas for test cases and inference results. Modify the backend to store and retrieve data from Supabase tables. Consider access control.",
81
+ "testStrategy": "Integration test Supabase by storing and querying test data. Verify data integrity and security."
82
+ },
83
+ {
84
+ "id": 8,
85
+ "title": "Implement Monitoring and Health Checks",
86
+ "description": "Add logging, error reporting, and health check endpoints to the backend. Optionally integrate Prometheus/Grafana.",
87
+ "status": "pending",
88
+ "dependencies": [
89
+ 2
90
+ ],
91
+ "priority": "medium",
92
+ "details": "Implement comprehensive logging to stdout or a logging service. Add /healthz and /readyz endpoints for liveness and readiness checks. Optionally expose Prometheus metrics and set up a Grafana dashboard.",
93
+ "testStrategy": "Verify health checks by running the backend and probing the endpoints. Trigger errors and validate reporting."
94
+ },
95
+ {
96
+ "id": 9,
97
+ "title": "Set Up CI/CD Pipeline",
98
+ "description": "Configure a CI/CD system for automated building, testing, and deployment of the backend and frontend.",
99
+ "status": "pending",
100
+ "dependencies": [
101
+ 3,
102
+ 5
103
+ ],
104
+ "priority": "high",
105
+ "details": "Use GitHub Actions or similar. Define workflows for build, test, and deploy stages. Trigger on pull requests and merges to main. Deploy backend to Hugging Face Spaces and frontend to Vercel.",
106
+ "testStrategy": "Manually trigger a full CI/CD run. Verify successful build, test passing, and deployment to production."
107
+ },
108
+ {
109
+ "id": 10,
110
+ "title": "Write Documentation",
111
+ "description": "Create comprehensive documentation for the backend API, frontend usage, and overall system architecture.",
112
+ "status": "pending",
113
+ "dependencies": [
114
+ 1,
115
+ 2,
116
+ 5
117
+ ],
118
+ "priority": "high",
119
+ "details": "Write a README covering system overview, architecture, setup, and usage. Document the API endpoints, request/response formats, and error codes. Include examples and troubleshooting guides.",
120
+ "testStrategy": "Review documentation for clarity, accuracy, and completeness. Verify instructions by following them."
121
+ }
122
+ ],
123
+ "metadata": {
124
+ "projectName": "NCOS_S1 (Large Compliance LLM Pipeline)",
125
+ "totalTasks": 10,
126
+ "sourceFile": "scripts/prd.md",
127
+ "generatedAt": "2023-06-21"
128
+ }
129
+ }