bhoomika19 committed on
Commit
39ce191
·
1 Parent(s): 6874d8b

added fastapi backend server

Browse files
backend/main.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI backend for Math Agentic RAG system.
3
+ """
4
+ import sys
5
+ import os
6
+ from pathlib import Path
7
+
8
+ # Add the parent directory to Python path to import database module
9
+ parent_dir = Path(__file__).parent.parent
10
+ sys.path.append(str(parent_dir))
11
+
12
+ from fastapi import FastAPI, HTTPException
13
+ from fastapi.middleware.cors import CORSMiddleware
14
+ from contextlib import asynccontextmanager
15
+ import logging
16
+ import structlog
17
+ from dotenv import load_dotenv
18
+
19
+ # Import routes
20
+ from routes.search import router as search_router
21
+ from routes.feedback import router as feedback_router
22
+
23
+ # Load environment variables
24
+ load_dotenv()
25
+
26
+ # Configure structured logging
27
# Processor chain applied, in order, to every log event. The final
# JSONRenderer turns the accumulated event dict into one JSON document.
_LOG_PROCESSORS = [
    structlog.stdlib.filter_by_level,
    structlog.stdlib.add_logger_name,
    structlog.stdlib.add_log_level,
    structlog.stdlib.PositionalArgumentsFormatter(),
    structlog.processors.TimeStamper(fmt="iso"),
    structlog.processors.StackInfoRenderer(),
    structlog.processors.format_exc_info,
    structlog.processors.UnicodeDecoder(),
    structlog.processors.JSONRenderer(),
]

# Route structlog through the standard-library logging machinery and cache
# the bound logger after its first use.
structlog.configure(
    processors=_LOG_PROCESSORS,
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    cache_logger_on_first_use=True,
)

# Module-level logger shared by the lifespan hook and the endpoints below.
logger = structlog.get_logger()
45
+
46
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application startup and shutdown.

    Runs the startup section, yields control while the application serves
    requests, and logs the shutdown message once serving ends.

    Args:
        app: The FastAPI application this lifespan is attached to.

    Raises:
        Exception: Any error raised during startup is logged and re-raised.
    """
    logger.info("Starting Math Agentic RAG Backend...")

    # Startup. The try block covers initialisation only, so that errors
    # raised later (while serving or shutting down) are not mislabelled as
    # initialisation failures.
    try:
        # Initialize services here if needed
        logger.info("Backend services initialized successfully")
    except Exception as e:
        logger.error("Failed to initialize backend services", error=str(e))
        raise

    try:
        yield
    finally:
        # Cleanup
        logger.info("Shutting down Math Agentic RAG Backend...")
62
+
63
+ # Create FastAPI application
64
app = FastAPI(
    title="Math Agentic RAG API",
    description="Backend API for Math-focused Agentic RAG system with knowledge base and web search capabilities",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    lifespan=lifespan
)

# CORS configuration.
# Allowed origins come from the comma-separated CORS_ALLOW_ORIGINS
# environment variable and default to "*" (any origin) when it is unset.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

# A wildcard origin must not be combined with credentialed requests, since
# that would let any site make authenticated calls. Credentials are enabled
# only when explicit origins are configured.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(search_router, prefix="/api", tags=["search"])
app.include_router(feedback_router, prefix="/api", tags=["feedback"])
85
+
86
@app.get("/")
async def root():
    """Return basic service information for the root URL.

    Returns:
        A dict with the service name, a static status, the API version and
        the path of the interactive docs.
    """
    service_info = dict(
        message="Math Agentic RAG Backend API",
        status="running",
        version="1.0.0",
        docs="/docs",
    )
    return service_info
95
+
96
@app.get("/health")
async def health_check():
    """Report service health.

    Returns:
        A dict with an overall status, the current UTC time as an ISO-8601
        string, and per-service status strings. The database and MCP
        entries are static placeholders, not real checks.
    """
    # Local import: this module does not import datetime at file level.
    from datetime import datetime, timezone

    # The previous implementation called structlog's private
    # TimeStamper._stamper without its required event_dict argument, which
    # raised TypeError on every request. Build the timestamp directly.
    timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    return {
        "status": "healthy",
        "timestamp": timestamp,
        "services": {
            "api": "running",
            "database": "connected",  # Will be updated with actual checks
            "mcp": "available"  # Will be updated with actual checks
        }
    }
108
+
109
if __name__ == "__main__":
    # Direct-execution entry point: serve the app with uvicorn on all
    # interfaces, port 8000, with auto-reload enabled.
    import uvicorn

    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "reload": True,
        "log_level": "info",
    }
    uvicorn.run("main:app", **server_options)
backend/models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Empty __init__.py file to make this a Python package
backend/models/schemas.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pydantic models for API request/response schemas.
3
+ """
4
+ from pydantic import BaseModel, Field
5
+ from typing import List, Optional, Literal
6
+ from datetime import datetime
7
+ import uuid
8
+
9
+ # Request Models
10
class SearchRequest(BaseModel):
    """Payload accepted by the search endpoint: a single math question."""

    # Required; the schema caps the question at 200 characters.
    question: str = Field(
        ...,
        max_length=200,
        description="Math question to search for",
    )
13
+
14
class FeedbackRequest(BaseModel):
    """Payload accepted by the feedback endpoint."""

    # The question the rated answer was produced for.
    question: str = Field(
        ...,
        description="Original question",
    )
    # Identifier of the answer being rated.
    response_id: str = Field(
        ...,
        description="UUID of the response",
    )
    # Integer rating, constrained to the inclusive range 1..5.
    correctness_rating: int = Field(
        ...,
        ge=1,
        le=5,
        description="Rating from 1-5",
    )
    # Free-text remark; defaults to the empty string when omitted.
    comment: str = Field(
        default="",
        description="Optional feedback comment",
    )
20
+
21
+ # Response Models
22
class SearchResult(BaseModel):
    """One problem/solution pair together with its similarity score."""

    problem: str = Field(
        ...,
        description="Math problem statement",
    )
    solution: str = Field(
        ...,
        description="Solution to the problem",
    )
    score: float = Field(
        ...,
        description="Similarity score",
    )
27
+
28
class SearchResponse(BaseModel):
    """Body returned by the search endpoint."""

    # Generated as a fresh UUID4 string when the caller does not supply one.
    response_id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
    )
    final_answer: str = Field(
        ...,
        description="The main answer to the question",
    )
    # Restricted to the two literal values "KB" and "MCP".
    source: Literal["KB", "MCP"] = Field(
        ...,
        description="Source of the answer",
    )
    explanation: Optional[str] = Field(
        default=None,
        description="Optional explanation",
    )
    results: List[SearchResult] = Field(
        default_factory=list,
        description="Detailed search results",
    )
    metadata: dict = Field(
        default_factory=dict,
        description="Additional metadata",
    )
    response_time_ms: Optional[float] = Field(
        default=None,
        description="Response time in milliseconds",
    )
37
+
38
class FeedbackResponse(BaseModel):
    """Body returned by the feedback endpoint."""

    message: str = Field(
        ...,
        description="Confirmation message",
    )
    # Generated as a fresh UUID4 string when the caller does not supply one.
    feedback_id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
    )
42
+
43
+ # Internal Models
44
class APILogEntry(BaseModel):
    """Record describing one API request/response pair, used for logging."""

    # Generated as a fresh UUID4 string when not supplied.
    request_id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
    )
    # Defaults to datetime.utcnow() evaluated when the entry is created.
    timestamp: datetime = Field(
        default_factory=datetime.utcnow,
    )
    endpoint: str = Field(
        ...,
        description="API endpoint called",
    )
    method: str = Field(
        ...,
        description="HTTP method",
    )
    request_data: dict = Field(
        ...,
        description="Request payload",
    )
    response_data: dict = Field(
        ...,
        description="Response payload",
    )
    response_time_ms: float = Field(
        ...,
        description="Response time in milliseconds",
    )
    # Restricted to the two literal values "KB" and "MCP".
    source: Literal["KB", "MCP"] = Field(
        ...,
        description="Source of the answer",
    )
    feedback_received: bool = Field(
        False,
        description="Whether feedback was received",
    )
    # Required: callers must always provide the HTTP status code.
    status_code: int = Field(
        ...,
        description="HTTP status code",
    )
56
+
57
class ErrorResponse(BaseModel):
    """Uniform shape for error payloads."""

    error: str = Field(
        ...,
        description="Error message",
    )
    detail: Optional[str] = Field(
        default=None,
        description="Detailed error information",
    )
    request_id: Optional[str] = Field(
        default=None,
        description="Request ID for tracking",
    )
backend/requirements.txt ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FastAPI and web server
2
+ fastapi==0.104.1
3
+ uvicorn[standard]==0.24.0
4
+
5
+ # HTTP client for MCP calls
6
+ httpx==0.25.2
7
+
8
+ # Qdrant vector database
9
+ qdrant-client==1.8.0
10
+
11
+ # AI Guardrails
12
+ guardrails-ai==0.4.5
13
+
14
+ # Environment management
15
+ python-dotenv==1.0.0
16
+
17
+ # Structured logging
18
+ structlog==23.2.0
19
+
20
+ # Data processing and embeddings (reusing from database module)
21
+ sentence-transformers==2.2.2
22
+ datasets==2.18.0
23
+ pandas==2.1.4
24
+
25
+ # MCP client (for web search integration)
26
+ fastmcp==0.3.0
27
+
28
+ # Logging and monitoring
29
+ structlog==23.2.0
30
+
31
+ # Data validation
32
+ pydantic==2.5.0
33
+
34
+ # Async support
35
+ # asyncio ships with the Python standard library; the obsolete PyPI "asyncio" backport must not be installed
36
+
37
+ # UUID generation (built-in, but listed for clarity)
38
+ # uuid (built-in)
39
+
40
+ # JSON handling
41
+ orjson==3.9.10
backend/routes/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Empty __init__.py file to make this a Python package
backend/routes/feedback.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Feedback endpoint for the Math Agentic RAG system.
3
+ """
4
+ from fastapi import APIRouter, HTTPException, BackgroundTasks
5
+ import structlog
6
+ import time
7
+ from typing import Dict, Any
8
+
9
+ from models.schemas import FeedbackRequest, FeedbackResponse, ErrorResponse
10
+
11
+ router = APIRouter()
12
+ logger = structlog.get_logger()
13
+
14
@router.post("/feedback", response_model=FeedbackResponse)
async def submit_feedback(
    feedback: FeedbackRequest,
    background_tasks: BackgroundTasks
) -> FeedbackResponse:
    """
    Submit user feedback for search results.

    The feedback is acknowledged immediately; processing is scheduled as a
    background task that runs after the response has been sent.

    Args:
        feedback: Feedback data including response_id, correctness_rating
            and comment
        background_tasks: Background tasks for processing

    Returns:
        FeedbackResponse confirming feedback receipt

    Raises:
        HTTPException: 500 if the feedback could not be accepted.
    """
    start_time = time.time()

    try:
        # Use the field names declared on FeedbackRequest
        # (correctness_rating / comment). The previous names (rating /
        # comments) do not exist on the model and raised AttributeError on
        # every request.
        logger.info("Processing feedback submission",
                    response_id=feedback.response_id,
                    rating=feedback.correctness_rating,
                    has_comments=bool(feedback.comment))

        # Process feedback in background
        background_tasks.add_task(
            process_feedback,
            feedback.model_dump()
        )

        response_time_ms = (time.time() - start_time) * 1000

        # FeedbackResponse declares only message and feedback_id, so the
        # undeclared "status" argument has been dropped.
        response = FeedbackResponse(
            message="Feedback received successfully",
            feedback_id=feedback.response_id,  # Using response_id as feedback_id for traceability
        )

        logger.info("Feedback submission completed",
                    response_id=feedback.response_id,
                    response_time_ms=response_time_ms)

        return response

    except Exception as e:
        logger.error("Feedback submission failed",
                     response_id=feedback.response_id,
                     error=str(e))

        raise HTTPException(
            status_code=500,
            detail=f"Failed to process feedback: {str(e)}"
        )
66
+
67
async def process_feedback(feedback_data: Dict[str, Any]):
    """
    Handle one feedback payload outside the request/response cycle.

    Currently this only logs the payload. Planned work (not implemented):
    1. Store feedback in Qdrant for analysis
    2. Update system metrics
    3. Trigger retraining if needed (future enhancement)

    Errors are logged and never propagated to the caller.

    Args:
        feedback_data: Feedback payload as a plain dict.
    """
    try:
        response_id = feedback_data.get("response_id")
        logger.info("Processing feedback in background", response_id=response_id)

        # TODO: Implement feedback storage in Qdrant
        # TODO: Update system performance metrics
        # TODO: Implement feedback-based model improvements

        # Until storage exists, the log line is the only record.
        logger.info("Feedback processed successfully", feedback_data=feedback_data)

    except Exception as exc:
        logger.error(
            "Background feedback processing failed",
            error=str(exc),
            feedback_data=feedback_data,
        )
backend/routes/search.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Search endpoint for the Math Agentic RAG system.
3
+ """
4
+ from fastapi import APIRouter, HTTPException, BackgroundTasks
5
+ import sys
6
+ from pathlib import Path
7
+ import structlog
8
+ import time
9
+ import uuid
10
+
11
+ # Add parent directory to import database module
12
+ parent_dir = Path(__file__).parent.parent.parent
13
+ sys.path.append(str(parent_dir))
14
+
15
+ from models.schemas import SearchRequest, SearchResponse, ErrorResponse, SearchResult
16
+ from services.qdrant_service import QdrantService
17
+ from services.mcp_service import MCPService
18
+ from services.guardrails_service import GuardrailsService
19
+
20
+ router = APIRouter()
21
+ logger = structlog.get_logger()
22
+
23
+ # Initialize services (will be properly initialized when packages are installed)
24
+ qdrant_service = None
25
+ mcp_service = None
26
+ guardrails_service = None
27
+
28
def initialize_services():
    """Lazily create the module-level service singletons.

    Each service is checked and created independently. Previously only
    qdrant_service was checked, so if a later constructor raised, that
    service stayed None for the life of the process.
    """
    global qdrant_service, mcp_service, guardrails_service

    if qdrant_service is None:
        qdrant_service = QdrantService()
    if mcp_service is None:
        mcp_service = MCPService()
    if guardrails_service is None:
        guardrails_service = GuardrailsService()
36
+
37
@router.post("/search", response_model=SearchResponse)
async def search_math_problems(
    request: SearchRequest,
    background_tasks: BackgroundTasks
) -> SearchResponse:
    """
    Search for math problems in knowledge base or web.

    Flow: validate the question, query the knowledge base, fall back to
    web search when the best knowledge-base score is below the confidence
    threshold, validate the answer, then schedule analytics logging.

    Args:
        request: Search request containing the math question
        background_tasks: Background tasks for logging

    Returns:
        SearchResponse with results and metadata

    Raises:
        HTTPException: 400 when input validation rejects the question;
            500 for any other failure.
    """
    start_time = time.time()
    response_id = str(uuid.uuid4())

    try:
        # Initialize services if not already done
        initialize_services()

        logger.info("Processing search request",
                    request_id=response_id,
                    question=request.question)

        # Step 1: Validate input with guardrails.
        # validate_input raises ValueError for unacceptable input. That is
        # a client error, so report it as 400 rather than as a 500.
        try:
            validated_question = guardrails_service.validate_input(request.question)
        except ValueError as e:
            logger.warning("Search request rejected by input validation",
                           request_id=response_id,
                           error=str(e))
            raise HTTPException(status_code=400, detail=str(e)) from e

        # Step 2: Search knowledge base (Qdrant)
        kb_results = await qdrant_service.search_similar(validated_question)

        # Step 3: Determine if we need web search fallback
        confidence_threshold = 0.5
        best_score = kb_results[0].score if kb_results else 0.0

        if best_score >= confidence_threshold:
            # Use knowledge base results
            source = "KB"
            final_answer = kb_results[0].solution if kb_results else "No solution found"
            explanation = f"Found similar problem with confidence score: {best_score:.3f}"
            results = kb_results[:3]  # Return top 3 results

        else:
            # Fallback to web search via MCP
            logger.info("Low confidence KB results, using web search fallback",
                        best_score=best_score, threshold=confidence_threshold)

            try:
                web_results = await mcp_service.search_web(validated_question)
                source = "MCP"
                final_answer = web_results.get("answer", "No web results found")
                explanation = f"Knowledge base confidence too low ({best_score:.3f}), used web search"
                # The single web result is built after output validation
                # (below) so it never carries unvalidated text.
                results = []

            except Exception as e:
                logger.error("Web search failed, falling back to KB results", error=str(e))
                source = "KB"
                final_answer = kb_results[0].solution if kb_results else "No solution available"
                explanation = f"Web search failed, using best KB result (score: {best_score:.3f})"
                results = kb_results[:1] if kb_results else []

        # Step 4: Validate output with guardrails
        validated_response = guardrails_service.validate_output(final_answer)

        if source == "MCP":
            # Convert the web answer to SearchResult format using the
            # validated text.
            results = [SearchResult(
                problem=validated_question,
                solution=validated_response,
                score=0.8  # Default score for web results
            )]

        # Calculate response time
        response_time_ms = (time.time() - start_time) * 1000

        # Create response
        response = SearchResponse(
            response_id=response_id,
            final_answer=validated_response,
            source=source,
            explanation=explanation,
            results=results,
            metadata={
                "confidence_score": best_score,
                "threshold_used": confidence_threshold,
                "kb_results_count": len(kb_results) if kb_results else 0
            },
            response_time_ms=response_time_ms
        )

        # Log API call in background
        background_tasks.add_task(
            log_api_call,
            request=request.model_dump(),
            response=response.model_dump(),
            response_time_ms=response_time_ms,
            source=source
        )

        logger.info("Search request completed successfully",
                    request_id=response_id,
                    source=source,
                    response_time_ms=response_time_ms)

        return response

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must not be rewrapped
        # as 500 by the generic handler below.
        raise

    except Exception as e:
        logger.error("Search request failed",
                     request_id=response_id,
                     error=str(e))

        raise HTTPException(
            status_code=500,
            detail=f"Internal server error: {str(e)}"
        )
151
+
152
async def log_api_call(
    request: dict,
    response: dict,
    response_time_ms: float,
    source: str
):
    """Forward one /search call record to the Qdrant service for analytics.

    Does nothing when the Qdrant service has not been created. Any failure
    is logged as a warning and never propagated.

    Args:
        request: Request payload as a plain dict.
        response: Response payload as a plain dict.
        response_time_ms: Time taken to serve the request, in milliseconds.
        source: Where the answer came from.
    """
    try:
        if not qdrant_service:
            return

        call_record = dict(
            endpoint="/search",
            method="POST",
            request_data=request,
            response_data=response,
            response_time_ms=response_time_ms,
            source=source,
        )
        await qdrant_service.log_api_call(**call_record)
    except Exception as exc:
        logger.warning("Failed to log API call", error=str(exc))
backend/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Empty __init__.py file to make this a Python package
backend/services/guardrails_service.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Guardrails service for input/output validation and safety.
3
+ """
4
+ import re
5
+ import structlog
6
+ from typing import Dict, List, Any, Optional
7
+
8
+ logger = structlog.get_logger()
9
+
10
class GuardrailsService:
    """Input/output validation based on simple regular-expression rules.

    The guardrails-ai integration is not implemented yet; validation is
    done with the pattern lists built in _setup_validation_rules.
    """

    # Length bounds, in characters. Question bounds apply to the question
    # after surrounding whitespace has been stripped.
    MIN_QUESTION_LENGTH = 5
    MAX_QUESTION_LENGTH = 2000
    MAX_RESPONSE_LENGTH = 10000

    def __init__(self):
        """Initialize Guardrails service."""
        self.initialized = False
        self._setup_validation_rules()
        logger.info("Guardrails service initialized")

    def _setup_validation_rules(self):
        """Build the pattern lists used by the validators."""
        # TODO: Implement actual guardrails-ai integration
        # For now, implement basic validation rules

        # Prohibited content patterns
        self.prohibited_patterns = [
            r'(?i)\b(hack|exploit|malicious|virus|attack)\b',
            r'(?i)\b(personal|private|confidential|secret)\b',
            r'(?i)\b(password|credit|social.*security)\b'
        ]

        # Math-related positive patterns
        self.math_patterns = [
            r'\b\d+\b',  # Numbers
            r'[+\-*/=()]',  # Math operators
            r'(?i)\b(solve|equation|function|derivative|integral|limit|sum|product)\b',
            r'(?i)\b(algebra|geometry|calculus|trigonometry|statistics|probability)\b',
            r'(?i)\b(theorem|proof|formula|solution|answer)\b'
        ]

        self.initialized = True

    def validate_input(self, question: str) -> str:
        """
        Validate and sanitize input question.

        Args:
            question: Input question to validate

        Returns:
            Validated and sanitized question

        Raises:
            ValueError: If input is invalid or unsafe
        """
        try:
            if not question or not question.strip():
                raise ValueError("Question cannot be empty")

            # Strip first so the length limits apply to the real content.
            # Previously whitespace padding counted towards both limits.
            stripped = question.strip()

            # Check length limits
            if len(stripped) > self.MAX_QUESTION_LENGTH:
                raise ValueError(f"Question too long (max {self.MAX_QUESTION_LENGTH} characters)")

            if len(stripped) < self.MIN_QUESTION_LENGTH:
                raise ValueError(f"Question too short (min {self.MIN_QUESTION_LENGTH} characters)")

            # Check for prohibited content
            for pattern in self.prohibited_patterns:
                if re.search(pattern, stripped):
                    logger.warning("Prohibited content detected in input",
                                   pattern=pattern)
                    raise ValueError("Input contains prohibited content")

            # Remove potential script injections
            sanitized = re.sub(r'<script.*?</script>', '', stripped, flags=re.IGNORECASE | re.DOTALL)
            sanitized = re.sub(r'javascript:', '', sanitized, flags=re.IGNORECASE)

            # Sanitization can remove everything (e.g. the input was only a
            # script tag); do not pass an empty query downstream.
            if not sanitized.strip():
                raise ValueError("Question cannot be empty")

            # Check if it looks like a math question (advisory only)
            has_math_content = any(re.search(pattern, sanitized) for pattern in self.math_patterns)

            if not has_math_content:
                logger.info("Non-math content detected, proceeding with caution")

            logger.info("Input validation successful",
                        original_length=len(question),
                        sanitized_length=len(sanitized),
                        has_math_content=has_math_content)

            return sanitized

        except ValueError:
            raise
        except Exception as e:
            logger.error("Input validation failed", error=str(e))
            # Keep the original exception as the cause for debugging.
            raise ValueError(f"Input validation error: {str(e)}") from e

    def validate_output(self, response: str) -> str:
        """
        Validate and sanitize output response.

        Never raises: a fixed fallback message is returned when validation
        itself fails.

        Args:
            response: Output response to validate

        Returns:
            Validated and sanitized response, or a fixed placeholder when
            the response is empty or validation fails
        """
        try:
            if not response or not response.strip():
                return "No response generated"

            # Capture the length before truncation so the log line below
            # reports the real original size.
            original_length = len(response)

            # Check length limits
            if original_length > self.MAX_RESPONSE_LENGTH:
                logger.warning("Response too long, truncating")
                response = response[:self.MAX_RESPONSE_LENGTH] + "... [truncated]"

            # Basic sanitization
            sanitized = response.strip()

            # Remove potential harmful content
            sanitized = re.sub(r'<script.*?</script>', '', sanitized, flags=re.IGNORECASE | re.DOTALL)
            sanitized = re.sub(r'javascript:', '', sanitized, flags=re.IGNORECASE)

            # Mask prohibited content in output instead of rejecting it
            for pattern in self.prohibited_patterns:
                if re.search(pattern, sanitized):
                    logger.warning("Prohibited content detected in output",
                                   pattern=pattern)
                    sanitized = re.sub(pattern, '[FILTERED]', sanitized, flags=re.IGNORECASE)

            logger.info("Output validation successful",
                        original_length=original_length,
                        sanitized_length=len(sanitized))

            return sanitized

        except Exception as e:
            logger.error("Output validation failed", error=str(e))
            return "Response validation failed - please try again"

    def is_math_related(self, text: str) -> bool:
        """
        Check if text is math-related.

        Args:
            text: Text to analyze

        Returns:
            True if any of the math patterns matches the text
        """
        return any(re.search(pattern, text) for pattern in self.math_patterns)
backend/services/mcp_service.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MCP (Model Context Protocol) service for web search fallback.
3
+ """
4
+ import asyncio
5
+ import structlog
6
+ from typing import Dict, Any, Optional
7
+ import json
8
+
9
+ logger = structlog.get_logger()
10
+
11
class MCPService:
    """Web-search fallback exposed through MCP (currently a mock)."""

    def __init__(self):
        """Record the MCP server identifier and mark the service as not connected."""
        self.mcp_server_path = "pranavms13/web-search-mcp"
        self.initialized = False
        logger.info("MCP service initialized")

    async def search_web(self, question: str) -> Dict[str, Any]:
        """
        Produce a web-search style answer for a math question.

        The real MCP integration is not implemented; this waits briefly and
        returns a canned answer selected by keywords found in the question.

        Args:
            question: The math question to search for

        Returns:
            Dictionary with the answer text, source label, confidence,
            the query and a result count

        Raises:
            Exception: Wraps any error raised while building the answer.
        """
        try:
            logger.info("Starting web search via MCP", question_length=len(question))

            # TODO: Implement actual MCP integration
            # For now, return a placeholder response

            # Simulate web search delay
            await asyncio.sleep(0.5)

            lowered = question.lower()

            def mentions(*keywords: str) -> bool:
                # True when at least one keyword occurs in the lower-cased question.
                return any(word in lowered for word in keywords)

            # Mock response based on question type; first matching topic wins.
            if mentions('derivative', 'integral', 'calculus'):
                answer = f"Based on web search: This appears to be a calculus problem. {question} involves applying standard calculus techniques. Consider using the fundamental theorem of calculus or integration by parts."
            elif mentions('algebra', 'equation', 'solve'):
                answer = f"Based on web search: This is an algebraic problem. {question} can be solved using algebraic manipulation and equation solving techniques."
            elif mentions('geometry', 'triangle', 'circle'):
                answer = f"Based on web search: This is a geometry problem. {question} involves geometric principles and may require knowledge of shapes, areas, or angles."
            else:
                answer = f"Based on web search: {question} is a mathematical problem that may require breaking down into smaller steps and applying relevant mathematical concepts."

            confidence = 0.7
            result = {
                "answer": answer,
                "source": "web_search",
                "confidence": confidence,
                "search_query": question,
                "results_count": 1,
            }

            logger.info(
                "Web search completed via MCP",
                answer_length=len(answer),
                confidence=confidence,
            )
            return result

        except Exception as exc:
            logger.error("Web search via MCP failed", error=str(exc))
            raise Exception(f"MCP web search failed: {exc!s}")

    async def initialize_mcp_connection(self):
        """Mark the MCP connection as established (placeholder implementation)."""
        try:
            # TODO: Implement actual MCP server connection
            # This would involve:
            # 1. Spawning the MCP server process
            # 2. Establishing JSON-RPC communication
            # 3. Calling available tools like web_search

            self.initialized = True
            logger.info("MCP connection initialized successfully")

        except Exception as exc:
            logger.error("Failed to initialize MCP connection", error=str(exc))
            raise

    def is_available(self) -> bool:
        """Return True once initialize_mcp_connection has completed."""
        return self.initialized
backend/services/qdrant_service.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Qdrant service for vector database operations.
3
+ """
4
+ import sys
5
+ from pathlib import Path
6
+ import structlog
7
+ from typing import List, Dict, Any, Optional
8
+
9
+ # Add parent directory to import database module
10
+ parent_dir = Path(__file__).parent.parent.parent
11
+ sys.path.append(str(parent_dir))
12
+
13
# Created before the guarded imports so an import failure can be reported.
logger = structlog.get_logger()

try:
    # Schemas first: they stay available even if the database package
    # below cannot be imported.
    from models.schemas import SearchResult, APILogEntry
    from database.qdrant_manager import QdrantManager
    from database.utils import EmbeddingGenerator
except ImportError as e:
    # Previously swallowed silently; record why the service is degraded.
    logger.warning("Qdrant dependencies could not be imported; service will run in degraded mode",
                   error=str(e))
22
+
23
class QdrantService:
    """Service layer for Qdrant vector database operations.

    If the Qdrant manager or the embedding generator cannot be created, the
    service stays usable in a degraded mode: searches return an empty list
    and analytics logging is skipped.
    """

    def __init__(self):
        """Initialize Qdrant service."""
        self.qdrant_manager = None
        self.embedding_generator = None
        # Collection queried by search_similar; may be overridden from the
        # environment in _initialize.
        self.collection_name = 'math_problems'
        self._initialize()

    def _initialize(self):
        """Create the Qdrant manager and embedding generator.

        Connection settings are read from the environment (QDRANT_URL,
        QDRANT_API_KEY, QDRANT_COLLECTION). Failures are logged and leave
        the service in degraded mode; nothing is raised.
        """
        # Local import: this module does not import os at file level.
        import os

        try:
            # SECURITY: credentials must never be committed to source
            # control. The API key that used to be hard-coded here has been
            # exposed in the repository history and should be rotated.
            # NOTE(review): the variable names below are chosen here; align
            # them with database/ingest.py if it uses different ones.
            url = os.getenv("QDRANT_URL")
            api_key = os.getenv("QDRANT_API_KEY")
            self.collection_name = os.getenv("QDRANT_COLLECTION", self.collection_name)

            if not url:
                raise RuntimeError("QDRANT_URL environment variable is not set")

            self.qdrant_manager = QdrantManager(
                url=url,
                api_key=api_key
            )

            self.embedding_generator = EmbeddingGenerator()

            logger.info("Qdrant service initialized successfully")

        except Exception as e:
            logger.error("Failed to initialize Qdrant service", error=str(e))
            # Service will work in degraded mode

    # The return annotation is a string so the class can still be defined
    # when the guarded import of SearchResult failed.
    async def search_similar(self, question: str, limit: int = 5) -> "List[SearchResult]":
        """
        Search for similar math problems in the knowledge base.

        Args:
            question: The math question to search for
            limit: Maximum number of results to return

        Returns:
            List of SearchResult objects; empty when the service is not
            initialized or the search fails
        """
        if not self.qdrant_manager or not self.embedding_generator:
            logger.warning("Qdrant service not properly initialized")
            return []

        try:
            # Generate embedding for the question
            query_embedding = self.embedding_generator.embed_text(question)

            # Search in Qdrant
            results = self.qdrant_manager.search_similar(
                collection_name=self.collection_name,
                query_vector=query_embedding,
                limit=limit
            )

            # Convert to SearchResult objects. Each hit is expected to
            # expose .payload (a mapping) and .score, as used here.
            search_results = [
                SearchResult(
                    problem=hit.payload.get('problem', ''),
                    solution=hit.payload.get('solution', ''),
                    score=hit.score
                )
                for hit in results
            ]

            logger.info("Knowledge base search completed",
                        question_length=len(question),
                        results_count=len(search_results),
                        best_score=search_results[0].score if search_results else 0)

            return search_results

        except Exception as e:
            logger.error("Knowledge base search failed", error=str(e))
            return []

    async def log_api_call(
        self,
        endpoint: str,
        method: str,
        request_data: Dict[str, Any],
        response_data: Dict[str, Any],
        response_time_ms: float,
        source: str,
        status_code: int = 200
    ):
        """
        Log API call to Qdrant for analytics.

        Failures are logged as warnings and never propagated.

        Args:
            endpoint: API endpoint called
            method: HTTP method
            request_data: Request payload
            response_data: Response payload
            response_time_ms: Response time in milliseconds
            source: Source of the response (KB/MCP)
            status_code: HTTP status code of the response. Defaults to 200
                so existing callers, which do not pass it, keep working.
        """
        if not self.qdrant_manager or not self.embedding_generator:
            logger.warning("Cannot log API call - Qdrant service not initialized")
            return

        try:
            # Create log entry. APILogEntry requires status_code; omitting
            # it made every call fail validation.
            log_entry = APILogEntry(
                endpoint=endpoint,
                method=method,
                request_data=request_data,
                response_data=response_data,
                response_time_ms=response_time_ms,
                source=source,
                status_code=status_code
            )

            # TODO: Store log entry in Qdrant analytics collection
            # For now, just log to stdout
            logger.info("API call logged",
                        request_id=log_entry.request_id,
                        endpoint=endpoint,
                        method=method,
                        response_time_ms=response_time_ms,
                        source=source,
                        status_code=status_code)

        except Exception as e:
            logger.warning("Failed to log API call", error=str(e))