"""Local LLM integration for the reasoning system.""" import os from typing import Dict, Any, Optional from datetime import datetime import logging from llama_cpp import Llama import huggingface_hub from .base import ReasoningStrategy class LocalLLMStrategy(ReasoningStrategy): """Implements reasoning using local LLM.""" def __init__(self): """Initialize the local LLM strategy.""" self.repo_id = "tensorblock/Llama-3.2-3B-Overthinker-GGUF" self.filename = "Llama-3.2-3B-Overthinker-Q8_0.gguf" self.model_dir = "models" self.logger = logging.getLogger(__name__) self.model = None async def initialize(self): """Initialize the model.""" try: # Create models directory if it doesn't exist os.makedirs(self.model_dir, exist_ok=True) model_path = os.path.join(self.model_dir, self.filename) # Download model if it doesn't exist if not os.path.exists(model_path): self.logger.info(f"Downloading model to {model_path}...") model_path = huggingface_hub.hf_hub_download( repo_id=self.repo_id, filename=self.filename, repo_type="model", local_dir=self.model_dir, local_dir_use_symlinks=False ) self.logger.info("Model downloaded successfully!") else: self.logger.info("Using existing model file...") # Try to use GPU, fall back to CPU if not available try: self.model = Llama( model_path=model_path, n_ctx=4096, n_batch=512, n_threads=8, n_gpu_layers=35 ) self.logger.info("Model loaded with GPU acceleration!") except Exception as e: self.logger.warning(f"GPU loading failed: {e}, falling back to CPU...") self.model = Llama( model_path=model_path, n_ctx=2048, n_batch=512, n_threads=4, n_gpu_layers=0 ) self.logger.info("Model loaded in CPU-only mode") except Exception as e: self.logger.error(f"Error initializing model: {e}") raise async def reason(self, query: str, context: Dict[str, Any]) -> Dict[str, Any]: """Generate reasoning response using local LLM.""" try: if not self.model: await self.initialize() # Format prompt with context prompt = self._format_prompt(query, context) # Generate response response = self.model( prompt, max_tokens=1024 if self.model.n_ctx >= 4096 else 512, temperature=0.7, top_p=0.95, repeat_penalty=1.1, echo=False ) # Extract and structure the response result = self._parse_response(response['choices'][0]['text']) return { 'success': True, 'answer': result['answer'], 'reasoning': result['reasoning'], 'confidence': result['confidence'], 'timestamp': datetime.now(), 'metadata': { 'model': self.repo_id, 'strategy': 'local_llm', 'context_length': len(prompt), 'response_length': len(response['choices'][0]['text']) } } except Exception as e: self.logger.error(f"Error in reasoning: {e}") return { 'success': False, 'error': str(e), 'timestamp': datetime.now() } def _format_prompt(self, query: str, context: Dict[str, Any]) -> str: """Format the prompt with query and context.""" # Include relevant context context_str = "\n".join([ f"{k}: {v}" for k, v in context.items() if k in ['objective', 'constraints', 'background'] ]) return f"""Let's solve this problem step by step. Context: {context_str} Question: {query} Let me break this down: 1.""" def _parse_response(self, text: str) -> Dict[str, Any]: """Parse the response into structured output.""" # Simple parsing for now lines = text.strip().split('\n') return { 'answer': lines[-1] if lines else '', 'reasoning': '\n'.join(lines[:-1]) if len(lines) > 1 else '', 'confidence': 0.8 # Default confidence }