"""Local LLM integration for the reasoning system.""" import os from typing import Dict, Any, Optional from datetime import datetime import logging from llama_cpp import Llama import huggingface_hub from .base import ReasoningStrategy class LocalLLMStrategy(ReasoningStrategy): """Implements reasoning using local LLM.""" def __init__(self, config: Optional[Dict[str, Any]] = None): """Initialize the local LLM strategy.""" super().__init__() self.config = config or {} # Configure parameters with defaults self.repo_id = self.config.get('repo_id', "gpt-omni/mini-omni2") self.filename = self.config.get('filename', "mini-omni2.gguf") self.model_dir = self.config.get('model_dir', "models") # Standard reasoning parameters self.min_confidence = self.config.get('min_confidence', 0.7) self.parallel_threshold = self.config.get('parallel_threshold', 3) self.learning_rate = self.config.get('learning_rate', 0.1) self.strategy_weights = self.config.get('strategy_weights', { "LOCAL_LLM": 0.8, "CHAIN_OF_THOUGHT": 0.6, "TREE_OF_THOUGHTS": 0.5, "META_LEARNING": 0.4 }) self.logger = logging.getLogger(__name__) self.model = None async def initialize(self): """Initialize the model.""" try: # Create models directory if it doesn't exist os.makedirs(self.model_dir, exist_ok=True) model_path = os.path.join(self.model_dir, self.filename) # Download model if it doesn't exist if not os.path.exists(model_path): self.logger.info(f"Downloading model to {model_path}...") model_path = huggingface_hub.hf_hub_download( repo_id=self.repo_id, filename=self.filename, repo_type="model", local_dir=self.model_dir, local_dir_use_symlinks=False ) self.logger.info("Model downloaded successfully!") else: self.logger.info("Using existing model file...") # Try to use GPU, fall back to CPU if not available try: self.model = Llama( model_path=model_path, n_ctx=4096, n_batch=512, n_threads=8, n_gpu_layers=35 ) self.logger.info("Model loaded with GPU acceleration!") except Exception as e: self.logger.warning(f"GPU loading failed: {e}, falling back to CPU...") self.model = Llama( model_path=model_path, n_ctx=2048, n_batch=512, n_threads=4, n_gpu_layers=0 ) self.logger.info("Model loaded in CPU-only mode") except Exception as e: self.logger.error(f"Error initializing model: {e}") raise async def reason(self, query: str, context: Dict[str, Any]) -> Dict[str, Any]: """Generate reasoning response using local LLM.""" try: if not self.model: await self.initialize() # Format prompt with context prompt = self._format_prompt(query, context) # Generate response response = self.model( prompt, max_tokens=1024 if self.model.n_ctx >= 4096 else 512, temperature=0.7, top_p=0.95, repeat_penalty=1.1, echo=False ) # Extract and structure the response result = self._parse_response(response['choices'][0]['text']) return { 'success': True, 'answer': result['answer'], 'reasoning': result['reasoning'], 'confidence': result['confidence'], 'timestamp': datetime.now(), 'metadata': { 'model': self.repo_id, 'strategy': 'local_llm', 'context_length': len(prompt), 'response_length': len(response['choices'][0]['text']) } } except Exception as e: self.logger.error(f"Error in reasoning: {e}") return { 'success': False, 'error': str(e), 'timestamp': datetime.now() } def _format_prompt(self, query: str, context: Dict[str, Any]) -> str: """Format the prompt with query and context.""" # Include relevant context context_str = "\n".join([ f"{k}: {v}" for k, v in context.items() if k in ['objective', 'constraints', 'background'] ]) return f"""Let's solve this problem step by step. 
Context: {context_str} Question: {query} Let me break this down: 1.""" def _parse_response(self, text: str) -> Dict[str, Any]: """Parse the response into structured output.""" # Simple parsing for now lines = text.strip().split('\n') return { 'answer': lines[-1] if lines else '', 'reasoning': '\n'.join(lines[:-1]) if len(lines) > 1 else '', 'confidence': 0.8 # Default confidence }
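

# Usage sketch (illustrative, not part of the strategy itself): assumes the module
# lives in a package exposing `base.ReasoningStrategy` with a no-argument constructor,
# and that the import path below matches that package layout (hypothetical).
#
#     import asyncio
#     from reasoning.local_llm import LocalLLMStrategy  # hypothetical import path
#
#     async def main():
#         strategy = LocalLLMStrategy({'model_dir': 'models'})
#         result = await strategy.reason(
#             "What is the time complexity of binary search?",
#             {'objective': 'explain algorithm complexity'}
#         )
#         print(result.get('answer'))
#
#     asyncio.run(main())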