"""Local LLM integration for the reasoning system."""
import os
from typing import Dict, Any, Optional
from datetime import datetime
import logging
from llama_cpp import Llama
import huggingface_hub
from .base import ReasoningStrategy
class LocalLLMStrategy(ReasoningStrategy):
"""Implements reasoning using local LLM."""
def __init__(self, config: Optional[Dict[str, Any]] = None):
"""Initialize the local LLM strategy."""
super().__init__()
self.config = config or {}
# Configure parameters with defaults
self.repo_id = self.config.get('repo_id', "gpt-omni/mini-omni2")
self.filename = self.config.get('filename', "mini-omni2.gguf")
self.model_dir = self.config.get('model_dir', "models")
# Standard reasoning parameters
self.min_confidence = self.config.get('min_confidence', 0.7)
self.parallel_threshold = self.config.get('parallel_threshold', 3)
self.learning_rate = self.config.get('learning_rate', 0.1)
self.strategy_weights = self.config.get('strategy_weights', {
"LOCAL_LLM": 0.8,
"CHAIN_OF_THOUGHT": 0.6,
"TREE_OF_THOUGHTS": 0.5,
"META_LEARNING": 0.4
})
self.logger = logging.getLogger(__name__)
self.model = None
async def initialize(self):
"""Initialize the model."""
try:
# Create models directory if it doesn't exist
os.makedirs(self.model_dir, exist_ok=True)
model_path = os.path.join(self.model_dir, self.filename)
# Download model if it doesn't exist
if not os.path.exists(model_path):
self.logger.info(f"Downloading model to {model_path}...")
model_path = huggingface_hub.hf_hub_download(
repo_id=self.repo_id,
filename=self.filename,
repo_type="model",
local_dir=self.model_dir,
local_dir_use_symlinks=False
)
self.logger.info("Model downloaded successfully!")
else:
self.logger.info("Using existing model file...")
# Try to use GPU, fall back to CPU if not available
try:
self.model = Llama(
model_path=model_path,
n_ctx=4096,
n_batch=512,
n_threads=8,
n_gpu_layers=35
)
self.logger.info("Model loaded with GPU acceleration!")
except Exception as e:
self.logger.warning(f"GPU loading failed: {e}, falling back to CPU...")
self.model = Llama(
model_path=model_path,
n_ctx=2048,
n_batch=512,
n_threads=4,
n_gpu_layers=0
)
self.logger.info("Model loaded in CPU-only mode")
except Exception as e:
self.logger.error(f"Error initializing model: {e}")
raise
async def reason(self, query: str, context: Dict[str, Any]) -> Dict[str, Any]:
"""Generate reasoning response using local LLM."""
try:
if not self.model:
await self.initialize()
# Format prompt with context
prompt = self._format_prompt(query, context)
# Generate response
response = self.model(
prompt,
                max_tokens=1024 if self.model.n_ctx() >= 4096 else 512,  # n_ctx() is a method in llama-cpp-python
temperature=0.7,
top_p=0.95,
repeat_penalty=1.1,
echo=False
)
# Extract and structure the response
result = self._parse_response(response['choices'][0]['text'])
return {
'success': True,
'answer': result['answer'],
'reasoning': result['reasoning'],
'confidence': result['confidence'],
'timestamp': datetime.now(),
'metadata': {
'model': self.repo_id,
'strategy': 'local_llm',
'context_length': len(prompt),
'response_length': len(response['choices'][0]['text'])
}
}
except Exception as e:
self.logger.error(f"Error in reasoning: {e}")
return {
'success': False,
'error': str(e),
'timestamp': datetime.now()
}
def _format_prompt(self, query: str, context: Dict[str, Any]) -> str:
"""Format the prompt with query and context."""
# Include relevant context
context_str = "\n".join([
f"{k}: {v}" for k, v in context.items()
if k in ['objective', 'constraints', 'background']
])
return f"""Let's solve this problem step by step.
Context:
{context_str}
Question: {query}
Let me break this down:
1."""
def _parse_response(self, text: str) -> Dict[str, Any]:
"""Parse the response into structured output."""
# Simple parsing for now
lines = text.strip().split('\n')
return {
'answer': lines[-1] if lines else '',
'reasoning': '\n'.join(lines[:-1]) if len(lines) > 1 else '',
'confidence': 0.8 # Default confidence
}
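

# Minimal usage sketch, assuming this module stays importable as part of its
# package (the relative `.base` import means it must be run as a module, e.g.
# `python -m <package>.local_llm`, where <package> is hypothetical here) and
# that the configured GGUF file is downloadable or already present in `models/`.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        # Uses the default config (repo_id, filename, model_dir) from __init__.
        strategy = LocalLLMStrategy()
        result = await strategy.reason(
            "What are the trade-offs between GPU and CPU inference?",
            context={"objective": "Explain briefly", "constraints": "three short points"},
        )
        if result["success"]:
            print("Answer:", result["answer"])
            print("Confidence:", result["confidence"])
        else:
            print("Reasoning failed:", result["error"])

    asyncio.run(_demo())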