Spaces:
Runtime error
Runtime error
File size: 5,660 Bytes
dcb2a99 1671ec3 dcb2a99 1671ec3 dcb2a99 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
"""Local LLM integration for the reasoning system."""
import os
from typing import Dict, Any, Optional
from datetime import datetime
import logging
from llama_cpp import Llama
import huggingface_hub
from .base import ReasoningStrategy
class LocalLLMStrategy(ReasoningStrategy):
    """Implements reasoning using a local llama.cpp GGUF model.

    Downloads the model file from the Hugging Face Hub on first use,
    then loads it with GPU acceleration, falling back to a smaller
    CPU-only configuration if GPU loading fails.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialize the local LLM strategy.

        Args:
            config: Optional settings. Recognized keys: 'repo_id',
                'filename', 'model_dir', 'min_confidence',
                'parallel_threshold', 'learning_rate', 'strategy_weights'.
                Unspecified keys fall back to the defaults below.
        """
        super().__init__()
        self.config = config or {}

        # Model source/location parameters with defaults
        self.repo_id = self.config.get('repo_id', "gpt-omni/mini-omni2")
        self.filename = self.config.get('filename', "mini-omni2.gguf")
        self.model_dir = self.config.get('model_dir', "models")

        # Standard reasoning parameters shared across strategies
        self.min_confidence = self.config.get('min_confidence', 0.7)
        self.parallel_threshold = self.config.get('parallel_threshold', 3)
        self.learning_rate = self.config.get('learning_rate', 0.1)
        self.strategy_weights = self.config.get('strategy_weights', {
            "LOCAL_LLM": 0.8,
            "CHAIN_OF_THOUGHT": 0.6,
            "TREE_OF_THOUGHTS": 0.5,
            "META_LEARNING": 0.4
        })

        self.logger = logging.getLogger(__name__)
        self.model = None
        # Context window configured at load time (set by initialize()).
        # Kept as a plain int because Llama.n_ctx is a *method* on the
        # llama-cpp-python object, not an attribute — comparing it to an
        # int raises TypeError. Default matches the CPU fallback config.
        self._n_ctx = 2048

    async def initialize(self):
        """Download the model file if needed and load it into memory.

        Tries a GPU-accelerated configuration first (larger context,
        more threads); on any failure, retries with a CPU-only config.

        Raises:
            Exception: Re-raises any download or load failure after logging.
        """
        try:
            # Create models directory if it doesn't exist
            os.makedirs(self.model_dir, exist_ok=True)
            model_path = os.path.join(self.model_dir, self.filename)

            # Download model if it doesn't exist locally
            if not os.path.exists(model_path):
                self.logger.info(f"Downloading model to {model_path}...")
                model_path = huggingface_hub.hf_hub_download(
                    repo_id=self.repo_id,
                    filename=self.filename,
                    repo_type="model",
                    local_dir=self.model_dir,
                    local_dir_use_symlinks=False
                )
                self.logger.info("Model downloaded successfully!")
            else:
                self.logger.info("Using existing model file...")

            # Try to use GPU, fall back to CPU if not available
            try:
                self._n_ctx = 4096
                self.model = Llama(
                    model_path=model_path,
                    n_ctx=self._n_ctx,
                    n_batch=512,
                    n_threads=8,
                    n_gpu_layers=35
                )
                self.logger.info("Model loaded with GPU acceleration!")
            except Exception as e:
                self.logger.warning(f"GPU loading failed: {e}, falling back to CPU...")
                self._n_ctx = 2048
                self.model = Llama(
                    model_path=model_path,
                    n_ctx=self._n_ctx,
                    n_batch=512,
                    n_threads=4,
                    n_gpu_layers=0
                )
                self.logger.info("Model loaded in CPU-only mode")
        except Exception as e:
            self.logger.error(f"Error initializing model: {e}")
            raise

    async def reason(self, query: str, context: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a reasoning response using the local LLM.

        Args:
            query: The question to reason about.
            context: Supporting information; 'objective', 'constraints'
                and 'background' keys are folded into the prompt.

        Returns:
            On success: dict with 'success', 'answer', 'reasoning',
            'confidence', 'timestamp' and 'metadata' keys.
            On failure: dict with 'success'=False, 'error', 'timestamp'.
        """
        try:
            # Lazy-load the model on first call
            if not self.model:
                await self.initialize()

            # Format prompt with context
            prompt = self._format_prompt(query, context)

            # Generate response; allow longer outputs when the larger
            # (GPU) context window was configured. NOTE: the previous
            # code compared the bound method self.model.n_ctx to an int,
            # which raised TypeError and made every call return an error.
            response = self.model(
                prompt,
                max_tokens=1024 if self._n_ctx >= 4096 else 512,
                temperature=0.7,
                top_p=0.95,
                repeat_penalty=1.1,
                echo=False
            )

            # Extract and structure the response
            result = self._parse_response(response['choices'][0]['text'])

            return {
                'success': True,
                'answer': result['answer'],
                'reasoning': result['reasoning'],
                'confidence': result['confidence'],
                'timestamp': datetime.now(),
                'metadata': {
                    'model': self.repo_id,
                    'strategy': 'local_llm',
                    'context_length': len(prompt),
                    'response_length': len(response['choices'][0]['text'])
                }
            }
        except Exception as e:
            self.logger.error(f"Error in reasoning: {e}")
            return {
                'success': False,
                'error': str(e),
                'timestamp': datetime.now()
            }

    def _format_prompt(self, query: str, context: Dict[str, Any]) -> str:
        """Format the prompt with the query and selected context keys.

        Only 'objective', 'constraints' and 'background' entries are
        included; other context keys are ignored.
        """
        context_str = "\n".join([
            f"{k}: {v}" for k, v in context.items()
            if k in ['objective', 'constraints', 'background']
        ])

        return f"""Let's solve this problem step by step.
Context:
{context_str}
Question: {query}
Let me break this down:
1."""

    def _parse_response(self, text: str) -> Dict[str, Any]:
        """Parse raw model output into answer/reasoning/confidence.

        Simple heuristic: the last line is the answer, everything before
        it is the reasoning trace. Confidence is a fixed default.
        """
        lines = text.strip().split('\n')

        return {
            'answer': lines[-1] if lines else '',
            'reasoning': '\n'.join(lines[:-1]) if len(lines) > 1 else '',
            'confidence': 0.8  # Default confidence
        }
|