"""Local LLM integration for the reasoning system."""

import os
from typing import Dict, Any, Optional
from datetime import datetime
import logging
from llama_cpp import Llama
import huggingface_hub
from .base import ReasoningStrategy

class LocalLLMStrategy(ReasoningStrategy):
    """Implements reasoning using local LLM."""
    
    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialize the local LLM strategy."""
        super().__init__()
        self.config = config or {}
        
        # Configure parameters with defaults
        self.repo_id = self.config.get('repo_id', "gpt-omni/mini-omni2")
        self.filename = self.config.get('filename', "mini-omni2.gguf")
        self.model_dir = self.config.get('model_dir', "models")
        
        # Standard reasoning parameters
        self.min_confidence = self.config.get('min_confidence', 0.7)
        self.parallel_threshold = self.config.get('parallel_threshold', 3)
        self.learning_rate = self.config.get('learning_rate', 0.1)
        self.strategy_weights = self.config.get('strategy_weights', {
            "LOCAL_LLM": 0.8,
            "CHAIN_OF_THOUGHT": 0.6,
            "TREE_OF_THOUGHTS": 0.5,
            "META_LEARNING": 0.4
        })
        
        self.logger = logging.getLogger(__name__)
        self.model = None
        
    async def initialize(self):
        """Initialize the model."""
        try:
            # Create models directory if it doesn't exist
            os.makedirs(self.model_dir, exist_ok=True)
            model_path = os.path.join(self.model_dir, self.filename)
            
            # Download model if it doesn't exist
            if not os.path.exists(model_path):
                self.logger.info(f"Downloading model to {model_path}...")
                model_path = huggingface_hub.hf_hub_download(
                    repo_id=self.repo_id,
                    filename=self.filename,
                    repo_type="model",
                    local_dir=self.model_dir,
                    local_dir_use_symlinks=False
                )
                self.logger.info("Model downloaded successfully!")
            else:
                self.logger.info("Using existing model file...")
            
            # Try to use GPU, fall back to CPU if not available
            try:
                self.model = Llama(
                    model_path=model_path,
                    n_ctx=4096,        # larger context window for GPU runs
                    n_batch=512,       # prompt-evaluation batch size
                    n_threads=8,
                    n_gpu_layers=35    # layers offloaded to the GPU
                )
                self.logger.info("Model loaded with GPU acceleration!")
            except Exception as e:
                self.logger.warning(f"GPU loading failed: {e}, falling back to CPU...")
                self.model = Llama(
                    model_path=model_path,
                    n_ctx=2048,        # smaller context window to limit RAM usage
                    n_batch=512,
                    n_threads=4,
                    n_gpu_layers=0     # keep every layer on the CPU
                )
                self.logger.info("Model loaded in CPU-only mode")
                
        except Exception as e:
            self.logger.error(f"Error initializing model: {e}")
            raise
            
    async def reason(self, query: str, context: Dict[str, Any]) -> Dict[str, Any]:
        """Generate reasoning response using local LLM."""
        try:
            if not self.model:
                await self.initialize()
            
            # Format prompt with context
            prompt = self._format_prompt(query, context)
            
            # Generate response
            response = self.model(
                prompt,
                # n_ctx() reports the loaded context window; allow longer
                # completions when the 4096-token GPU context is in use
                max_tokens=1024 if self.model.n_ctx() >= 4096 else 512,
                temperature=0.7,
                top_p=0.95,
                repeat_penalty=1.1,
                echo=False  # return only the completion, not the prompt
            )
            
            # Extract and structure the response
            result = self._parse_response(response['choices'][0]['text'])
            
            return {
                'success': True,
                'answer': result['answer'],
                'reasoning': result['reasoning'],
                'confidence': result['confidence'],
                'timestamp': datetime.now(),
                'metadata': {
                    'model': self.repo_id,
                    'strategy': 'local_llm',
                    # lengths are character counts, not token counts
                    'context_length': len(prompt),
                    'response_length': len(response['choices'][0]['text'])
                }
            }
                
        except Exception as e:
            self.logger.error(f"Error in reasoning: {e}")
            return {
                'success': False,
                'error': str(e),
                'timestamp': datetime.now()
            }
            
    def _format_prompt(self, query: str, context: Dict[str, Any]) -> str:
        """Format the prompt with query and context."""
        # Pass through only the context keys the prompt template understands
        context_str = "\n".join([
            f"{k}: {v}" for k, v in context.items()
            if k in ['objective', 'constraints', 'background']
        ])
        
        return f"""Let's solve this problem step by step.

Context:
{context_str}

Question: {query}

Let me break this down:
1."""
        
    def _parse_response(self, text: str) -> Dict[str, Any]:
        """Parse the response into structured output."""
        # Heuristic parse: treat the final line as the answer and the
        # preceding lines as the reasoning chain
        lines = text.strip().split('\n')
        
        return {
            'answer': lines[-1] if lines else '',
            'reasoning': '\n'.join(lines[:-1]) if len(lines) > 1 else '',
            'confidence': 0.8  # Fixed default; no model-derived estimate yet
        }
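

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the strategy API). It
# assumes the default GGUF checkpoint configured above is reachable on the
# Hugging Face Hub and that the config dict is all LocalLLMStrategy needs;
# the query and context values below are made up for the demo.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        strategy = LocalLLMStrategy({'model_dir': 'models'})
        # reason() initializes the model lazily on first use
        result = await strategy.reason(
            "What is 17 * 24?",
            {'objective': 'arithmetic check', 'constraints': 'show each step'}
        )
        if result['success']:
            print("Answer:", result['answer'])
            print("Reasoning:", result['reasoning'], sep="\n")
        else:
            print("Reasoning failed:", result['error'])

    asyncio.run(_demo())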