arthuroe commited on
Commit
e85f548
·
verified ·
1 Parent(s): 5561947

Create openrouter_llm.py

Browse files
Files changed (1) hide show
  1. openrouter_llm.py +428 -0
openrouter_llm.py ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ import json
4
+ import requests
5
+ from typing import List, Dict, Any, Optional, Union
6
+
7
# Configure module-wide logging; basicConfig applies process-wide defaults.
logging.basicConfig(level=logging.INFO)
# Module-level logger used throughout this module.
logger = logging.getLogger(__name__)
10
+
11
+
12
class OpenRouterFreeAdapter:
    """Adapter for accessing only free LLMs through the OpenRouter.ai API.

    On construction the adapter queries OpenRouter for the list of free
    models, ranks them for document-QA suitability, and stores the winner
    in ``self.model``. ``generate()`` then calls the OpenAI-compatible
    ``/chat/completions`` endpoint with that model.
    """

    # Seconds to wait on any HTTP call. The original code passed no
    # timeout, so a stalled connection could hang the caller forever.
    REQUEST_TIMEOUT = 60

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: str = "https://openrouter.ai/api/v1"
    ):
        """
        Initialize the OpenRouter adapter for free models only.

        Args:
            api_key: OpenRouter API key. If None, will try to load it from
                the OPENROUTER_API_KEY environment variable.
            base_url: Base URL for the OpenRouter API.
        """
        self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not self.api_key:
            logger.warning(
                "No OpenRouter API key provided. Using limited free access.")

        self.base_url = base_url
        self.app_url = ""

        # App info sent as headers so OpenRouter can attribute traffic.
        self.app_name = os.getenv("APP_NAME", "AskMyDocs")

        # Selects and stores the best free model in self.model.
        self.update_best_free_model()

    def update_best_free_model(self) -> bool:
        """
        Find and set the best available free model.

        Returns:
            True if a model was selected from the live model list,
            False if the hard-coded fallback had to be used.
        """
        free_models = self.list_free_models()

        if not free_models:
            # API call failed or returned nothing: use a known free model.
            logger.warning(
                "Could not retrieve free models list. Using fallback models.")
            self.model = self._get_fallback_model()
            return False

        # Rank by family preference, then context length (see _rank_free_models).
        ranked_models = self._rank_free_models(free_models)

        if ranked_models:
            self.model = ranked_models[0]["id"]
            logger.info(f"Selected free model: {self.model}")
            return True

        self.model = self._get_fallback_model()
        logger.warning(
            f"No suitable free models found. Using fallback: {self.model}")
        return False

    def _rank_free_models(self, free_models: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Rank free models by preference for document QA tasks.

        Preference tiers: Llama 4 > Gemini/Claude > Mistral/Mixtral >
        DeepSeek > other free models; IDs without a free tag sort last.
        Ties within a tier are broken by descending context length.

        Args:
            free_models: List of free model dictionaries.

        Returns:
            Sorted list of models by preference.
        """
        # Ordered so the first matching tier wins (tier 1 is best).
        tiers = [
            ["llama-4", "llama4"],
            ["gemini", "claude"],
            ["mistral", "mixtral"],
            ["deepseek"],
        ]

        def get_model_tier(model_id: str) -> int:
            """Map a model ID to its preference tier (lower is better)."""
            model_id_lower = model_id.lower()

            # Deprioritize anything not explicitly tagged as free.
            if ":free" not in model_id_lower and "-free" not in model_id_lower:
                return 99

            for tier, patterns in enumerate(tiers, start=1):
                if any(pattern in model_id_lower for pattern in patterns):
                    return tier

            return 5  # Other free models

        # Sort by tier, then by context length (longer context first).
        return sorted(
            free_models,
            key=lambda m: (get_model_tier(m["id"]), -m.get("context_length", 0)),
        )

    def _get_fallback_model(self) -> str:
        """
        Get a fallback model if API calls fail.

        Returns:
            Model ID string for a known free model (the most preferred
            entry of the hard-coded list below).
        """
        # Ordered by preference; only the first is used today, the rest
        # document known-good alternatives for manual substitution.
        fallback_models = [
            "meta-llama/llama-4-scout:free",
            "google/gemini-2.5-pro-exp-03-25:free",
            "mistralai/mistral-small-3.1-24b-instruct:free",
            "deepseek/deepseek-v3-base:free",
            "nousresearch/deephermes-3-llama-3-8b-preview:free",
            "huggingfaceh4/zephyr-7b-beta"  # Always fallback to this older but reliable one
        ]

        return fallback_models[0]

    def _get_headers(self) -> Dict[str, str]:
        """
        Get headers for OpenRouter API requests.

        Returns:
            Dictionary of headers including auth (when an API key is set)
            and app-attribution headers.
        """
        headers = {
            "Content-Type": "application/json"
        }

        # Add API key if available
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        # OpenRouter uses these for traffic attribution / app rankings.
        headers["HTTP-Referer"] = self.app_url
        headers["X-Title"] = self.app_name

        return headers

    def list_models(self) -> List[Dict[str, Any]]:
        """
        List available models on OpenRouter.

        Returns:
            List of model information dictionaries; empty list on any error.
        """
        try:
            response = requests.get(
                f"{self.base_url}/models",
                headers=self._get_headers(),
                # Bound the wait so a stalled connection cannot hang us.
                timeout=self.REQUEST_TIMEOUT,
            )

            if response.status_code == 200:
                return response.json().get("data", [])

            logger.error(
                f"Error listing models: {response.status_code} - {response.text}"
            )
            return []

        except Exception as e:
            # Best-effort: callers treat an empty list as "use fallback".
            logger.error(f"Exception listing models: {str(e)}")
            return []

    def list_free_models(self) -> List[Dict[str, Any]]:
        """
        List models that are free to use on OpenRouter.

        Returns:
            List of free model information dictionaries.
        """
        # Get all models
        models = self.list_models()

        # Filter for free models - looking for multiple indicators
        free_models = []
        for model in models:
            model_id = model.get("id", "").lower()
            pricing = model.get("pricing", {})

            # Check various indicators that a model is free
            is_free = False

            # Check for explicit free tag in model ID
            if ":free" in model_id or "-free" in model_id:
                is_free = True

            # Check for zero pricing
            elif (pricing.get("prompt") == 0 and pricing.get("completion") == 0):
                is_free = True

            # Check for free_tier indicator if present
            elif model.get("free_tier", False):
                is_free = True

            if is_free:
                free_models.append(model)

        # Log the number of free models found
        logger.info(f"Found {len(free_models)} free models on OpenRouter")

        return free_models

    def _handle_streaming_response(self, response):
        """
        Handle a server-sent-events streaming response from OpenRouter.

        Args:
            response: Response object from requests (opened with stream=True).

        Returns:
            Combined text from all streamed delta chunks.
        """
        result = ""

        for line in response.iter_lines():
            if not line:
                continue  # keep-alive / blank lines

            line_text = line.decode('utf-8')

            # Remove the SSE "data: " prefix
            if line_text.startswith("data: "):
                line_text = line_text[6:]

            # Sentinel marking the end of the stream
            if line_text.strip() == "[DONE]":
                break

            try:
                json_data = json.loads(line_text)

                # Accumulate incremental content deltas
                if "choices" in json_data and json_data["choices"]:
                    delta = json_data["choices"][0].get("delta", {})
                    if "content" in delta:
                        result += delta["content"]
            except json.JSONDecodeError:
                # Non-JSON lines (comments, partial frames) are ignored
                pass

        return result

    def generate(
        self,
        prompt: str,
        temperature: float = 0.0,
        max_tokens: int = 1000,
        stream: bool = False
    ) -> str:
        """
        Generate text using OpenRouter API with a free model.

        Args:
            prompt: The prompt to send to the model.
            temperature: Controls randomness. Lower is more deterministic.
            max_tokens: Maximum number of tokens to generate.
            stream: Whether to stream the response.

        Returns:
            Generated text from the model, or an "Error: ..." string.
        """
        # Ensure we have a model selected (getattr guards partially
        # constructed instances that never reached __init__'s selection).
        if not getattr(self, "model", None):
            self.update_best_free_model()

        # If still no model, return error
        if not self.model:
            return "Error: No free models available on OpenRouter."

        try:
            # Use OpenAI-compatible format for the request
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                "temperature": temperature,
                "max_tokens": max_tokens,
                "stream": stream
            }

            # stream=stream makes requests yield the body incrementally;
            # the original omitted it, downloading the whole body before
            # _handle_streaming_response ever ran.
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self._get_headers(),
                json=payload,
                stream=stream,
                timeout=self.REQUEST_TIMEOUT,
            )

            if response.status_code == 200:
                if stream:
                    return self._handle_streaming_response(response)

                # Parse the body once and reuse it (was parsed twice).
                data = response.json()
                content = data["choices"][0]["message"]["content"]

                # Log model usage for tracking
                usage = data.get("usage", {})
                logger.info(
                    f"Used model {self.model} - Input: {usage.get('prompt_tokens', 0)}, Output: {usage.get('completion_tokens', 0)}")
                return content

            error_info = f"Error {response.status_code}"
            try:
                error_detail = response.json()
                error_message = error_detail.get(
                    "error", {}).get("message", "Unknown error")
                error_info = f"{error_info}: {error_message}"
            except Exception:
                # Body was not JSON; fall back to the raw text.
                error_info = f"{error_info}: {response.text}"

            logger.error(f"Error generating text: {error_info}")

            # Check for specific error cases
            if "rate limit" in error_info.lower():
                return "Error: Rate limit exceeded for this free model. Please try again later or try a different model."

            # If there's an issue with the model, try to get a different one
            if "model" in error_info.lower() or "no endpoints" in error_info.lower():
                prev_model = self.model
                # Only retry when selection actually changed, preventing
                # an immediate retry loop on the same failing model.
                if self.update_best_free_model() and self.model != prev_model:
                    logger.info(
                        f"Retrying with different free model: {self.model}")
                    return self.generate(prompt, temperature, max_tokens, stream)

            return f"Error: Failed to generate response. {error_info}"

        except Exception as e:
            logger.error(f"Exception during text generation: {str(e)}")
            return f"Error: {str(e)}"
362
+
363
+
364
class OpenRouterFreeChain:
    """Chain for handling Q&A with OpenRouter free LLMs"""

    def __init__(self, adapter: OpenRouterFreeAdapter):
        """
        Initialize the OpenRouter free chain.

        Args:
            adapter: An initialized OpenRouterFreeAdapter.
        """
        self.adapter = adapter

    def create_prompt(self, query: str, context: List[str]) -> str:
        """
        Build the LLM prompt from the user's question and document context.

        Args:
            query: The user's question.
            context: List of document contents to provide as context.

        Returns:
            Formatted prompt string.
        """
        # Number each document and separate them with blank lines.
        numbered_docs = (
            f"Document {idx}:\n{text}"
            for idx, text in enumerate(context, start=1)
        )
        context_str = "\n\n".join(numbered_docs)

        return f"""You are an AI assistant answering questions based on the provided documents.

Context information:
{context_str}

Based on the above context, please answer the following question:
{query}

If the information to answer the question is not contained in the provided documents, respond with: "I don't have enough information in the provided documents to answer this question."

Answer:"""

    def run(self, query: str, context: List[str]) -> str:
        """
        Run the chain to get an answer from the adapter's model.

        Args:
            query: The user's question.
            context: List of document contents to provide as context.

        Returns:
            Answer from the model.
        """
        return self.adapter.generate(self.create_prompt(query, context))
417
+
418
+
419
def get_best_free_model() -> str:
    """
    Get the best available free model from OpenRouter.

    Returns:
        Model ID string for the recommended free model.
    """
    # OpenRouterFreeAdapter.__init__ already runs update_best_free_model(),
    # so the previous explicit second call performed a redundant extra
    # round-trip to the OpenRouter /models endpoint.
    adapter = OpenRouterFreeAdapter()
    return adapter.model