import json
import logging
import os
from typing import Any, Dict, List, Optional

import requests

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class OpenRouterFreeAdapter:
    """Adapter for accessing only free LLMs through the OpenRouter.ai API."""

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: str = "https://openrouter.ai/api/v1"
    ):
        """
        Initialize the OpenRouter adapter for free models only.

        Args:
            api_key: OpenRouter API key. If None, will try to load from environment.
            base_url: Base URL for the OpenRouter API.
        """
        self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not self.api_key:
            logger.warning(
                "No OpenRouter API key provided. Using limited free access.")

        self.base_url = base_url
        # App info is sent as headers so OpenRouter can attribute requests
        self.app_url = os.getenv("APP_URL", "")
        self.app_name = os.getenv("APP_NAME", "AskMyDocs")

        self.model: Optional[str] = None
        self.update_best_free_model()

    def update_best_free_model(self) -> bool:
        """
        Find and set the best available free model.

        Returns:
            Boolean indicating success.
        """
        free_models = self.list_free_models()

        if not free_models:
            # If the API call fails, use a fallback list of known free models
            logger.warning(
                "Could not retrieve free models list. Using fallback models.")
            self.model = self._get_fallback_model()
            return False

        # Sort models by preference:
        # 1. Llama 4 models (highest priority)
        # 2. Gemini / Claude models
        # 3. Mistral models
        # 4. DeepSeek models
        # 5. Others
        ranked_models = self._rank_free_models(free_models)

        if ranked_models:
            self.model = ranked_models[0]["id"]
            logger.info(f"Selected free model: {self.model}")
            return True
        else:
            self.model = self._get_fallback_model()
            logger.warning(
                f"No suitable free models found. Using fallback: {self.model}")
            return False

    def _rank_free_models(self, free_models: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Rank free models by preference for document QA tasks.

        Args:
            free_models: List of free model dictionaries.

        Returns:
            Sorted list of models by preference.
        """
        # Define preference tiers
        tier_1_patterns = ["llama-4", "llama4"]
        tier_2_patterns = ["gemini", "claude"]
        tier_3_patterns = ["mistral", "mixtral"]
        tier_4_patterns = ["deepseek"]

        # Helper function to determine tier
        def get_model_tier(model_id: str) -> int:
            model_id_lower = model_id.lower()

            # Check for free tag/suffix
            is_free = ":free" in model_id_lower or "-free" in model_id_lower
            if not is_free:
                return 99  # Deprioritize non-free models

            # Check pattern matches
            for pattern in tier_1_patterns:
                if pattern in model_id_lower:
                    return 1
            for pattern in tier_2_patterns:
                if pattern in model_id_lower:
                    return 2
            for pattern in tier_3_patterns:
                if pattern in model_id_lower:
                    return 3
            for pattern in tier_4_patterns:
                if pattern in model_id_lower:
                    return 4
            return 5  # Other free models

        # Sort by tier, then by context length (longer is better)
        ranked_models = sorted(
            free_models,
            key=lambda m: (
                get_model_tier(m["id"]),
                # Negative to sort context length in descending order
                -m.get("context_length", 0)
            )
        )
        return ranked_models

    def _get_fallback_model(self) -> str:
        """
        Get a fallback model if API calls fail.

        Returns:
            Model ID string for a known free model.
""" # List of known free models, ordered by preference fallback_models = [ "meta-llama/llama-4-scout:free", "google/gemini-2.5-pro-exp-03-25:free", "mistralai/mistral-small-3.1-24b-instruct:free", "deepseek/deepseek-v3-base:free", "nousresearch/deephermes-3-llama-3-8b-preview:free", "huggingfaceh4/zephyr-7b-beta" # Always fallback to this older but reliable one ] return fallback_models[0] def _get_headers(self) -> Dict[str, str]: """ Get headers for OpenRouter API requests. Returns: Dictionary of headers. """ headers = { "Content-Type": "application/json" } # Add API key if available if self.api_key: headers["Authorization"] = f"Bearer {self.api_key}" headers["HTTP-Referer"] = self.app_url headers["X-Title"] = self.app_name return headers def list_models(self) -> List[Dict[str, Any]]: """ List available models on OpenRouter. Returns: List of model information dictionaries. """ try: headers = self._get_headers() response = requests.get( f"{self.base_url}/models", headers=headers ) if response.status_code == 200: return response.json().get("data", []) else: logger.error( f"Error listing models: {response.status_code} - {response.text}" ) return [] except Exception as e: logger.error(f"Exception listing models: {str(e)}") return [] def list_free_models(self) -> List[Dict[str, Any]]: """ List models that are free to use on OpenRouter. Returns: List of free model information dictionaries. """ # Get all models models = self.list_models() # Filter for free models - looking for multiple indicators free_models = [] for model in models: model_id = model.get("id", "").lower() pricing = model.get("pricing", {}) # Check various indicators that a model is free is_free = False # Check for explicit free tag in model ID if ":free" in model_id or "-free" in model_id: is_free = True # Check for zero pricing elif (pricing.get("prompt") == 0 and pricing.get("completion") == 0): is_free = True # Check for free_tier indicator if present elif model.get("free_tier", False): is_free = True if is_free: free_models.append(model) # Log the number of free models found logger.info(f"Found {len(free_models)} free models on OpenRouter") return free_models def _handle_streaming_response(self, response): """ Handle streaming response from OpenRouter API. Args: response: Response object from requests. Returns: Combined text from streaming response. """ result = "" for line in response.iter_lines(): if line: line_text = line.decode('utf-8') # Remove the "data: " prefix if line_text.startswith("data: "): line_text = line_text[6:] # Skip keep-alive lines if line_text.strip() == "[DONE]": break try: # Parse the JSON json_data = json.loads(line_text) # Extract the text if "choices" in json_data and json_data["choices"]: delta = json_data["choices"][0].get("delta", {}) if "content" in delta: result += delta["content"] except json.JSONDecodeError: pass return result def generate( self, prompt: str, temperature: float = 0.0, max_tokens: int = 1000, stream: bool = False ) -> str: """ Generate text using OpenRouter API with a free model. Args: prompt: The prompt to send to the model. temperature: Controls randomness. Lower is more deterministic. max_tokens: Maximum number of tokens to generate. stream: Whether to stream the response. Returns: Generated text from the model. """ # Ensure we have a model selected if not self.model: self.update_best_free_model() # If still no model, return error if not self.model: return "Error: No free models available on OpenRouter." 
        try:
            headers = self._get_headers()

            # Use the OpenAI-compatible format for the request
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                "temperature": temperature,
                "max_tokens": max_tokens,
                "stream": stream
            }

            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=headers,
                json=payload,
                # stream=True makes requests expose the SSE lines incrementally
                # instead of buffering the whole body before returning
                stream=stream,
                timeout=120
            )

            if response.status_code == 200:
                if stream:
                    # Handle streaming response
                    return self._handle_streaming_response(response)
                else:
                    # Handle regular response; parse the body once
                    data = response.json()
                    content = data["choices"][0]["message"]["content"]

                    # Log model usage for tracking
                    usage = data.get("usage", {})
                    logger.info(
                        f"Used model {self.model} - "
                        f"Input: {usage.get('prompt_tokens', 0)}, "
                        f"Output: {usage.get('completion_tokens', 0)}")
                    return content
            else:
                error_info = f"Error {response.status_code}"
                try:
                    error_detail = response.json()
                    error_message = error_detail.get(
                        "error", {}).get("message", "Unknown error")
                    error_info = f"{error_info}: {error_message}"
                except Exception:
                    error_info = f"{error_info}: {response.text}"

                logger.error(f"Error generating text: {error_info}")

                # Check for specific error cases
                if "rate limit" in error_info.lower():
                    return ("Error: Rate limit exceeded for this free model. "
                            "Please try again later or try a different model.")

                # If there's an issue with the model itself, try a different one
                if "model" in error_info.lower() or "no endpoints" in error_info.lower():
                    prev_model = self.model
                    if self.update_best_free_model() and self.model != prev_model:
                        logger.info(
                            f"Retrying with different free model: {self.model}")
                        return self.generate(prompt, temperature, max_tokens, stream)

                return f"Error: Failed to generate response. {error_info}"
        except Exception as e:
            logger.error(f"Exception during text generation: {str(e)}")
            return f"Error: {str(e)}"


class OpenRouterFreeChain:
    """Chain for handling Q&A with OpenRouter free LLMs."""

    def __init__(self, adapter: OpenRouterFreeAdapter):
        """
        Initialize the OpenRouter free chain.

        Args:
            adapter: An initialized OpenRouterFreeAdapter.
        """
        self.adapter = adapter

    def create_prompt(self, query: str, context: List[str]) -> str:
        """
        Create a prompt for the LLM based on the query and context.

        Args:
            query: The user's question.
            context: List of document contents to provide as context.

        Returns:
            Formatted prompt string.
        """
        context_str = "\n\n".join(
            [f"Document {i+1}:\n{doc}" for i, doc in enumerate(context)])

        prompt = f"""You are an AI assistant answering questions based on the provided documents.

Context information:
{context_str}

Based on the above context, please answer the following question:
{query}

If the information to answer the question is not contained in the provided documents, respond with: "I don't have enough information in the provided documents to answer this question."

Answer:"""
        return prompt

    def run(self, query: str, context: List[str]) -> str:
        """
        Run the chain to get an answer.

        Args:
            query: The user's question.
            context: List of document contents to provide as context.

        Returns:
            Answer from the model.
        """
        prompt = self.create_prompt(query, context)
        return self.adapter.generate(prompt)


def get_best_free_model() -> str:
    """
    Get the best available free model from OpenRouter.

    Returns:
        Model ID string for the recommended free model.
    """
    # __init__ already selects the best free model, so no extra call is needed
    adapter = OpenRouterFreeAdapter()
    return adapter.model
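
# --- Usage sketch ---
# A minimal demonstration of the adapter and chain above, not part of the
# module's API. It assumes OPENROUTER_API_KEY is set in the environment
# (otherwise the adapter falls back to limited free access), and the sample
# documents and question below are made up for illustration.
if __name__ == "__main__":
    adapter = OpenRouterFreeAdapter()
    print(f"Using free model: {adapter.model}")

    chain = OpenRouterFreeChain(adapter)

    # Hypothetical document snippets standing in for retrieved chunks
    documents = [
        "OpenRouter exposes many hosted models behind one OpenAI-compatible API.",
        "Models tagged ':free' can be used without per-token charges, "
        "subject to rate limits.",
    ]
    answer = chain.run(
        "Which models can be used without per-token charges?", documents)
    print(answer)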