Spaces:
Running
Running
| """ | |
| Model management utility for the Fake News Detector application. | |
| This module provides functions for initializing, caching, and | |
| retrieving language models used throughout the application. | |
| It ensures models are loaded efficiently and reused appropriately. | |
| """ | |
| import os | |
| import logging | |
| import functools | |
| from langchain_openai import ChatOpenAI | |
| import spacy | |
# Named logger used by every function in this module.
logger = logging.getLogger("misinformation_detector")

# Global variables for models
# spaCy pipeline; stays None until initialize_models() or get_nlp_model()
# loads it successfully.
nlp = None
# ChatOpenAI instance; stays None until initialize_models() or
# get_llm_model() creates it.
model = None
# Set to True by initialize_models() once the ChatOpenAI model has been
# created. It is set even if the spaCy model could not be loaded.
models_initialized = False
| # Add caching decorator | |
def cached_model(func):
    """
    Decorator to cache model loading for improved performance.

    The decorated loader is called until it returns once; that result is
    stored and handed back on every later call, avoiding redundant model
    loading. If the loader raises, nothing is stored and the next call
    tries again.

    The cache is keyed by the function's name only, so arguments passed
    on later calls are ignored and the first result is always returned.

    Args:
        func (callable): Function that loads a model

    Returns:
        callable: Wrapped function that returns a cached model
    """
    cache = {}

    # functools.wraps copies __name__, __doc__, etc. from the loader so the
    # decorated function still identifies itself correctly in logs and
    # introspection.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Use function name as cache key
        key = func.__name__
        if key not in cache:
            logger.info(f"Model not in cache, calling {key}...")
            cache[key] = func(*args, **kwargs)
        return cache[key]

    return wrapper
def initialize_models():
    """
    Initialize all required models.

    Loads the spaCy pipeline ("en_core_web_sm") and creates the ChatOpenAI
    client, storing both in the module-level globals ``nlp`` and ``model``.
    Instances that are already present in those globals are reused rather
    than loaded a second time.

    If the spaCy model is missing, one download attempt is made through
    ``python -m spacy download``. A failed download is logged but does not
    abort initialization: ``nlp`` is left as ``None`` and the function
    still marks initialization as complete.

    Returns:
        str: Initialization status message

    Raises:
        ValueError: If OpenAI API key is not set
        Exception: Any other error raised while setting up the models is
            logged and re-raised unchanged
    """
    global nlp, model, models_initialized

    # Skip initialization if already done
    if models_initialized:
        logger.info("Models already initialized, skipping initialization")
        return "Models already initialized"

    # Check OpenAI API key (missing, empty and whitespace-only all count as unset)
    if not os.environ.get("OPENAI_API_KEY", "").strip():
        logger.error("OPENAI_API_KEY environment variable not set or empty")
        raise ValueError("OpenAI API key is required. Please set it in the Hugging Face Space secrets.")

    try:
        # Load NLP model, unless get_nlp_model() has already loaded it
        if nlp is None:
            try:
                logger.info("Loading spaCy NLP model...")
                nlp = spacy.load("en_core_web_sm")
                logger.info("Loaded spaCy NLP model")
            except OSError as e:
                # This handles the case if the model wasn't installed correctly
                logger.warning(f"Could not load spaCy model: {str(e)}")
                logger.info("Attempting to download spaCy model...")
                try:
                    import subprocess
                    import sys

                    # This downloads the model if it's missing
                    subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
                    # Try loading again
                    nlp = spacy.load("en_core_web_sm")
                    logger.info("Successfully downloaded and loaded spaCy model")
                except Exception as download_err:
                    logger.error(f"Failed to download spaCy model: {str(download_err)}")
                    # Continue with other initialization, we'll handle missing NLP model elsewhere

        # Set up OpenAI model, unless get_llm_model() has already created it
        if model is None:
            logger.info("Initializing ChatOpenAI model...")
            model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
            logger.info("Initialized ChatOpenAI model")

        # Mark initialization as complete
        models_initialized = True
        return "Models initialized successfully"
    except Exception as e:
        logger.error(f"Error initializing models: {str(e)}")
        # Bare raise keeps the original traceback intact
        raise
def get_nlp_model():
    """
    Return the shared spaCy pipeline, loading it on first use.

    When the module-level ``nlp`` is already set it is returned as is.
    Otherwise "en_core_web_sm" is loaded directly; if that fails, the
    error is logged and initialize_models() is called as a fallback.
    Whatever ``nlp`` holds afterwards is returned, which is ``None`` if
    no load succeeded.

    Returns:
        spacy.Language: Loaded spaCy model
    """
    global nlp

    # Already loaded: nothing to do
    if nlp is not None:
        return nlp

    try:
        # Load only the spaCy model rather than running full initialization
        logger.info("Loading spaCy NLP model...")
        nlp = spacy.load("en_core_web_sm")
        logger.info("Loaded spaCy NLP model")
    except Exception as load_err:
        logger.error(f"Error loading spaCy model: {str(load_err)}")
        # Full initialization is the fallback path
        initialize_models()

    return nlp
def get_llm_model():
    """
    Return the shared ChatOpenAI client, creating it on first use.

    When the module-level ``model`` is already set it is returned as is.
    Otherwise a ChatOpenAI instance is created directly; if that fails,
    the error is logged and initialize_models() is called as a fallback.
    Whatever ``model`` holds afterwards is returned.

    Returns:
        ChatOpenAI: Loaded LLM model
    """
    global model

    # Already created: nothing to do
    if model is not None:
        return model

    try:
        # Create only the LLM client rather than running full initialization
        logger.info("Initializing ChatOpenAI model...")
        model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
        logger.info("Initialized ChatOpenAI model")
    except Exception as init_err:
        logger.error(f"Error initializing ChatOpenAI model: {str(init_err)}")
        # Full initialization is the fallback path
        initialize_models()

    return model