"""
Comprehensive Santiment Data Fetcher
====================================

This module provides a complete data fetcher for the Santiment API using the sanpy library.
It maximizes data retrieval by organizing metrics into categories and providing batch operations.

Features:
- Fetches all available metrics organized by category
- Supports batch operations for efficient API usage
- Handles rate limiting and error management
- Provides data export capabilities
- Supports both single asset and multi-asset queries
- Includes SQL query execution for custom data needs

Author: AI Assistant
Version: 1.0.0
"""
|
|
|
import san |
|
import pandas as pd |
|
import numpy as np |
|
import time |
|
import logging |
|
from datetime import datetime, timedelta |
|
from typing import List, Dict, Optional, Union, Any |
|
import json |
|
import os |
|
from dataclasses import dataclass, field |
|
from concurrent.futures import ThreadPoolExecutor, as_completed |
|
|
|
|
|
try: |
|
from dotenv import load_dotenv |
|
load_dotenv() |
|
except ImportError: |
|
pass |
|
import warnings |
|
|
|
|
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') |
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
try: |
|
from src.config import DATA_DIR as CFG_DATA_DIR |
|
except Exception: |
|
try: |
|
from config import DATA_DIR as CFG_DATA_DIR |
|
except Exception: |
|
CFG_DATA_DIR = "/data" |
|
|
|
from pathlib import Path |
|
|
|
def _resolve_under_data(path_like: str | os.PathLike) -> str: |
|
p = Path(path_like) |
|
if p.is_absolute(): |
|
return str(p) |
|
parts = p.parts |
|
if parts and parts[0].lower() == "data": |
|
rel = Path(*parts[1:]) if len(parts) > 1 else Path() |
|
else: |
|
rel = p |
|
return str(Path(CFG_DATA_DIR) / rel) |
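
# Illustrative behaviour of the helper above (actual values depend on CFG_DATA_DIR;
# shown here with the fallback "/data"):
#
#   _resolve_under_data("data/santiment")  -> "/data/santiment"   # leading "data" segment stripped
#   _resolve_under_data("santiment")       -> "/data/santiment"
#   _resolve_under_data("/tmp/out")        -> "/tmp/out"          # absolute paths pass through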
|
|
|
@dataclass |
|
class FetchConfig: |
|
"""Configuration class for data fetching parameters - OPTIMIZED FOR API CONSERVATION""" |
|
from_date: str = "2024-01-01" |
|
to_date: str = "utc_now" |
|
interval: str = "1d" |
|
include_incomplete_data: bool = False |
|
batch_size: int = 25 |
|
max_workers: int = 5 |
|
rate_limit_delay: int = 60 |
|
export_format: str = "parquet" |
|
export_directory: str = "data/santiment" |
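
# Example configuration (a sketch; every field has a default and can be overridden):
#
#   config = FetchConfig(
#       from_date="30d",                    # relative dates are expanded by _normalize_dates()
#       to_date="utc_now",
#       interval="1h",
#       export_format="csv",
#       export_directory="data/santiment",  # resolved under DATA_DIR at init time
#   )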
|
|
|
class SantimentDataFetcher: |
|
""" |
|
Comprehensive Santiment Data Fetcher |
|
|
|
This class provides methods to fetch maximum possible data from Santiment API |
|
using the sanpy library with efficient batch operations and error handling. |
|
""" |
|
|
|
def __init__(self, api_key: Optional[str] = None, config: Optional[FetchConfig] = None): |
|
""" |
|
Initialize the Santiment Data Fetcher |
|
|
|
Args: |
|
api_key: Santiment API key(s) for accessing restricted data (comma-separated for multiple keys) |
|
config: FetchConfig object with fetching parameters |
|
""" |
|
self.config = config or FetchConfig() |
|
self._normalize_dates() |
|
|
|
|
|
self._setup_api_keys(api_key) |
|
|
|
|
|
self.config.export_directory = _resolve_under_data(self.config.export_directory) |
|
os.makedirs(self.config.export_directory, exist_ok=True) |
|
self._cleanup_existing_files() |
|
|
|
|
|
self.fetched_data: Dict[str, pd.DataFrame] = {} |
|
self.failed_queries: List[Dict] = [] |
|
|
|
|
|
self.metric_categories = self._define_metric_categories() |
|
|
|
|
|
self._initialize_metadata() |
|
|
|
|
|
self.symbol_normalizer = self._setup_symbol_normalizer() |
|
|
|
def _setup_symbol_normalizer(self): |
|
""" |
|
Set up symbol normalization mapping for consistent asset identification |
|
|
|
Returns: |
|
Dictionary mapping various symbol formats to canonical slugs |
|
""" |
|
|
|
|
|
symbol_mapping = { |
|
|
|
'bitcoin': 'bitcoin', |
|
'btc': 'bitcoin', |
|
'Bitcoin': 'bitcoin', |
|
'BTC': 'bitcoin', |
|
|
|
|
|
'ethereum': 'ethereum', |
|
'eth': 'ethereum', |
|
'Ethereum': 'ethereum', |
|
'ETH': 'ethereum', |
|
|
|
|
|
'ripple': 'ripple', |
|
'xrp': 'ripple', |
|
'Ripple': 'ripple', |
|
'XRP': 'ripple', |
|
|
|
|
|
'solana': 'solana', |
|
'sol': 'solana', |
|
'Solana': 'solana', |
|
'SOL': 'solana', |
|
|
|
|
|
'cardano': 'cardano', |
|
'ada': 'cardano', |
|
'Cardano': 'cardano', |
|
'ADA': 'cardano', |
|
|
|
|
|
'polkadot': 'polkadot', |
|
'dot': 'polkadot', |
|
'Polkadot': 'polkadot', |
|
'DOT': 'polkadot', |
|
|
|
|
|
'chainlink': 'chainlink', |
|
'link': 'chainlink', |
|
'Chainlink': 'chainlink', |
|
'LINK': 'chainlink', |
|
|
|
|
|
'litecoin': 'litecoin', |
|
'ltc': 'litecoin', |
|
'Litecoin': 'litecoin', |
|
'LTC': 'litecoin', |
|
|
|
|
|
'bitcoin-cash': 'bitcoin-cash', |
|
'bch': 'bitcoin-cash', |
|
'Bitcoin Cash': 'bitcoin-cash', |
|
'BCH': 'bitcoin-cash', |
|
|
|
|
|
'stellar': 'stellar', |
|
'xlm': 'stellar', |
|
'Stellar': 'stellar', |
|
'XLM': 'stellar', |
|
|
|
|
|
'ethereum-classic': 'ethereum-classic', |
|
'etc': 'ethereum-classic', |
|
'Ethereum Classic': 'ethereum-classic', |
|
'ETC': 'ethereum-classic', |
|
|
|
|
|
'eos': 'eos', |
|
'EOS': 'eos', |
|
} |
|
|
|
logger.info(f"Initialized symbol normalizer with {len(symbol_mapping)} mappings") |
|
return symbol_mapping |
|
|
|
def normalize_symbol(self, symbol: str) -> str: |
|
""" |
|
Normalize a symbol to its canonical Santiment slug |
|
|
|
Args: |
|
symbol: Symbol to normalize |
|
|
|
Returns: |
|
Canonical slug |
|
""" |
|
if symbol in self.symbol_normalizer: |
|
canonical = self.symbol_normalizer[symbol] |
|
if symbol != canonical: |
|
logger.debug(f"Normalized '{symbol}' -> '{canonical}'") |
|
return canonical |
|
|
|
|
|
logger.warning(f"Unknown symbol '{symbol}' not found in normalization mapping") |
|
return symbol.lower() |
|
|
|
def get_symbol_alternatives(self, symbol: str) -> List[str]: |
|
""" |
|
Get all alternative symbols for a given symbol (both directions) |
|
|
|
Args: |
|
symbol: Symbol to find alternatives for |
|
|
|
Returns: |
|
List of alternative symbols including the original |
|
""" |
|
alternatives = [symbol] |
|
|
|
|
|
reverse_mapping = {} |
|
for variant, canonical in self.symbol_normalizer.items(): |
|
if canonical not in reverse_mapping: |
|
reverse_mapping[canonical] = [] |
|
reverse_mapping[canonical].append(variant) |
|
|
|
|
|
if symbol in reverse_mapping: |
|
alternatives.extend(reverse_mapping[symbol]) |
|
|
|
|
|
canonical = self.normalize_symbol(symbol) |
|
if canonical in reverse_mapping: |
|
alternatives.extend(reverse_mapping[canonical]) |
|
|
|
|
|
return list(set(alternatives)) |
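
    # Illustrative behaviour of the two helpers above, based on the mapping built
    # in _setup_symbol_normalizer (not an exhaustive list):
    #
    #   fetcher.normalize_symbol("BTC")            -> "bitcoin"
    #   fetcher.normalize_symbol("dogecoin")       -> "dogecoin"      # unmapped symbols are lowercased
    #   fetcher.get_symbol_alternatives("bitcoin") -> ["bitcoin", "btc", "Bitcoin", "BTC"]  # order not guaranteed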
|
|
|
def fetch_single_metric_with_alternatives(self, metric: str, slug: str, **kwargs) -> Optional[pd.DataFrame]: |
|
""" |
|
Fetch a single metric for a single asset, trying alternative symbols if the primary fails |
|
|
|
Args: |
|
metric: The metric name |
|
slug: The asset slug (will try alternatives if this fails) |
|
**kwargs: Additional parameters for the API call |
|
|
|
Returns: |
|
DataFrame with the metric data or None if failed |
|
""" |
|
|
|
alternatives = self.get_symbol_alternatives(slug) |
|
logger.debug(f"Trying alternatives for {slug}: {alternatives}") |
|
|
|
|
|
canonical = self.normalize_symbol(slug) |
|
if canonical != slug: |
|
alternatives = [canonical] + [alt for alt in alternatives if alt != canonical] |
|
|
|
for i, alt_slug in enumerate(alternatives): |
|
try: |
|
data = self.fetch_single_metric(metric, alt_slug, **kwargs) |
|
if data is not None and not data.empty: |
|
if i > 0 or alt_slug != slug: |
|
logger.info(f"[ALT_SUCCESS] {metric} for {slug} succeeded using alternative '{alt_slug}'") |
|
|
|
data['slug'] = slug |
|
data['alternative_slug_used'] = alt_slug |
|
return data |
|
except Exception as e: |
|
error_msg = str(e) |
|
|
|
if any(skip_phrase in error_msg.lower() for skip_phrase in [ |
|
'not supported for', |
|
'not implemented for', |
|
'outside the allowed interval', |
|
'upgrade to a higher tier' |
|
]): |
|
logger.warning(f"[METRIC_SKIP] {metric} has fundamental issues, skipping all alternatives: {error_msg}") |
|
break |
|
|
|
|
|
                # Either way the alternative failed; log at debug level and try the next one
                logger.debug(f"Alternative {alt_slug} failed for {metric}: {e}")
                continue
|
|
|
logger.warning(f"[ALT_FAILED] All alternatives failed for {metric} with slug {slug}") |
|
return None |
|
|
|
def normalize_slug_list(self, slugs: List[str]) -> List[str]: |
|
""" |
|
Normalize a list of slugs and remove duplicates |
|
|
|
Args: |
|
slugs: List of slugs to normalize |
|
|
|
Returns: |
|
List of normalized, deduplicated slugs |
|
""" |
|
normalized = [] |
|
seen = set() |
|
|
|
for slug in slugs: |
|
canonical = self.normalize_symbol(slug) |
|
if canonical not in seen: |
|
normalized.append(canonical) |
|
seen.add(canonical) |
|
else: |
|
logger.debug(f"Removed duplicate slug: {slug} (canonical: {canonical})") |
|
|
|
logger.info(f"Normalized {len(slugs)} slugs to {len(normalized)} unique canonical slugs") |
|
return normalized |
|
|
|
def _normalize_dates(self): |
|
""" |
|
Convert relative date strings in self.config.from_date / to_date |
|
into absolute YYYY-MM-DD dates that Sanpy can parse. |
|
Supports: |
|
- "ND" (e.g. "30d") → today minus N days |
|
- "utc_now" → today |
|
""" |
|
now = datetime.utcnow() |
|
|
|
fd = self.config.from_date.strip().lower() |
|
if fd.endswith('d') and fd[:-1].isdigit(): |
|
days = int(fd[:-1]) |
|
from_dt = now - timedelta(days=days) |
|
|
|
self.config.from_date = from_dt.strftime('%Y-%m-%d') |
|
|
|
|
|
td = self.config.to_date.strip().lower() |
|
if td == 'utc_now': |
|
self.config.to_date = now.strftime('%Y-%m-%d') |
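
    # Example of the normalization above (the concrete dates are placeholders and
    # depend on the current UTC date):
    #
    #   FetchConfig(from_date="30d", to_date="utc_now")
    #   # -> from_date becomes e.g. "2024-06-01"  (today minus 30 days)
    #   # -> to_date   becomes e.g. "2024-07-01"  (today)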
|
|
|
def _setup_api_keys(self, api_key: Optional[str] = None): |
|
""" |
|
Set up multiple API keys for rate limit handling |
|
|
|
Args: |
|
api_key: API key(s) - can be comma-separated for multiple keys |
|
""" |
|
|
|
api_key_string = api_key or os.getenv('SANTIMENT_API_KEY') |
|
|
|
if api_key_string: |
|
|
|
self.api_keys = [key.strip() for key in api_key_string.split(',') if key.strip()] |
|
logger.info(f"Santiment fetcher initialized with {len(self.api_keys)} API key(s)") |
|
|
|
|
|
if len(self.api_keys) > 1: |
|
logger.info("Multiple API keys detected. Testing key diversity...") |
|
self._validate_api_key_diversity() |
|
else: |
|
self.api_keys = [] |
|
logger.warning("No API key provided - limited to free tier data") |
|
|
|
|
|
self.current_key_index = 0 |
|
self.rate_limit_switches = 0 |
|
|
|
|
|
if self.api_keys: |
|
self._set_current_api_key() |
|
|
|
def _validate_api_key_diversity(self): |
|
""" |
|
Validate that API keys are from different accounts for effective rate limit handling |
|
""" |
|
try: |
|
user_ids = set() |
|
functional_keys = 0 |
|
rate_limited_keys = 0 |
|
|
|
for i, key in enumerate(self.api_keys[:3]): |
|
|
|
san.ApiConfig.api_key = key |
|
|
|
try: |
|
|
|
result = san.execute_sql(query="SELECT 1", set_index=None) |
|
|
|
|
|
functional_keys += 1 |
|
logger.info(f"API Key #{i+1}: {key[:8]}... appears functional") |
|
|
|
except Exception as e: |
|
error_str = str(e) |
|
if 'user with id' in error_str: |
|
|
|
import re |
|
match = re.search(r'user with id (\d+)', error_str) |
|
if match: |
|
user_id = match.group(1) |
|
user_ids.add(user_id) |
|
rate_limited_keys += 1 |
|
logger.info(f"API Key #{i+1}: {key[:8]}... belongs to user ID {user_id} (rate limited)") |
|
else: |
|
logger.debug(f"API Key #{i+1}: {key[:8]}... - {error_str}") |
|
|
|
|
|
self.current_key_index = 0 |
|
self._set_current_api_key() |
|
|
|
|
|
if rate_limited_keys > 0 and len(user_ids) == 1: |
|
if functional_keys > 0: |
|
logger.warning("⚠️ WARNING: Cannot determine if all API keys are from different accounts!") |
|
logger.warning(f"⚠️ {rate_limited_keys} key(s) belong to user ID {list(user_ids)[0]}, {functional_keys} key(s) appear functional") |
|
logger.warning("⚠️ If functional keys are from the same account, rate limit switching won't work.") |
|
logger.warning("⚠️ For guaranteed effective rate limiting, use API keys from different Santiment accounts.") |
|
logger.warning("⚠️ Create additional accounts at https://app.santiment.net/") |
|
else: |
|
logger.warning("⚠️ WARNING: All tested API keys belong to the same Santiment account!") |
|
logger.warning("⚠️ Rate limits are applied per account, not per key.") |
|
logger.warning("⚠️ API key switching will not be effective with same-account keys.") |
|
logger.warning("⚠️ Create additional accounts at https://app.santiment.net/") |
|
elif len(user_ids) > 1: |
|
logger.info(f"✅ Good! API keys are from {len(user_ids)} different accounts.") |
|
logger.info("✅ This will provide effective rate limit distribution.") |
|
elif functional_keys == len(self.api_keys): |
|
logger.info("✅ All API keys appear functional.") |
|
logger.info("ℹ️ Cannot determine account diversity without rate limit errors.") |
|
logger.info("ℹ️ Monitor rate limit switches during operation to verify effectiveness.") |
|
|
|
except Exception as e: |
|
logger.debug(f"Could not validate API key diversity: {e}") |
|
logger.info("API key diversity validation skipped - continuing with provided keys") |
|
|
|
def _set_current_api_key(self): |
|
"""Set the current API key in san.ApiConfig""" |
|
if self.api_keys: |
|
current_key = self.api_keys[self.current_key_index] |
|
san.ApiConfig.api_key = current_key |
|
logger.info(f"Using API key #{self.current_key_index + 1}: {current_key[:8]}...") |
|
else: |
|
san.ApiConfig.api_key = None |
|
|
|
def _switch_api_key(self): |
|
"""Switch to the next available API key""" |
|
if len(self.api_keys) <= 1: |
|
logger.warning("Only one or no API keys available, cannot switch") |
|
return False |
|
|
|
old_index = self.current_key_index |
|
self.current_key_index = (self.current_key_index + 1) % len(self.api_keys) |
|
self.rate_limit_switches += 1 |
|
|
|
logger.info(f"[SWITCH] Switching from API key #{old_index + 1} to #{self.current_key_index + 1} (switch #{self.rate_limit_switches})") |
|
|
|
|
|
if self.rate_limit_switches > len(self.api_keys) * 2: |
|
logger.warning("⚠️ High number of API key switches detected!") |
|
logger.warning("⚠️ This suggests all keys may be from the same account.") |
|
logger.warning("⚠️ Consider using API keys from different Santiment accounts.") |
|
|
|
|
|
self._set_current_api_key() |
|
|
|
|
|
time.sleep(2.0) |
|
return True |
|
|
|
def _is_rate_limit_error(self, error_message): |
|
"""Check if the error indicates a rate limit issue""" |
|
rate_limit_indicators = [ |
|
"429", |
|
"rate limit", |
|
"too many requests", |
|
"api limit", |
|
"quota exceeded", |
|
"limit exceeded", |
|
"rate_limit_exception", |
|
"API Rate Limit Reached", |
|
"rate limit reached" |
|
] |
|
error_str = str(error_message).lower() |
|
return any(indicator in error_str for indicator in rate_limit_indicators) |
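
    # Example matches for the check above:
    #
    #   self._is_rate_limit_error("HTTP 429 Too Many Requests")      -> True
    #   self._is_rate_limit_error("API rate limit reached")          -> True
    #   self._is_rate_limit_error("bitcoin is not an existing slug") -> False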
|
|
|
def _cleanup_existing_files(self): |
|
""" |
|
Clean up all existing files in the export directory before starting a new fetch. |
|
This prevents accumulation of old data files from previous runs. |
|
""" |
|
import glob |
|
import shutil |
|
|
|
if not os.path.exists(self.config.export_directory): |
|
return |
|
|
|
try: |
|
|
|
all_files = glob.glob(os.path.join(self.config.export_directory, "*")) |
|
|
|
if all_files: |
|
logger.info(f"Cleaning up {len(all_files)} existing files in {self.config.export_directory}") |
|
|
|
for file_path in all_files: |
|
try: |
|
if os.path.isfile(file_path): |
|
os.remove(file_path) |
|
logger.debug(f"Removed file: {os.path.basename(file_path)}") |
|
elif os.path.isdir(file_path): |
|
shutil.rmtree(file_path) |
|
logger.debug(f"Removed directory: {os.path.basename(file_path)}") |
|
except Exception as e: |
|
logger.warning(f"Failed to remove {file_path}: {e}") |
|
|
|
logger.info(f"Successfully cleaned up export directory: {self.config.export_directory}") |
|
else: |
|
logger.info(f"Export directory is already clean: {self.config.export_directory}") |
|
|
|
except Exception as e: |
|
logger.error(f"Failed to cleanup export directory {self.config.export_directory}: {e}") |
|
|
|
|
|
def _define_metric_categories(self) -> Dict[str, List[str]]: |
|
"""Define REDUCED categories of Santiment metrics for API conservation.""" |
|
return { |
|
|
|
'financial': [ |
|
'price_usd', 'marketcap_usd', 'volume_usd' |
|
|
|
], |
|
|
|
|
|
'network_activity': [ |
|
'daily_active_addresses', 'new_addresses' |
|
|
|
], |
|
|
|
|
|
'transactions': [ |
|
'transaction_count', 'transaction_volume_usd' |
|
|
|
], |
|
|
|
|
|
'exchange': [ |
|
'exchange_inflow', 'exchange_outflow' |
|
|
|
] |
|
|
|
|
|
|
|
} |
|
|
|
def _initialize_metadata(self): |
|
"""Initialize metadata about available metrics and projects""" |
|
try: |
|
logger.info("Fetching available metrics...") |
|
self.available_metrics = san.available_metrics() |
|
logger.info(f"Found {len(self.available_metrics)} available metrics") |
|
|
|
logger.info("Fetching available projects...") |
|
self.projects_df = san.get("projects/all") |
|
self.available_slugs = self.projects_df['slug'].tolist() |
|
logger.info(f"Found {len(self.available_slugs)} available projects") |
|
|
|
except Exception as e: |
|
logger.error(f"Failed to initialize metadata: {e}") |
|
self.available_metrics = [] |
|
self.available_slugs = [] |
|
|
|
def get_metric_metadata(self, metric: str) -> Dict[str, Any]: |
|
""" |
|
Get metadata for a specific metric |
|
|
|
Args: |
|
metric: The metric name |
|
|
|
Returns: |
|
Dictionary containing metric metadata |
|
""" |
|
try: |
|
metadata = san.metadata( |
|
metric, |
|
arr=["availableSlugs", "defaultAggregation", "humanReadableName", |
|
"isAccessible", "isRestricted", "restrictedFrom", "restrictedTo"] |
|
) |
|
return metadata |
|
except Exception as e: |
|
logger.warning(f"Failed to get metadata for {metric}: {e}") |
|
return {} |
|
|
|
def fetch_single_metric(self, metric: str, slug: str, **kwargs) -> Optional[pd.DataFrame]: |
|
""" |
|
Fetch a single metric for a single asset |
|
|
|
Args: |
|
metric: The metric name |
|
slug: The asset slug |
|
**kwargs: Additional parameters for the API call |
|
|
|
Returns: |
|
DataFrame with the metric data or None if failed |
|
""" |
|
max_retries = len(self.api_keys) if self.api_keys else 1 |
|
keys_tried = set() |
|
|
|
for attempt in range(max_retries): |
|
try: |
|
|
|
if len(keys_tried) >= len(self.api_keys) and self.api_keys: |
|
logger.warning(f"All {len(self.api_keys)} API keys exhausted for {metric}, waiting 30 seconds...") |
|
time.sleep(30) |
|
keys_tried.clear() |
|
self.current_key_index = 0 |
|
self._set_current_api_key() |
|
|
|
params = { |
|
'slug': slug, |
|
'from_date': kwargs.get('from_date', self.config.from_date), |
|
'to_date': kwargs.get('to_date', self.config.to_date), |
|
'interval': kwargs.get('interval', self.config.interval), |
|
'include_incomplete_data': kwargs.get('include_incomplete_data', self.config.include_incomplete_data) |
|
} |
|
|
|
|
|
if 'selector' in kwargs: |
|
params['selector'] = kwargs['selector'] |
|
|
|
data = san.get(metric, **params) |
|
|
|
if data is not None and not data.empty: |
|
|
|
data['metric'] = metric |
|
data['slug'] = slug |
|
if attempt > 0: |
|
logger.info(f"[SUCCESS] {metric} for {slug} succeeded on attempt {attempt + 1}") |
|
return data |
|
|
|
except Exception as e: |
|
error_msg = str(e) |
|
keys_tried.add(self.current_key_index) |
|
|
|
|
|
if self._is_rate_limit_error(error_msg) and self.api_keys: |
|
logger.warning(f"[RATE_LIMIT] API key #{self.current_key_index + 1} hit rate limit for {metric}: {error_msg}") |
|
|
|
|
|
if len(keys_tried) >= len(self.api_keys): |
|
logger.error(f"All {len(self.api_keys)} API keys exhausted for {metric}. Skipping.") |
|
break |
|
|
|
|
|
if self._switch_api_key(): |
|
continue |
|
else: |
|
logger.error("No more API keys available for switching") |
|
|
|
|
|
if hasattr(san, 'is_rate_limit_exception') and san.is_rate_limit_exception(e): |
|
if hasattr(san, 'rate_limit_time_left'): |
|
rate_limit_seconds = san.rate_limit_time_left(e) |
|
logger.warning(f"Santiment rate limit hit. Sleeping for {rate_limit_seconds} seconds") |
|
time.sleep(rate_limit_seconds) |
|
else: |
|
|
|
if self.api_keys and self._switch_api_key(): |
|
continue |
|
else: |
|
time.sleep(60) |
|
else: |
|
|
|
if any(skip_phrase in error_msg.lower() for skip_phrase in [ |
|
'not supported for', |
|
'is not an existing slug', |
|
'not implemented for', |
|
'missing_contract', |
|
'outside the allowed interval', |
|
'upgrade to a higher tier' |
|
]): |
|
logger.warning(f"[SKIP] {metric} for {slug} - {error_msg}") |
|
return None |
|
|
|
logger.error(f"Failed to fetch {metric} for {slug}: {error_msg}") |
|
|
|
error_info = { |
|
'metric': metric, |
|
'slug': slug, |
|
'error': error_msg, |
|
'timestamp': datetime.now().isoformat(), |
|
'api_key_index': self.current_key_index |
|
} |
|
self.failed_queries.append(error_info) |
|
|
|
return None |
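
    # Usage sketch (assumes 'price_usd' is available for the slug on the caller's plan):
    #
    #   df = fetcher.fetch_single_metric("price_usd", "bitcoin", interval="1d")
    #   if df is not None:
    #       # sanpy returns a datetime-indexed frame with a 'value' column;
    #       # 'metric' and 'slug' columns are added above for traceability.
    #       print(df[["value", "metric", "slug"]].tail())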
|
|
|
def fetch_multi_asset_metric(self, metric: str, slugs: List[str], **kwargs) -> Optional[pd.DataFrame]: |
|
""" |
|
Fetch a single metric for multiple assets using get_many |
|
|
|
Args: |
|
metric: The metric name |
|
slugs: List of asset slugs |
|
**kwargs: Additional parameters for the API call |
|
|
|
Returns: |
|
DataFrame with the metric data or None if failed |
|
""" |
|
max_retries = len(self.api_keys) if self.api_keys else 1 |
|
keys_tried = set() |
|
|
|
for attempt in range(max_retries): |
|
try: |
|
|
|
if len(keys_tried) >= len(self.api_keys) and self.api_keys: |
|
logger.warning(f"All {len(self.api_keys)} API keys exhausted for {metric}, waiting 30 seconds...") |
|
time.sleep(30) |
|
keys_tried.clear() |
|
self.current_key_index = 0 |
|
self._set_current_api_key() |
|
|
|
params = { |
|
'slugs': slugs, |
|
'from_date': kwargs.get('from_date', self.config.from_date), |
|
'to_date': kwargs.get('to_date', self.config.to_date), |
|
'interval': kwargs.get('interval', self.config.interval), |
|
'include_incomplete_data': kwargs.get('include_incomplete_data', self.config.include_incomplete_data) |
|
} |
|
|
|
data = san.get_many(metric, **params) |
|
|
|
if data is not None and not data.empty: |
|
|
|
data_melted = data.reset_index().melt( |
|
id_vars=['datetime'], |
|
var_name='slug', |
|
value_name='value' |
|
) |
|
data_melted['metric'] = metric |
|
data_melted.set_index('datetime', inplace=True) |
|
if attempt > 0: |
|
logger.info(f"[SUCCESS] {metric} for multiple assets succeeded on attempt {attempt + 1}") |
|
return data_melted |
|
|
|
except Exception as e: |
|
error_msg = str(e) |
|
keys_tried.add(self.current_key_index) |
|
|
|
|
|
if self._is_rate_limit_error(error_msg) and self.api_keys: |
|
logger.warning(f"[RATE_LIMIT] API key #{self.current_key_index + 1} hit rate limit for {metric}: {error_msg}") |
|
|
|
|
|
if len(keys_tried) >= len(self.api_keys): |
|
logger.error(f"All {len(self.api_keys)} API keys exhausted for {metric}. Skipping.") |
|
break |
|
|
|
|
|
if self._switch_api_key(): |
|
continue |
|
else: |
|
logger.error("No more API keys available for switching") |
|
|
|
|
|
if hasattr(san, 'is_rate_limit_exception') and san.is_rate_limit_exception(e): |
|
if hasattr(san, 'rate_limit_time_left'): |
|
rate_limit_seconds = san.rate_limit_time_left(e) |
|
logger.warning(f"Santiment rate limit hit. Sleeping for {rate_limit_seconds} seconds") |
|
time.sleep(rate_limit_seconds) |
|
else: |
|
|
|
if self.api_keys and self._switch_api_key(): |
|
continue |
|
else: |
|
time.sleep(60) |
|
else: |
|
logger.error(f"Failed to fetch {metric} for multiple assets: {error_msg}") |
|
|
|
error_info = { |
|
'metric': metric, |
|
'slugs': slugs, |
|
'error': error_msg, |
|
'timestamp': datetime.now().isoformat(), |
|
'api_key_index': self.current_key_index |
|
} |
|
self.failed_queries.append(error_info) |
|
|
|
return None |
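
    # Usage sketch: san.get_many returns a wide frame (one column per slug), which
    # is melted above into long format with 'slug', 'value' and 'metric' columns.
    #
    #   df = fetcher.fetch_multi_asset_metric("price_usd", ["bitcoin", "ethereum"])
    #   if df is not None:
    #       print(df.groupby("slug")["value"].last())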
|
|
|
def fetch_category_batch(self, category: str, slugs: List[str], use_async_batch: bool = True) -> Dict[str, pd.DataFrame]: |
|
""" |
|
Fetch all metrics in a category using batch operations with symbol alternatives fallback |
|
|
|
Args: |
|
category: The metric category name |
|
slugs: List of asset slugs to fetch for |
|
use_async_batch: Whether to use AsyncBatch (recommended) or Batch |
|
|
|
Returns: |
|
Dictionary mapping metric names to DataFrames |
|
""" |
|
if category not in self.metric_categories: |
|
logger.error(f"Unknown category: {category}") |
|
return {} |
|
|
|
metrics = self.metric_categories[category] |
|
category_data = {} |
|
|
|
|
|
available_metrics_in_category = [m for m in metrics if m in self.available_metrics] |
|
|
|
if not available_metrics_in_category: |
|
logger.warning(f"No available metrics found for category: {category}") |
|
return {} |
|
|
|
logger.info(f"Fetching {len(available_metrics_in_category)} metrics for category: {category}") |
|
|
|
|
|
normalized_slugs = self.normalize_slug_list(slugs) |
|
batch_success = self._try_batch_fetch(category, available_metrics_in_category, normalized_slugs, use_async_batch) |
|
category_data.update(batch_success) |
|
|
|
|
|
failed_metrics = [m for m in available_metrics_in_category if m not in batch_success] |
|
if failed_metrics: |
|
logger.info(f"Retrying {len(failed_metrics)} failed metrics with alternatives") |
|
individual_results = self._fetch_failed_metrics_with_alternatives(failed_metrics, slugs) |
|
category_data.update(individual_results) |
|
|
|
return category_data |
|
|
|
def _try_batch_fetch(self, category: str, metrics: List[str], slugs: List[str], use_async_batch: bool) -> Dict[str, pd.DataFrame]: |
|
"""Try batch fetch operation""" |
|
category_data = {} |
|
|
|
try: |
|
if use_async_batch: |
|
batch = san.AsyncBatch() |
|
else: |
|
batch = san.Batch() |
|
|
|
|
|
            # Track which metrics were actually queued so that results can be
            # matched back to the right metric after execution.
            queued_metrics = []
            for metric in metrics:
                try:
|
if len(slugs) == 1: |
|
batch.get( |
|
metric, |
|
slug=slugs[0], |
|
from_date=self.config.from_date, |
|
to_date=self.config.to_date, |
|
interval=self.config.interval, |
|
include_incomplete_data=self.config.include_incomplete_data |
|
) |
|
else: |
|
batch.get_many( |
|
metric, |
|
slugs=slugs, |
|
from_date=self.config.from_date, |
|
to_date=self.config.to_date, |
|
interval=self.config.interval, |
|
include_incomplete_data=self.config.include_incomplete_data |
|
                        )
                    queued_metrics.append(metric)
                except Exception as e:
                    logger.warning(f"Failed to add {metric} to batch: {e}")
|
|
|
|
|
if use_async_batch: |
|
results = batch.execute(max_workers=self.config.max_workers) |
|
else: |
|
results = batch.execute() |
|
|
|
|
|
            for metric, result in zip(queued_metrics, results):
|
if result is not None and not result.empty: |
|
if len(slugs) > 1: |
|
|
|
result_melted = result.reset_index().melt( |
|
id_vars=['datetime'], |
|
var_name='slug', |
|
value_name='value' |
|
) |
|
result_melted['metric'] = metric |
|
result_melted.set_index('datetime', inplace=True) |
|
category_data[metric] = result_melted |
|
else: |
|
result['metric'] = metric |
|
result['slug'] = slugs[0] |
|
category_data[metric] = result |
|
else: |
|
logger.debug(f"No data received for metric: {metric} in batch") |
|
|
|
except Exception as e: |
|
logger.error(f"Batch execution failed for category {category}: {e}") |
|
|
|
return category_data |
|
|
|
def _fetch_failed_metrics_with_alternatives(self, metrics: List[str], original_slugs: List[str]) -> Dict[str, pd.DataFrame]: |
|
"""Fetch failed metrics individually using symbol alternatives""" |
|
individual_data = {} |
|
|
|
for metric in metrics: |
|
logger.info(f"Retrying {metric} with symbol alternatives...") |
|
|
|
if len(original_slugs) == 1: |
|
|
|
result = self.fetch_single_metric_with_alternatives(metric, original_slugs[0]) |
|
if result is not None: |
|
individual_data[metric] = result |
|
else: |
|
|
|
all_results = [] |
|
for slug in original_slugs: |
|
result = self.fetch_single_metric_with_alternatives(metric, slug) |
|
if result is not None: |
|
all_results.append(result) |
|
|
|
if all_results: |
|
|
|
combined_result = pd.concat(all_results, ignore_index=False, sort=False) |
|
|
|
if not isinstance(combined_result.index, pd.DatetimeIndex): |
|
if 'datetime' in combined_result.columns: |
|
combined_result.set_index('datetime', inplace=True) |
|
individual_data[metric] = combined_result |
|
|
|
return individual_data |
|
|
|
def fetch_special_metrics(self, slugs: List[str]) -> Dict[str, pd.DataFrame]: |
|
""" |
|
Fetch special metrics that have different API signatures |
|
|
|
Args: |
|
slugs: List of asset slugs |
|
|
|
Returns: |
|
Dictionary mapping metric names to DataFrames |
|
""" |
|
special_data = {} |
|
|
|
for slug in slugs: |
|
max_retries = len(self.api_keys) if self.api_keys else 1 |
|
keys_tried = set() |
|
|
|
for attempt in range(max_retries): |
|
try: |
|
|
|
if len(keys_tried) >= len(self.api_keys) and self.api_keys: |
|
logger.warning(f"All {len(self.api_keys)} API keys exhausted for special metrics on {slug}, waiting 30 seconds...") |
|
time.sleep(30) |
|
keys_tried.clear() |
|
self.current_key_index = 0 |
|
self._set_current_api_key() |
|
|
|
|
|
logger.info(f"Fetching OHLCV data for {slug}") |
|
ohlcv = san.get( |
|
f"ohlcv/{slug}", |
|
from_date=self.config.from_date, |
|
to_date=self.config.to_date, |
|
interval=self.config.interval |
|
) |
|
if ohlcv is not None and not ohlcv.empty: |
|
ohlcv['metric'] = 'ohlcv' |
|
ohlcv['slug'] = slug |
|
special_data[f'ohlcv_{slug}'] = ohlcv |
|
|
|
|
|
logger.info(f"Fetching detailed prices for {slug}") |
|
prices = san.get( |
|
"prices", |
|
slug=slug, |
|
from_date=self.config.from_date, |
|
to_date=self.config.to_date, |
|
interval=self.config.interval |
|
) |
|
if prices is not None and not prices.empty: |
|
prices['metric'] = 'prices_detailed' |
|
prices['slug'] = slug |
|
special_data[f'prices_{slug}'] = prices |
|
|
|
|
|
break |
|
|
|
except Exception as e: |
|
error_msg = str(e) |
|
keys_tried.add(self.current_key_index) |
|
|
|
|
|
if self._is_rate_limit_error(error_msg) and self.api_keys: |
|
logger.warning(f"[RATE_LIMIT] API key #{self.current_key_index + 1} hit rate limit for special metrics on {slug}: {error_msg}") |
|
|
|
|
|
if len(keys_tried) >= len(self.api_keys): |
|
logger.error(f"All {len(self.api_keys)} API keys exhausted for special metrics on {slug}. Skipping.") |
|
break |
|
|
|
|
|
if self._switch_api_key(): |
|
continue |
|
else: |
|
logger.error("No more API keys available for switching") |
|
|
|
logger.error(f"Failed to fetch special metrics for {slug}: {e}") |
|
break |
|
|
|
return special_data |
|
|
|
def fetch_blockchain_address_data(self, addresses: List[str], slugs: List[str]) -> Dict[str, pd.DataFrame]: |
|
""" |
|
Fetch blockchain address-related data |
|
|
|
Args: |
|
addresses: List of blockchain addresses |
|
slugs: List of asset slugs for context |
|
|
|
Returns: |
|
Dictionary mapping data types to DataFrames |
|
""" |
|
address_data = {} |
|
|
|
for slug in slugs: |
|
for address in addresses: |
|
try: |
|
|
|
balance = san.get( |
|
"historical_balance", |
|
slug=slug, |
|
address=address, |
|
from_date=self.config.from_date, |
|
to_date=self.config.to_date, |
|
interval=self.config.interval |
|
) |
|
if balance is not None and not balance.empty: |
|
balance['address'] = address |
|
balance['slug'] = slug |
|
address_data[f'historical_balance_{slug}_{address[:8]}'] = balance |
|
|
|
|
|
top_txs = san.get( |
|
"eth_top_transactions", |
|
slug=slug, |
|
from_date=self.config.from_date, |
|
to_date=self.config.to_date, |
|
limit=100, |
|
transaction_type="ALL" |
|
) |
|
if top_txs is not None and not top_txs.empty: |
|
top_txs['slug'] = slug |
|
address_data[f'eth_top_transactions_{slug}'] = top_txs |
|
|
|
except Exception as e: |
|
logger.error(f"Failed to fetch address data for {address} on {slug}: {e}") |
|
|
|
return address_data |
|
|
|
def execute_custom_sql_queries(self) -> Dict[str, pd.DataFrame]: |
|
""" |
|
Execute custom SQL queries for additional data insights, using dictGetString for asset metadata. |
|
|
|
Returns: |
|
Dictionary mapping query names to DataFrames |
|
""" |
|
sql_data = {} |
|
custom_queries = { |
|
'top_assets_by_volume': """ |
|
SELECT |
|
dictGetString('default.asset_metadata_dict', 'name', asset_id) as asset_name, |
|
dictGetString('default.asset_metadata_dict', 'slug', asset_id) as slug, |
|
SUM(value) as total_volume |
|
FROM daily_metrics_v2 |
|
WHERE metric_id = get_metric_id('volume_usd') |
|
AND dt >= now() - INTERVAL 30 DAY |
|
GROUP BY asset_id |
|
ORDER BY total_volume DESC |
|
LIMIT 50 |
|
""", |
|
'recent_high_activity_addresses': """ |
|
SELECT |
|
dictGetString('default.asset_metadata_dict', 'name', asset_id) as asset_name, |
|
get_metric_name(metric_id) as metric_name, |
|
dt, |
|
value |
|
FROM daily_metrics_v2 |
|
WHERE metric_id = get_metric_id('daily_active_addresses') |
|
AND dt >= now() - INTERVAL 7 DAY |
|
AND value > 1000 |
|
ORDER BY dt DESC, value DESC |
|
LIMIT 100 |
|
""", |
|
'exchange_flow_summary': """ |
|
SELECT |
|
dictGetString('default.asset_metadata_dict', 'name', asset_id) as asset_name, |
|
dt, |
|
SUM(CASE WHEN metric_id = get_metric_id('exchange_inflow') THEN value ELSE 0 END) as inflow, |
|
SUM(CASE WHEN metric_id = get_metric_id('exchange_outflow') THEN value ELSE 0 END) as outflow |
|
FROM daily_metrics_v2 |
|
WHERE metric_id IN (get_metric_id('exchange_inflow'), get_metric_id('exchange_outflow')) |
|
AND dt >= now() - INTERVAL 30 DAY |
|
GROUP BY asset_id, dt |
|
ORDER BY dt DESC |
|
LIMIT 1000 |
|
""" |
|
} |
|
for query_name, query in custom_queries.items(): |
|
try: |
|
logger.info(f"Executing SQL query: {query_name}") |
|
result = san.execute_sql(query=query, set_index="dt" if "dt" in query else None) |
|
if result is not None and not result.empty: |
|
sql_data[query_name] = result |
|
logger.info(f"SQL query {query_name} returned {len(result)} rows") |
|
except Exception as e: |
|
logger.error(f"Failed to execute SQL query {query_name}: {e}") |
|
return sql_data |
|
|
|
def fetch_comprehensive_data(self, |
|
slugs: List[str] = None, |
|
categories: List[str] = None, |
|
include_special_metrics: bool = True, |
|
include_sql_queries: bool = True, |
|
addresses: List[str] = None) -> Dict[str, Any]: |
|
""" |
|
Fetch comprehensive data across all categories and metrics |
|
|
|
Args: |
|
slugs: List of asset slugs (if None, uses top assets) |
|
categories: List of categories to fetch (if None, fetches all) |
|
include_special_metrics: Whether to include special format metrics |
|
include_sql_queries: Whether to execute custom SQL queries |
|
addresses: List of blockchain addresses for address-specific data |
|
|
|
Returns: |
|
Dictionary containing all fetched data organized by category |
|
""" |
|
|
|
if slugs is None: |
|
slugs = ['bitcoin', 'ethereum', 'cardano', 'polkadot', 'chainlink', |
|
'litecoin', 'bitcoin-cash', 'stellar', 'ethereum-classic', 'eos'] |
|
|
|
|
|
slugs = self.normalize_slug_list(slugs) |
|
|
|
if categories is None: |
|
categories = list(self.metric_categories.keys()) |
|
|
|
|
|
if not san.ApiConfig.api_key: |
|
slugs = slugs[:3] |
|
logger.warning("No API key detected. Limiting to 3 assets to avoid rate limits.") |
|
|
|
all_data = {} |
|
start_time = datetime.now() |
|
|
|
logger.info(f"Starting comprehensive data fetch for {len(slugs)} assets across {len(categories)} categories") |
|
|
|
|
|
all_keys_exhausted = False |
|
if self.api_keys and self.rate_limit_switches > len(self.api_keys) * 3: |
|
logger.warning("⚠️ All API keys appear to be rate-limited. Attempting reduced fetch...") |
|
all_keys_exhausted = True |
|
|
|
|
|
for category in categories: |
|
if all_keys_exhausted: |
|
logger.info(f"Skipping category {category} due to API exhaustion") |
|
continue |
|
|
|
logger.info(f"Fetching category: {category}") |
|
category_data = self.fetch_category_batch(category, slugs, use_async_batch=True) |
|
|
|
if category_data: |
|
all_data[category] = category_data |
|
|
|
for metric_name, df in category_data.items(): |
|
self.fetched_data[f"{category}_{metric_name}"] = df |
|
|
|
|
|
if self.rate_limit_switches > len(self.api_keys) * 5: |
|
logger.warning("⚠️ Excessive rate limit switches detected. Stopping data fetch to avoid further exhaustion.") |
|
all_keys_exhausted = True |
|
break |
|
|
|
|
|
if include_special_metrics and not all_keys_exhausted: |
|
logger.info("Fetching special metrics...") |
|
special_data = self.fetch_special_metrics(slugs) |
|
if special_data: |
|
all_data['special_metrics'] = special_data |
|
self.fetched_data.update(special_data) |
|
elif all_keys_exhausted: |
|
logger.info("Skipping special metrics due to API exhaustion") |
|
|
|
|
|
if addresses and not all_keys_exhausted: |
|
logger.info("Fetching blockchain address data...") |
|
address_data = self.fetch_blockchain_address_data(addresses, slugs) |
|
if address_data: |
|
all_data['address_data'] = address_data |
|
self.fetched_data.update(address_data) |
|
elif addresses and all_keys_exhausted: |
|
logger.info("Skipping blockchain address data due to API exhaustion") |
|
|
|
|
|
if include_sql_queries and san.ApiConfig.api_key and not all_keys_exhausted: |
|
logger.info("Executing custom SQL queries...") |
|
sql_data = self.execute_custom_sql_queries() |
|
if sql_data: |
|
all_data['sql_queries'] = sql_data |
|
self.fetched_data.update(sql_data) |
|
elif all_keys_exhausted: |
|
logger.info("Skipping SQL queries due to API exhaustion") |
|
|
|
end_time = datetime.now() |
|
duration = end_time - start_time |
|
|
|
logger.info(f"Comprehensive data fetch completed in {duration}") |
|
logger.info(f"Successfully fetched {len(self.fetched_data)} datasets") |
|
logger.info(f"Failed queries: {len(self.failed_queries)}") |
|
|
|
|
|
if all_keys_exhausted: |
|
logger.warning("⚠️ Data fetch completed with API rate limit exhaustion - some data may be missing") |
|
|
|
|
|
summary = self._generate_fetch_summary(all_data, duration) |
|
summary['all_keys_exhausted'] = all_keys_exhausted |
|
summary['rate_limit_switches'] = self.rate_limit_switches |
|
all_data['fetch_summary'] = summary |
|
|
|
return all_data |
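
    # Usage sketch (a conservative call; the category names come from
    # _define_metric_categories above):
    #
    #   all_data = fetcher.fetch_comprehensive_data(
    #       slugs=["bitcoin", "ethereum"],
    #       categories=["financial", "exchange"],
    #       include_special_metrics=False,
    #       include_sql_queries=False,
    #   )
    #   print(all_data["fetch_summary"]["total_datasets"])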
|
|
|
def _generate_fetch_summary(self, data: Dict[str, Any], duration: timedelta) -> Dict[str, Any]: |
|
"""Generate a summary of the data fetching operation""" |
|
summary = { |
|
'fetch_duration': str(duration), |
|
'total_datasets': len(self.fetched_data), |
|
'failed_queries': len(self.failed_queries), |
|
'categories_fetched': list(data.keys()), |
|
'data_points_by_category': {}, |
|
'date_range': f"{self.config.from_date} to {self.config.to_date}", |
|
'interval': self.config.interval, |
|
'timestamp': datetime.now().isoformat() |
|
} |
|
|
|
|
|
for category, category_data in data.items(): |
|
if isinstance(category_data, dict): |
|
total_points = sum(len(df) for df in category_data.values() if isinstance(df, pd.DataFrame)) |
|
summary['data_points_by_category'][category] = total_points |
|
|
|
return summary |
|
|
|
def export_data(self, |
|
export_format: str = None, |
|
combine_categories: bool = False, |
|
include_metadata: bool = True) -> Dict[str, str]: |
|
""" |
|
Export fetched data to files |
|
|
|
Args: |
|
export_format: Export format ('csv', 'json', 'parquet') |
|
combine_categories: Whether to combine all data into single files |
|
include_metadata: Whether to include metadata files |
|
|
|
Returns: |
|
Dictionary mapping data names to file paths |
|
""" |
|
export_format = export_format or self.config.export_format |
|
exported_files = {} |
|
|
|
if not self.fetched_data: |
|
logger.warning("No data to export") |
|
return exported_files |
|
|
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") |
|
|
|
if combine_categories: |
|
|
|
all_dfs = [] |
|
for name, df in self.fetched_data.items(): |
|
if isinstance(df, pd.DataFrame) and not df.empty: |
|
df_copy = df.copy() |
|
df_copy['dataset_name'] = name |
|
all_dfs.append(df_copy) |
|
|
|
if all_dfs: |
|
combined_df = pd.concat(all_dfs, ignore_index=True, sort=False) |
|
filename = f"santiment_comprehensive_data_{timestamp}.{export_format}" |
|
filepath = os.path.join(self.config.export_directory, filename) |
|
|
|
self._export_dataframe(combined_df, filepath, export_format) |
|
exported_files['combined_data'] = filepath |
|
else: |
|
|
|
for name, df in self.fetched_data.items(): |
|
if isinstance(df, pd.DataFrame) and not df.empty: |
|
filename = f"santiment_{name}_{timestamp}.{export_format}" |
|
filepath = os.path.join(self.config.export_directory, filename) |
|
|
|
self._export_dataframe(df, filepath, export_format) |
|
exported_files[name] = filepath |
|
|
|
|
|
if include_metadata: |
|
metadata = { |
|
'failed_queries': self.failed_queries, |
|
'available_metrics': self.available_metrics, |
|
'config': { |
|
'from_date': self.config.from_date, |
|
'to_date': self.config.to_date, |
|
'interval': self.config.interval, |
|
'batch_size': self.config.batch_size |
|
}, |
|
'export_timestamp': datetime.now().isoformat() |
|
} |
|
|
|
metadata_file = os.path.join(self.config.export_directory, f"santiment_metadata_{timestamp}.json") |
|
with open(metadata_file, 'w') as f: |
|
json.dump(metadata, f, indent=2) |
|
exported_files['metadata'] = metadata_file |
|
|
|
logger.info(f"Exported {len(exported_files)} files to {self.config.export_directory}") |
|
return exported_files |
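
    # Usage sketch: writes one file per dataset (or a single combined file), plus an
    # optional metadata JSON, and returns the file paths keyed by dataset name.
    #
    #   files = fetcher.export_data(export_format="csv", combine_categories=True)
    #   for name, path in files.items():
    #       print(name, "->", path)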
|
|
|
def _export_dataframe(self, df: pd.DataFrame, filepath: str, format_type: str): |
|
"""Export a DataFrame to the specified format""" |
|
try: |
|
if format_type == 'csv': |
|
df.to_csv(filepath) |
|
elif format_type == 'json': |
|
df.to_json(filepath, date_format='iso', orient='records') |
|
elif format_type == 'parquet': |
|
df.to_parquet(filepath) |
|
else: |
|
logger.error(f"Unsupported export format: {format_type}") |
|
return |
|
|
|
logger.info(f"Exported DataFrame to {filepath}") |
|
|
|
except Exception as e: |
|
logger.error(f"Failed to export DataFrame to {filepath}: {e}") |
|
|
|
def get_api_usage_stats(self) -> Dict[str, Any]: |
|
"""Get API usage statistics""" |
|
try: |
|
stats = { |
|
'calls_made': san.api_calls_made(), |
|
'calls_remaining': san.api_calls_remaining(), |
|
'failed_queries': len(self.failed_queries), |
|
'successful_datasets': len(self.fetched_data) |
|
} |
|
return stats |
|
except Exception as e: |
|
logger.error(f"Failed to get API usage stats: {e}") |
|
return {} |
|
|
|
def print_summary(self): |
|
"""Print a comprehensive summary of the fetching operation""" |
|
print("\n" + "="*60) |
|
print("SANTIMENT DATA FETCHER SUMMARY") |
|
print("="*60) |
|
|
|
|
|
print(f"Total datasets fetched: {len(self.fetched_data)}") |
|
print(f"Failed queries: {len(self.failed_queries)}") |
|
|
|
|
|
print(f"\nConfiguration:") |
|
print(f" Date range: {self.config.from_date} to {self.config.to_date}") |
|
print(f" Interval: {self.config.interval}") |
|
print(f" Export directory: {self.config.export_directory}") |
|
|
|
|
|
if self.fetched_data: |
|
print(f"\nData by category:") |
|
category_counts = {} |
|
for key in self.fetched_data.keys(): |
|
if '_' in key: |
|
category = key.split('_')[0] |
|
category_counts[category] = category_counts.get(category, 0) + 1 |
|
|
|
for category, count in sorted(category_counts.items()): |
|
print(f" {category}: {count} datasets") |
|
|
|
|
|
if self.fetched_data: |
|
print(f"\nSample datasets:") |
|
for i, (name, df) in enumerate(list(self.fetched_data.items())[:5]): |
|
if isinstance(df, pd.DataFrame): |
|
print(f" {name}: {len(df)} rows, {len(df.columns)} columns") |
|
if not df.empty: |
|
date_range = f"{df.index.min()} to {df.index.max()}" if hasattr(df.index, 'min') else "N/A" |
|
print(f" Date range: {date_range}") |
|
|
|
|
|
if self.failed_queries: |
|
print(f"\nFailed queries summary:") |
|
error_types = {} |
|
for failed in self.failed_queries: |
|
error_msg = str(failed.get('error', 'Unknown error')) |
|
error_type = error_msg.split(':')[0] if ':' in error_msg else error_msg |
|
error_types[error_type] = error_types.get(error_type, 0) + 1 |
|
|
|
for error_type, count in sorted(error_types.items()): |
|
print(f" {error_type}: {count} occurrences") |
|
|
|
|
|
try: |
|
api_stats = self.get_api_usage_stats() |
|
if api_stats: |
|
print(f"\nAPI Usage:") |
|
print(f" Calls made: {api_stats.get('calls_made', 'N/A')}") |
|
print(f" Calls remaining: {api_stats.get('calls_remaining', 'N/A')}") |
|
        except Exception:
            pass
|
|
|
print("="*60) |
|
|
|
def analyze_data_quality(self) -> Dict[str, Any]: |
|
"""Analyze the quality of fetched data""" |
|
quality_report = { |
|
'total_datasets': len(self.fetched_data), |
|
'empty_datasets': 0, |
|
'datasets_with_nulls': 0, |
|
'date_coverage': {}, |
|
'data_completeness': {}, |
|
'outliers_detected': {} |
|
} |
|
|
|
for name, df in self.fetched_data.items(): |
|
if isinstance(df, pd.DataFrame): |
|
|
|
if df.empty: |
|
quality_report['empty_datasets'] += 1 |
|
continue |
|
|
|
|
|
if df.isnull().any().any(): |
|
quality_report['datasets_with_nulls'] += 1 |
|
null_percentage = (df.isnull().sum().sum() / (len(df) * len(df.columns))) * 100 |
|
quality_report['data_completeness'][name] = f"{100 - null_percentage:.2f}%" |
|
|
|
|
|
if hasattr(df.index, 'min') and hasattr(df.index, 'max'): |
|
try: |
|
date_range = { |
|
'start': str(df.index.min()), |
|
'end': str(df.index.max()), |
|
'days': (df.index.max() - df.index.min()).days if hasattr(df.index.max() - df.index.min(), 'days') else 'N/A' |
|
} |
|
quality_report['date_coverage'][name] = date_range |
|
                    except Exception:
                        quality_report['date_coverage'][name] = 'Unable to determine'
|
|
|
|
|
numeric_cols = df.select_dtypes(include=[np.number]).columns |
|
outlier_info = {} |
|
for col in numeric_cols: |
|
if col not in ['metric', 'slug']: |
|
try: |
|
q1 = df[col].quantile(0.25) |
|
q3 = df[col].quantile(0.75) |
|
iqr = q3 - q1 |
|
lower_bound = q1 - 1.5 * iqr |
|
upper_bound = q3 + 1.5 * iqr |
|
outliers = df[(df[col] < lower_bound) | (df[col] > upper_bound)] |
|
if len(outliers) > 0: |
|
outlier_info[col] = len(outliers) |
|
                        except Exception:
                            continue
|
|
|
if outlier_info: |
|
quality_report['outliers_detected'][name] = outlier_info |
|
|
|
return quality_report |
|
|
|
def create_data_dashboard(self) -> str: |
|
"""Create a simple HTML dashboard summarizing the fetched data""" |
|
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") |
|
total_datasets = len(self.fetched_data) |
|
date_range = f"{self.config.from_date} to {self.config.to_date}" |
|
|
|
html_content = f""" |
|
<!DOCTYPE html> |
|
<html> |
|
<head> |
|
<title>Santiment Data Dashboard</title> |
|
<style> |
|
body {{ font-family: Arial, sans-serif; margin: 20px; }} |
|
.header {{ background-color: #f0f0f0; padding: 20px; border-radius: 5px; }} |
|
.section {{ margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 5px; }} |
|
.metric-card {{ display: inline-block; margin: 10px; padding: 15px; background-color: #f9f9f9; border-radius: 5px; }} |
|
table {{ border-collapse: collapse; width: 100%; }} |
|
th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }} |
|
th {{ background-color: #f2f2f2; }} |
|
</style> |
|
</head> |
|
<body> |
|
<div class="header"> |
|
<h1>Santiment Data Dashboard</h1> |
|
<p>Generated on: {timestamp}</p> |
|
<p>Total Datasets: {total_datasets}</p> |
|
<p>Date Range: {date_range}</p> |
|
</div> |
|
""" |
|
|
|
|
|
if self.fetched_data: |
|
category_counts = {} |
|
for key in self.fetched_data.keys(): |
|
if '_' in key: |
|
category = key.split('_')[0] |
|
category_counts[category] = category_counts.get(category, 0) + 1 |
|
|
|
html_content += """ |
|
<div class="section"> |
|
<h2>Categories Overview</h2> |
|
""" |
|
for category, count in sorted(category_counts.items()): |
|
html_content += f'<div class="metric-card"><strong>{category}</strong><br>{count} datasets</div>' |
|
html_content += "</div>" |
|
|
|
|
|
if self.failed_queries: |
|
html_content += """ |
|
<div class="section"> |
|
<h2>Failed Queries</h2> |
|
<table> |
|
<tr><th>Metric</th><th>Slug</th><th>Error</th></tr> |
|
""" |
|
for failed in self.failed_queries[:10]: |
|
metric = failed.get('metric', 'N/A') |
|
slug = failed.get('slug', failed.get('slugs', 'N/A')) |
|
error = str(failed.get('error', 'Unknown'))[:100] + '...' if len(str(failed.get('error', ''))) > 100 else failed.get('error', 'Unknown') |
|
html_content += f"<tr><td>{metric}</td><td>{slug}</td><td>{error}</td></tr>" |
|
html_content += "</table></div>" |
|
|
|
html_content += "</body></html>" |
|
|
|
|
|
dashboard_path = os.path.join( |
|
self.config.export_directory, |
|
f"santiment_dashboard_{datetime.now().strftime('%Y%m%d_%H%M%S')}.html" |
|
) |
|
with open(dashboard_path, 'w') as f: |
|
f.write(html_content) |
|
|
|
logger.info(f"Dashboard created at {dashboard_path}") |
|
return dashboard_path |
|
|
|
def get_top_performing_assets(self, metric: str = 'price_usd', days: int = 30) -> pd.DataFrame: |
|
""" |
|
Analyze top performing assets based on a specific metric |
|
|
|
Args: |
|
metric: The metric to analyze performance on |
|
days: Number of days to look back for performance calculation |
|
|
|
Returns: |
|
DataFrame with performance analysis |
|
""" |
|
performance_data = [] |
|
|
|
for name, df in self.fetched_data.items(): |
|
if isinstance(df, pd.DataFrame) and metric in str(name) and not df.empty: |
|
try: |
|
if 'slug' in df.columns: |
|
|
|
for slug in df['slug'].unique(): |
|
slug_data = df[df['slug'] == slug].copy() |
|
if len(slug_data) >= 2: |
|
slug_data = slug_data.sort_index() |
|
|
|
|
|
if len(slug_data) > days: |
|
recent_data = slug_data.tail(days) |
|
else: |
|
recent_data = slug_data |
|
|
|
if 'value' in recent_data.columns and not recent_data['value'].empty: |
|
start_value = recent_data['value'].iloc[0] |
|
end_value = recent_data['value'].iloc[-1] |
|
|
|
if start_value and start_value != 0: |
|
performance = ((end_value - start_value) / start_value) * 100 |
|
|
|
performance_data.append({ |
|
'slug': slug, |
|
'metric': metric, |
|
'start_value': start_value, |
|
'end_value': end_value, |
|
'performance_pct': performance, |
|
'data_points': len(recent_data), |
|
'period_days': days |
|
}) |
|
except Exception as e: |
|
logger.warning(f"Failed to analyze performance for {name}: {e}") |
|
|
|
if performance_data: |
|
performance_df = pd.DataFrame(performance_data) |
|
return performance_df.sort_values('performance_pct', ascending=False) |
|
else: |
|
return pd.DataFrame() |
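
    # Usage sketch (only meaningful after fetch_comprehensive_data has populated
    # self.fetched_data with datasets whose names contain the requested metric):
    #
    #   perf = fetcher.get_top_performing_assets(metric="price_usd", days=30)
    #   if not perf.empty:
    #       print(perf[["slug", "performance_pct"]].head())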
|
|
|
def cleanup_export_directory(self) -> bool: |
|
""" |
|
Manually clean up the export directory. |
|
|
|
Returns: |
|
bool: True if cleanup was successful, False otherwise |
|
""" |
|
try: |
|
self._cleanup_existing_files() |
|
return True |
|
except Exception as e: |
|
logger.error(f"Manual cleanup failed: {e}") |
|
return False |
|
|
|
def get_api_key_status(self): |
|
"""Get status information about API key usage""" |
|
if not self.api_keys: |
|
return { |
|
"total_keys": 0, |
|
"current_key": "None", |
|
"rate_limit_switches": self.rate_limit_switches, |
|
"current_key_preview": "No API key" |
|
} |
|
|
|
return { |
|
"total_keys": len(self.api_keys), |
|
"current_key": self.current_key_index + 1, |
|
"rate_limit_switches": self.rate_limit_switches, |
|
"current_key_preview": self.api_keys[self.current_key_index][:8] + "..." |
|
} |
|
|
|
def print_api_key_status(self): |
|
"""Print API key usage status""" |
|
status = self.get_api_key_status() |
|
print(f"\n[API_STATUS] Using {status['total_keys']} API key(s)") |
|
if status['total_keys'] > 0: |
|
print(f"[API_STATUS] Current: Key #{status['current_key']} ({status['current_key_preview']})") |
|
print(f"[API_STATUS] Rate limit switches: {status['rate_limit_switches']}") |
|
if status['rate_limit_switches'] > 0: |
|
print(f"[API_STATUS] Effective rate limit handling active") |
|
else: |
|
print(f"[API_STATUS] No API keys configured - using free tier") |
|
print() |
|
|
|
def save_configuration(self, config_path: str = None) -> str: |
|
"""Save current configuration to a JSON file""" |
|
if config_path is None: |
|
config_path = os.path.join(self.config.export_directory, "santiment_config.json") |
|
|
|
config_dict = { |
|
'from_date': self.config.from_date, |
|
'to_date': self.config.to_date, |
|
'interval': self.config.interval, |
|
'include_incomplete_data': self.config.include_incomplete_data, |
|
'batch_size': self.config.batch_size, |
|
'max_workers': self.config.max_workers, |
|
'rate_limit_delay': self.config.rate_limit_delay, |
|
'export_format': self.config.export_format, |
|
'export_directory': self.config.export_directory, |
|
'saved_at': datetime.now().isoformat() |
|
} |
|
|
|
with open(config_path, 'w') as f: |
|
json.dump(config_dict, f, indent=2) |
|
|
|
logger.info(f"Configuration saved to {config_path}") |
|
return config_path |
|
|
|
@classmethod |
|
def load_configuration(cls, config_path: str) -> 'SantimentDataFetcher': |
|
"""Load configuration from a JSON file and create a fetcher instance""" |
|
with open(config_path, 'r') as f: |
|
config_dict = json.load(f) |
|
|
|
|
|
config_dict.pop('saved_at', None) |
|
|
|
config = FetchConfig(**config_dict) |
|
return cls(config=config) |
|
|
|
|
|
|
|
def cleanup_santiment_directory(directory_path: str = "data/santiment") -> bool: |
|
""" |
|
Utility function to clean up a Santiment data directory without creating a fetcher instance. |
|
|
|
Args: |
|
directory_path: Path to the directory to clean up |
|
|
|
Returns: |
|
bool: True if cleanup was successful, False otherwise |
|
""" |
|
import glob |
|
import shutil |
|
|
|
try: |
|
if not os.path.exists(directory_path): |
|
logger.info(f"Directory does not exist: {directory_path}") |
|
return True |
|
|
|
|
|
all_files = glob.glob(os.path.join(directory_path, "*")) |
|
|
|
if all_files: |
|
logger.info(f"Cleaning up {len(all_files)} existing files in {directory_path}") |
|
|
|
for file_path in all_files: |
|
try: |
|
if os.path.isfile(file_path): |
|
os.remove(file_path) |
|
logger.debug(f"Removed file: {os.path.basename(file_path)}") |
|
elif os.path.isdir(file_path): |
|
shutil.rmtree(file_path) |
|
logger.debug(f"Removed directory: {os.path.basename(file_path)}") |
|
except Exception as e: |
|
logger.warning(f"Failed to remove {file_path}: {e}") |
|
|
|
logger.info(f"Successfully cleaned up directory: {directory_path}") |
|
else: |
|
logger.info(f"Directory is already clean: {directory_path}") |
|
|
|
return True |
|
|
|
except Exception as e: |
|
logger.error(f"Failed to cleanup directory {directory_path}: {e}") |
|
return False |
|
|
|
def fetch_quick_crypto_overview(assets: List[str] = None, api_key: str = None) -> Dict[str, pd.DataFrame]: |
|
""" |
|
Quick function to fetch essential crypto data for analysis |
|
|
|
Args: |
|
        assets: List of asset slugs (defaults to a small set of major cryptocurrencies)
|
api_key: Santiment API key |
|
|
|
Returns: |
|
Dictionary with essential data |
|
""" |
|
if assets is None: |
|
assets = ['bitcoin', 'ethereum', 'solana', 'ripple', 'cardano'] |
|
|
|
config = FetchConfig( |
|
from_date="2025-07-01", |
|
to_date="2025-07-06", |
|
interval="30m", |
|
export_format="parquet" |
|
) |
|
|
|
fetcher = SantimentDataFetcher(api_key=api_key, config=config) |
|
|
|
|
|
essential_categories = ['financial', 'network_activity', 'exchange'] |
|
|
|
data = fetcher.fetch_comprehensive_data( |
|
slugs=assets, |
|
categories=essential_categories, |
|
include_special_metrics=True, |
|
include_sql_queries=False |
|
) |
|
|
|
return data |
|
|
|
def create_crypto_report(assets: List[str], output_dir: str = "./crypto_report", api_key: str = None): |
|
""" |
|
Create a comprehensive crypto analysis report |
|
|
|
Args: |
|
assets: List of asset slugs to analyze |
|
output_dir: Directory to save the report |
|
api_key: Santiment API key(s) - can be comma-separated for multiple keys |
|
""" |
|
config = FetchConfig( |
|
from_date="2025-07-01", |
|
to_date="2025-07-06", |
|
interval="30m", |
|
export_directory=output_dir, |
|
export_format="parquet" |
|
) |
|
|
|
fetcher = SantimentDataFetcher(api_key=api_key, config=config) |
|
|
|
|
|
fetcher.print_api_key_status() |
|
|
|
|
|
logger.info("Fetching comprehensive cryptocurrency data...") |
|
data = fetcher.fetch_comprehensive_data( |
|
slugs=assets, |
|
include_special_metrics=True, |
|
include_sql_queries=True |
|
) |
|
|
|
|
|
logger.info("Exporting data to files...") |
|
exported_files = fetcher.export_data(combine_categories=False, include_metadata=True) |
|
|
|
|
|
logger.info("Creating data dashboard...") |
|
dashboard_path = fetcher.create_data_dashboard() |
|
|
|
|
|
logger.info("Analyzing data quality...") |
|
quality_report = fetcher.analyze_data_quality() |
|
|
|
|
|
quality_path = os.path.join(output_dir, "data_quality_report.json") |
|
with open(quality_path, 'w') as f: |
|
json.dump(quality_report, f, indent=2, default=str) |
|
|
|
|
|
fetcher.print_summary() |
|
|
|
print(f"\nReport generated successfully!") |
|
print(f"Dashboard: {dashboard_path}") |
|
print(f"Data files: {len(exported_files)} files in {output_dir}") |
|
print(f"Quality report: {quality_path}") |
|
|
|
|
|
print("\n[FINAL_STATUS] Santiment API Key Usage Summary:") |
|
fetcher.print_api_key_status() |
|
|
|
|
|
def main(): |
|
|
|
santiment_api_key = os.getenv("SANTIMENT_API_KEY") |
|
|
|
|
|
fetcher = SantimentDataFetcher(api_key=santiment_api_key) |
|
|
|
|
|
fetcher.print_api_key_status() |
|
|
|
|
|
|
|
print("[SANTIMENT] Data preservation mode - keeping existing data") |
|
|
|
|
|
print("Fetching reduced crypto overview (API conservation mode)...") |
|
|
|
overview_data = fetch_quick_crypto_overview(['bitcoin', 'ethereum'], api_key=santiment_api_key) |
|
|
|
|
|
print("\nCreating conservative crypto report...") |
|
|
|
create_crypto_report( |
|
assets=['bitcoin', 'ethereum'], |
|
output_dir="./data/santiment", |
|
api_key=santiment_api_key |
|
) |
|
|
|
|
|
print("\n[FINAL_STATUS] Santiment API Key Usage Summary:") |
|
fetcher.print_api_key_status() |
|
|
|
if __name__ == "__main__": |
|
main() |