"""
Crypto Symbol Normalizer
========================

Provides consistent symbol normalization across all data fetchers and mergers.
This ensures that different representations of the same cryptocurrency (e.g., XRP vs ripple)
are treated consistently throughout the entire pipeline.

Features:
- Maps various symbol formats to canonical identifiers
- Supports both short symbols (BTC, ETH) and long names (bitcoin, ethereum)
- Case-insensitive matching
- Logging for debugging normalization process

Author: AI Assistant
Date: August 2025
"""
|
|
|
import logging |
|
from typing import Dict, List, Set |
|
|
|
logger = logging.getLogger(__name__)


class CryptoSymbolNormalizer:
    """
    Centralized crypto symbol normalization for consistent asset identification.

    Lookups try the exact spelling first and then fall back to a match that
    ignores letter case and surrounding whitespace, so 'BTC', 'Btc' and
    ' btc ' all resolve to the same canonical slug.
    """

    def __init__(self):
        """Initialize the symbol normalizer with predefined mappings."""
        # Variant spelling -> canonical slug. Public: callers may read it and
        # extend it through add_mapping().
        self.symbol_mapping = self._build_symbol_mapping()
        logger.info(
            "Initialized CryptoSymbolNormalizer with %s mappings",
            len(self.symbol_mapping),
        )

    def _build_symbol_mapping(self) -> Dict[str, str]:
        """
        Build the built-in symbol mapping dictionary.

        Returns:
            Dictionary mapping various symbol formats to canonical slugs
        """
        symbol_mapping = {
            # Bitcoin
            'bitcoin': 'bitcoin',
            'btc': 'bitcoin',
            'Bitcoin': 'bitcoin',
            'BTC': 'bitcoin',

            # Ethereum
            'ethereum': 'ethereum',
            'eth': 'ethereum',
            'Ethereum': 'ethereum',
            'ETH': 'ethereum',

            # Ripple / XRP
            'ripple': 'ripple',
            'xrp': 'ripple',
            'Ripple': 'ripple',
            'XRP': 'ripple',

            # Solana
            'solana': 'solana',
            'sol': 'solana',
            'Solana': 'solana',
            'SOL': 'solana',

            # Cardano
            'cardano': 'cardano',
            'ada': 'cardano',
            'Cardano': 'cardano',
            'ADA': 'cardano',

            # Polkadot
            'polkadot': 'polkadot',
            'dot': 'polkadot',
            'Polkadot': 'polkadot',
            'DOT': 'polkadot',

            # Chainlink
            'chainlink': 'chainlink',
            'link': 'chainlink',
            'Chainlink': 'chainlink',
            'LINK': 'chainlink',

            # Litecoin
            'litecoin': 'litecoin',
            'ltc': 'litecoin',
            'Litecoin': 'litecoin',
            'LTC': 'litecoin',

            # Bitcoin Cash
            'bitcoin-cash': 'bitcoin-cash',
            'bch': 'bitcoin-cash',
            'Bitcoin Cash': 'bitcoin-cash',
            'BCH': 'bitcoin-cash',

            # Stellar
            'stellar': 'stellar',
            'xlm': 'stellar',
            'Stellar': 'stellar',
            'XLM': 'stellar',

            # Ethereum Classic
            'ethereum-classic': 'ethereum-classic',
            'etc': 'ethereum-classic',
            'Ethereum Classic': 'ethereum-classic',
            'ETC': 'ethereum-classic',

            # EOS (ticker and slug are the same word)
            'eos': 'eos',
            'EOS': 'eos',
        }

        return symbol_mapping

    def normalize(self, symbol: str) -> str:
        """
        Normalize a symbol to its canonical identifier.

        The exact spelling is looked up first. If that misses, the symbol is
        stripped of surrounding whitespace, lower-cased, and compared against
        the lower-cased form of every known variant.

        Args:
            symbol: Symbol to normalize

        Returns:
            Canonical identifier, or the stripped, lower-cased symbol when no
            mapping is known (a warning is logged in that case)
        """
        try:
            # Fast path: exact spelling, identical to the historical behavior.
            canonical = self.symbol_mapping[symbol]
        except KeyError:
            folded = symbol.strip().lower()
            # Scan rather than cache a folded index so that entries added or
            # changed directly on symbol_mapping are always honored.
            for variant, target in self.symbol_mapping.items():
                if isinstance(variant, str) and variant.lower() == folded:
                    canonical = target
                    break
            else:
                logger.warning(
                    "Unknown symbol '%s' not found in normalization mapping",
                    symbol,
                )
                # Return the folded form so padded / mixed-case spellings of
                # the same unknown asset still agree with each other.
                return folded

        if symbol != canonical:
            logger.debug("Normalized '%s' -> '%s'", symbol, canonical)
        return canonical

    def normalize_list(self, symbols: List[str]) -> List[str]:
        """
        Normalize a list of symbols and remove duplicates.

        Order of first appearance is preserved.

        Args:
            symbols: List of symbols to normalize

        Returns:
            List of normalized, deduplicated symbols
        """
        normalized = []
        seen = set()

        for symbol in symbols:
            canonical = self.normalize(symbol)
            if canonical in seen:
                logger.debug(
                    "Removed duplicate symbol: %s (canonical: %s)",
                    symbol,
                    canonical,
                )
                continue
            seen.add(canonical)
            normalized.append(canonical)

        logger.info(
            "Normalized %s symbols to %s unique canonical symbols",
            len(symbols),
            len(normalized),
        )
        return normalized

    def get_all_variants(self, canonical_symbol: str) -> List[str]:
        """
        Get all known variants for a canonical symbol.

        Args:
            canonical_symbol: The canonical symbol to find variants for

        Returns:
            List of all registered variants that map to this canonical symbol,
            in registration order (empty if there are none)
        """
        return [
            variant
            for variant, canonical in self.symbol_mapping.items()
            if canonical == canonical_symbol
        ]

    def get_canonical_symbols(self) -> Set[str]:
        """
        Get set of all canonical symbols.

        Returns:
            Set of canonical symbols
        """
        return set(self.symbol_mapping.values())

    def add_mapping(self, symbol: str, canonical: str):
        """
        Add a new symbol mapping, replacing any existing entry for `symbol`.

        Args:
            symbol: Symbol variant to add
            canonical: Canonical symbol it maps to
        """
        self.symbol_mapping[symbol] = canonical
        logger.info("Added new mapping: '%s' -> '%s'", symbol, canonical)
|
|
|
|
|
|
|
# Lazily created shared instance; populated on the first get_normalizer() call.
_normalizer = None


def get_normalizer() -> CryptoSymbolNormalizer:
    """
    Return the module-wide normalizer, creating it on first use.

    Returns:
        The shared CryptoSymbolNormalizer instance
    """
    global _normalizer

    # Reuse the existing instance when one has already been built.
    if _normalizer is not None:
        return _normalizer

    _normalizer = CryptoSymbolNormalizer()
    return _normalizer
|
|
|
def normalize_symbol(symbol: str) -> str:
    """
    Normalize one symbol using the shared normalizer instance.

    Args:
        symbol: Symbol to normalize

    Returns:
        Whatever the shared normalizer's normalize() returns for `symbol`
    """
    shared = get_normalizer()
    return shared.normalize(symbol)
|
|
|
def normalize_symbol_list(symbols: List[str]) -> List[str]:
    """
    Normalize a list of symbols using the shared normalizer instance.

    Args:
        symbols: List of symbols to normalize

    Returns:
        Whatever the shared normalizer's normalize_list() returns for `symbols`
    """
    shared = get_normalizer()
    return shared.normalize_list(symbols)
|
|