ReallyFloppyPenguin committed · Commit 7c79299 · verified · 1 Parent(s): a769262

Update app.py

Files changed (1): app.py (+247, -692)
app.py CHANGED
@@ -1,793 +1,348 @@
 import gradio as gr
 import requests
 import os
-import pandas as pd
 import json
 from typing import List, Dict, Optional
 import time
-from datetime import datetime
 
-# Updated dictionary of allowed models with current HF Inference Providers
-ALLOWED_MODELS = {
-    # Text Generation Models - HF Inference API
-    "microsoft/DialoGPT-medium": {
-        "provider": "HF Inference",
-        "pipeline": "text-generation",
-        "description": "Conversational AI model for dialog generation",
-        "endpoint": "https://api-inference.huggingface.co/models/microsoft/DialoGPT-medium",
-        "api_format": "hf_inference"
-    },
-    "meta-llama/Llama-3.1-8B-Instruct": {
-        "provider": "HF Inference",
-        "pipeline": "text-generation",
-        "description": "Meta's Llama 3.1 8B Instruct model",
-        "endpoint": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct",
-        "api_format": "hf_inference"
-    },
-    "deepseek-ai/DeepSeek-V3-0324": {
-        "provider": "HF Inference",
-        "pipeline": "text-generation",
-        "description": "DeepSeek V3 state-of-the-art conversational model",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Cerebras Models (Chat completion LLM only)
     "meta-llama/Llama-3.3-70B-Instruct": {
         "provider": "Cerebras",
-        "pipeline": "text-generation",
-        "description": "Meta's Llama 3.3 70B Instruct model via Cerebras ultra-fast LPUs",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Cohere Models (Chat completion LLM + VLM)
-    "cohere/command-r-plus": {
-        "provider": "Cohere",
-        "pipeline": "text-generation",
-        "description": "Cohere's Command R+ enterprise-grade NLP model",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Fal AI Models (Text-to-Image, Text-to-Video, Speech-to-Text)
-    "black-forest-labs/FLUX.1-schnell": {
-        "provider": "Fal AI",
-        "pipeline": "text-to-image",
-        "description": "FLUX.1 schnell model for fast image generation via Fal AI",
-        "endpoint": "https://router.huggingface.co/v1/text-to-image",
-        "api_format": "hf_router"
     },
-
-    # Featherless AI Models (Chat completion LLM + VLM)
-    "meta-llama/Llama-3.1-70B-Instruct": {
-        "provider": "Featherless AI",
-        "pipeline": "text-generation",
-        "description": "Meta's Llama 3.1 70B Instruct via Featherless AI",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Fireworks Models (Chat completion LLM + VLM)
-    "accounts/fireworks/models/llama-v3p1-8b-instruct": {
-        "provider": "Fireworks",
-        "pipeline": "text-generation",
-        "description": "Llama 3.1 8B Instruct via Fireworks AI production-ready serving",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Groq Models (Chat completion LLM only)
     "deepseek-ai/DeepSeek-R1": {
-        "provider": "Groq",
-        "pipeline": "text-generation",
-        "description": "DeepSeek R1 model via Groq hardware acceleration",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Hyperbolic Models (Chat completion LLM + VLM)
-    "meta-llama/Meta-Llama-3-8B-Instruct": {
-        "provider": "Hyperbolic",
-        "pipeline": "text-generation",
-        "description": "Meta's Llama 3 8B Instruct via Hyperbolic",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Nebius Models (Chat completion LLM + VLM, Feature Extraction, Text-to-Image)
-    "mistralai/Mixtral-8x7B-Instruct-v0.1": {
-        "provider": "Nebius",
-        "pipeline": "text-generation",
-        "description": "Mistral's Mixtral 8x7B Instruct via Nebius cloud platform",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Novita Models (Chat completion LLM + VLM, Text-to-Video)
-    "Qwen/Qwen2.5-72B-Instruct": {
-        "provider": "Novita",
-        "pipeline": "text-generation",
-        "description": "Qwen 2.5 72B Instruct via Novita",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
     },
-
-    # Nscale Models (Chat completion LLM + VLM, Feature Extraction, Text-to-Image)
-    "microsoft/Phi-3-medium-4k-instruct": {
-        "provider": "Nscale",
-        "pipeline": "text-generation",
-        "description": "Microsoft Phi-3 Medium via Nscale",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Replicate Models (Text-to-Image, Text-to-Video, Speech-to-Text)
-    "stabilityai/stable-diffusion-xl-base-1.0": {
-        "provider": "Replicate",
-        "pipeline": "text-to-image",
-        "description": "Stable Diffusion XL via Replicate cloud platform",
-        "endpoint": "https://router.huggingface.co/v1/text-to-image",
-        "api_format": "hf_router"
-    },
-
-    # SambaNova Models (Chat completion LLM, Feature Extraction)
     "meta-llama/Meta-Llama-3.1-405B-Instruct": {
         "provider": "SambaNova",
-        "pipeline": "text-generation",
-        "description": "Meta's Llama 3.1 405B Instruct via SambaNova",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
     },
-
-    # Together AI Models (Chat completion LLM + VLM, Text-to-Image)
     "meta-llama/Meta-Llama-3-70B-Instruct": {
         "provider": "Together",
-        "pipeline": "text-generation",
-        "description": "Meta's Llama 3 70B Instruct via Together AI high-performance inference",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # HF Inference - Additional Models for various tasks
-    "black-forest-labs/FLUX.1-dev": {
-        "provider": "HF Inference",
-        "pipeline": "text-to-image",
-        "description": "FLUX.1 development model for high-quality text-to-image generation",
-        "endpoint": "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev",
-        "api_format": "hf_inference"
-    },
-    "openai/whisper-large-v3": {
-        "provider": "HF Inference",
-        "pipeline": "automatic-speech-recognition",
-        "description": "Whisper Large V3 for speech recognition",
-        "endpoint": "https://api-inference.huggingface.co/models/openai/whisper-large-v3",
-        "api_format": "hf_inference"
-    },
-    "sentence-transformers/all-MiniLM-L6-v2": {
-        "provider": "HF Inference",
-        "pipeline": "feature-extraction",
-        "description": "Sentence transformer for embeddings and semantic search",
-        "endpoint": "https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2",
-        "api_format": "hf_inference"
-    },
-    "cardiffnlp/twitter-roberta-base-sentiment-latest": {
-        "provider": "HF Inference",
-        "pipeline": "text-classification",
-        "description": "Sentiment analysis model trained on Twitter data",
-        "endpoint": "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest",
-        "api_format": "hf_inference"
-    }
-}
-
-# Updated provider configuration for current HF Inference Providers ecosystem
-PROVIDER_CONFIG = {
-    "HF Inference": {
-        "description": "HuggingFace's native serverless inference API",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://api-inference.huggingface.co",
-        "pricing": "Free tier + pay-per-use",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/hf-inference",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Feature Extraction", "Text to Image", "Speech to text"]
-    },
-    "Cerebras": {
-        "description": "Ultra-fast inference with Language Processing Units (LPUs)",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/cerebras",
-        "capabilities": ["Chat completion (LLM)"]
     },
-    "Cohere": {
-        "description": "Enterprise-grade NLP models and APIs",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/cohere",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)"]
-    },
-    "Fal AI": {
-        "description": "Fast and reliable model inference platform",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/fal-ai",
-        "capabilities": ["Text to Image", "Text to video", "Speech to text"]
-    },
-    "Featherless AI": {
-        "description": "Optimized inference for open-source models",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/featherless-ai",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)"]
-    },
-    "Fireworks": {
-        "description": "Production-ready inference with fast model serving",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/fireworks-ai",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)"]
-    },
-    "Groq": {
-        "description": "Fast inference with specialized hardware acceleration",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/groq",
-        "capabilities": ["Chat completion (LLM)"]
-    },
-    "Hyperbolic": {
-        "description": "GPU-accelerated inference platform",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/hyperbolic",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)"]
-    },
-    "Nebius": {
-        "description": "Cloud-based AI infrastructure platform",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/nebius",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Feature Extraction", "Text to Image"]
-    },
-    "Novita": {
-        "description": "AI inference platform with video generation",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/novita",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Text to video"]
-    },
-    "Nscale": {
-        "description": "Scalable AI model deployment platform",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/nscale",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Feature Extraction", "Text to Image"]
-    },
-    "Replicate": {
-        "description": "Run models in the cloud with simple API",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/replicate",
-        "capabilities": ["Text to Image", "Text to video", "Speech to text"]
     },
-    "SambaNova": {
-        "description": "Enterprise AI platform with DataFlow architecture",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/sambanova",
-        "capabilities": ["Chat completion (LLM)", "Feature Extraction"]
     },
-    "Together": {
-        "description": "High-performance inference for open-source models",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/together",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Text to Image"]
     }
 }
 
-class ModernHFInferenceExplorer:
     def __init__(self):
-        self.allowed_models = ALLOWED_MODELS
-        self.provider_config = PROVIDER_CONFIG
         self.hf_token = os.getenv("HF_TOKEN")
-
         if not self.hf_token:
-            raise ValueError("HF_TOKEN environment variable is required for HuggingFace Inference Providers")
-
-        self.headers = {"Authorization": f"Bearer {self.hf_token}"}
 
-    def get_available_models(self) -> List[Dict]:
-        """Get the predefined allowed models with provider info and live status"""
-        models = []
-        for model_id, model_info in self.allowed_models.items():
-            provider = model_info["provider"]
-
-            models.append({
-                "model_id": model_id,
-                "provider": provider,
-                "pipeline": model_info["pipeline"],
-                "description": model_info["description"],
-                "endpoint": model_info["endpoint"],
-                "api_format": model_info["api_format"],
-                "status": self._check_model_status(model_id, provider),
-                "pricing": self.provider_config[provider]["pricing"]
-            })
 
-        return models
-
-    def _check_model_status(self, model_id: str, provider: str) -> str:
-        """Check if a specific model is currently available via HF Inference Providers"""
-        try:
-            # For models using the new HF Router API
-            if provider in ["Cerebras", "Groq", "Together", "Fireworks", "Replicate", "Cohere", "Fal AI"]:
-                # Use the models endpoint to check availability
-                url = "https://router.huggingface.co/v1/models"
-                response = requests.get(url, headers=self.headers, timeout=5)
-
-                if response.status_code == 200:
-                    available_models = response.json()
-                    if isinstance(available_models, dict) and "data" in available_models:
-                        model_ids = [m["id"] for m in available_models["data"]]
-                        return "✅ Available" if model_id in model_ids else "❓ Check Provider"
-                    return "✅ Available"
-                else:
-                    return "❓ Unknown"
-
-            # For traditional HF Inference API models
-            elif provider == "HF Inference":
-                url = f"https://api-inference.huggingface.co/models/{model_id}"
-                response = requests.get(url, headers=self.headers, timeout=5)
-
-                if response.status_code == 200:
-                    return "✅ Available"
-                elif response.status_code == 503:
-                    return "🔄 Loading"
-                else:
-                    return "❌ Unavailable"
-
-            return "❓ Unknown"
-
-        except Exception:
-            return "❓ Connection Error"
-
-    def test_model_inference(self, model_id: str, input_text: str) -> Dict:
-        """Test inference on a specific allowed model using current HF Inference Providers API"""
-        if model_id not in self.allowed_models:
             return {
-                "status": "error",
-                "error": f"Model '{model_id}' is not in the allowed models list",
-                "response_time": None
             }
 
-        model_info = self.allowed_models[model_id]
-        api_format = model_info["api_format"]
 
         try:
-            start_time = time.time()
 
-            if api_format == "openai_compatible":
-                # Use the new OpenAI-compatible chat completions endpoint
-                result = self._test_openai_compatible_model(model_id, input_text)
-            elif api_format == "hf_inference":
-                # Use traditional HF Inference API
-                result = self._test_hf_inference_model(model_id, input_text, model_info)
-            elif api_format == "hf_router":
-                # Use HF Router for other tasks
-                result = self._test_hf_router_model(model_id, input_text, model_info)
             else:
                 return {
-                    "status": "error",
-                    "error": f"Unsupported API format: {api_format}",
-                    "response_time": None
                 }
-
-            result["response_time"] = time.time() - start_time
-            return result
 
         except Exception as e:
             return {
-                "status": "error",
-                "error": str(e),
-                "response_time": time.time() - start_time if 'start_time' in locals() else None
             }
-
-    def _test_openai_compatible_model(self, model_id: str, input_text: str) -> Dict:
-        """Test model using OpenAI-compatible chat completions API"""
-        url = "https://router.huggingface.co/v1/chat/completions"
-
-        payload = {
-            "model": model_id,
-            "messages": [
-                {"role": "user", "content": input_text}
-            ],
-            "max_tokens": 100,
-            "temperature": 0.7
-        }
-
-        response = requests.post(url, headers=self.headers, json=payload, timeout=30)
-
-        if response.status_code == 200:
-            return {
-                "status": "success",
-                "result": response.json()
-            }
-        else:
-            return {
-                "status": "error",
-                "error": f"HTTP {response.status_code}: {response.text}"
-            }
-
-    def _test_hf_inference_model(self, model_id: str, input_text: str, model_info: Dict) -> Dict:
-        """Test model using traditional HF Inference API"""
-        url = model_info["endpoint"]
-
-        # Adjust payload based on pipeline type
-        pipeline = model_info["pipeline"]
-        if pipeline in ["text-generation", "text2text-generation"]:
-            payload = {"inputs": input_text, "parameters": {"max_new_tokens": 100}}
-        elif pipeline == "text-to-image":
-            payload = {"inputs": input_text}
-        elif pipeline == "feature-extraction":
-            payload = {"inputs": input_text}
-        else:
-            payload = {"inputs": input_text}
-
-        response = requests.post(url, headers=self.headers, json=payload, timeout=30)
-
-        if response.status_code == 200:
-            return {
-                "status": "success",
-                "result": response.json()
-            }
-        else:
-            return {
-                "status": "error",
-                "error": f"HTTP {response.status_code}: {response.text}"
-            }
-
-    def _test_hf_router_model(self, model_id: str, input_text: str, model_info: Dict) -> Dict:
-        """Test model using HF Router API for specialized tasks"""
-        pipeline = model_info["pipeline"]
-
-        if pipeline == "text-to-image":
-            # Use the text-to-image endpoint via HF Router
-            payload = {
-                "model": model_id,
-                "prompt": input_text,
-                "num_inference_steps": 20
-            }
-            # Note: This would need to be implemented based on actual HF Router text-to-image API
-            return {
-                "status": "info",
-                "result": "Text-to-image testing via HF Router not fully implemented in demo"
-            }
-
-        return {
-            "status": "error",
-            "error": f"HF Router testing not implemented for pipeline: {pipeline}"
-        }
 
-def create_interface():
     try:
-        explorer = ModernHFInferenceExplorer()
     except ValueError as e:
-        # Create a dummy interface that shows the error
-        with gr.Blocks(title="❌ Configuration Error") as demo:
             gr.Markdown(f"""
-            # ❌ Configuration Error
 
-            **Error:** {str(e)}
 
            Please set the `HF_TOKEN` environment variable with your HuggingFace token.
 
-            You can get a token from: https://huggingface.co/settings/tokens
            """)
         return demo
 
-    def get_models_by_provider(provider_filter: str = "All"):
-        """Get models filtered by provider"""
-        models = explorer.get_available_models()
-
-        if provider_filter != "All":
-            models = [m for m in models if m['provider'] == provider_filter]
-
-        if not models:
-            return "No models found for the selected provider"
-
-        df = pd.DataFrame(models)
-        return df
 
-    def get_models_by_pipeline(pipeline_filter: str = "All"):
-        """Get models filtered by pipeline"""
-        models = explorer.get_available_models()
-
-        if pipeline_filter != "All":
-            models = [m for m in models if m['pipeline'] == pipeline_filter]
-
-        if not models:
-            return "No models found for the selected pipeline"
-
-        df = pd.DataFrame(models)
-        return df
 
-    def test_model(model_id: str, test_input: str):
-        """Test inference on a selected model"""
-        if not model_id or model_id.strip() == "":
-            return "Please select a model ID from the dropdown"
-
-        if model_id not in explorer.allowed_models:
-            available_models = "\n".join([f"- {mid}" for mid in explorer.allowed_models.keys()])
-            return f"""
-**Error:** Model '{model_id}' is not in the allowed models list.
-
-**Available models:**
-{available_models}
-"""
-
-        if not test_input.strip():
-            test_input = "Hello, how are you today?"
-
-        result = explorer.test_model_inference(model_id, test_input)
-
-        model_info = explorer.allowed_models[model_id]
-
-        if result["status"] == "success":
-            return f"""
-**Model:** {model_id}
-**Provider:** {model_info['provider']}
-**Pipeline:** {model_info['pipeline']}
-**API Format:** {model_info['api_format']}
-**Status:** ✅ Success
-**Response Time:** {result['response_time']:.2f}s
-
-**Result:**
-```json
-{json.dumps(result['result'], indent=2)}
-```
-"""
-        elif result["status"] == "info":
-            return f"""
-**Model:** {model_id}
-**Provider:** {model_info['provider']}
-**Pipeline:** {model_info['pipeline']}
-**Status:** ℹ️ Info
-**Response Time:** {result['response_time']:.2f}s if result['response_time'] else 'N/A'
 
-**Info:**
-{result['result']}
-"""
-        else:
-            return f"""
-**Model:** {model_id}
-**Provider:** {model_info['provider']}
-**Pipeline:** {model_info['pipeline']}
-**Status:** ❌ Error
-**Response Time:** {result['response_time']:.2f}s if result['response_time'] else 'N/A'
 
-**Error:**
-{result['error']}
            """
 
-    def get_provider_status():
-        """Get comprehensive status of all providers"""
-        status_info = []
-
-        for provider, config in explorer.provider_config.items():
-            model_count = len([m for m in explorer.allowed_models.values() if m["provider"] == provider])
-            capabilities_str = ", ".join(config.get("capabilities", ["N/A"]))
-
-            status_info.append({
-                "Provider": provider,
-                "Description": config["description"],
-                "Capabilities": capabilities_str,
-                "Models Available": model_count,
-                "Pricing": config["pricing"],
-                "Documentation": config["docs_url"]
-            })
 
-        return pd.DataFrame(status_info)
-
-    # Get unique providers and pipelines for filters
-    providers = ["All"] + list(set(model["provider"] for model in explorer.allowed_models.values()))
-    pipelines = ["All"] + list(set(model["pipeline"] for model in explorer.allowed_models.values()))
-    model_ids = list(explorer.allowed_models.keys())
-
-    # Create Gradio interface
-    with gr.Blocks(title="🤗 HuggingFace Inference Providers Explorer", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
-        # 🤗 HuggingFace Inference Providers Explorer
-
-        **Modern Inference Ecosystem**: Explore models from HuggingFace's unified inference providers platform!
 
-        ## 🚀 Current Inference Providers:
-        - **HF Inference**: Native serverless inference API (free tier available)
-        - **Cerebras**: Ultra-fast LPU-powered inference
-        - **Groq**: Hardware-accelerated language processing
-        - **Together AI**: High-performance open-source models
-        - **Fireworks AI**: Production-ready model serving
-        - **Replicate**: Cloud-based model deployment
-        - **Cohere**: Enterprise NLP models
-        - **Fal AI**: Fast and reliable inference
 
-        All providers use **HuggingFace routing** with unified billing and authentication!
 
-        ---
        """)
 
-        with gr.Tabs():
-            # Provider Status Tab
-            with gr.TabItem("🏢 Provider Overview"):
-                gr.Markdown("### HuggingFace Inference Providers Status")
-
-                status_btn = gr.Button("📊 View Provider Details", variant="primary")
-                provider_status_output = gr.Dataframe(
-                    headers=["Provider", "Description", "Capabilities", "Models", "Pricing", "Documentation"],
-                    label="Provider Information"
-                )
-
-                status_btn.click(get_provider_status, outputs=provider_status_output)
-
-            # Models by Provider Tab
-            with gr.TabItem("🔍 Browse by Provider"):
-                gr.Markdown("### Models Available by Provider")
 
-                provider_filter = gr.Dropdown(
-                    choices=providers,
-                    value="All",
-                    label="Select Provider"
                )
 
-                provider_models_btn = gr.Button("📋 Show Models", variant="primary")
-                provider_models_output = gr.Dataframe(
-                    headers=["Model ID", "Provider", "Pipeline", "Description", "API Format", "Status", "Pricing"],
-                    label="Models by Provider"
                )
 
-                provider_models_btn.click(
-                    get_models_by_provider,
-                    inputs=provider_filter,
-                    outputs=provider_models_output
                )
 
-            # Models by Pipeline Tab
-            with gr.TabItem("⚙️ Browse by Task"):
-                gr.Markdown("### Models Available by Task/Pipeline")
-
-                pipeline_filter = gr.Dropdown(
-                    choices=pipelines,
-                    value="All",
-                    label="Select Task/Pipeline"
-                )
 
-                pipeline_models_btn = gr.Button("📋 Show Models", variant="primary")
-                pipeline_models_output = gr.Dataframe(
-                    headers=["Model ID", "Provider", "Pipeline", "Description", "API Format", "Status"],
-                    label="Models by Task"
                )
 
-                pipeline_models_btn.click(
-                    get_models_by_pipeline,
-                    inputs=pipeline_filter,
-                    outputs=pipeline_models_output
-                )
-
-            # Model Testing Tab
-            with gr.TabItem("🧪 Test Models"):
-                gr.Markdown("### Test Live Model Inference")
-
                with gr.Row():
-                    model_id_dropdown = gr.Dropdown(
-                        choices=model_ids,
-                        label="Select Model",
-                        info="Choose from curated inference provider models"
                    )
-                    test_input = gr.Textbox(
-                        placeholder="Hello, how are you today?",
-                        label="Test Input",
-                        info="Text to send to the model"
-                    )
-
-                test_btn = gr.Button("🚀 Test Model", variant="primary")
-                test_output = gr.Markdown(label="Inference Results")
-
-                test_btn.click(
-                    test_model,
-                    inputs=[model_id_dropdown, test_input],
-                    outputs=test_output
-                )
-
-            # All Models Tab
-            with gr.TabItem("📊 All Available Models"):
-                gr.Markdown("### Complete Model Catalog")
-
-                all_models_btn = gr.Button("📋 Load All Models", variant="primary")
-                all_models_output = gr.Dataframe(
-                    headers=["Model ID", "Provider", "Pipeline", "Description", "API Format", "Status", "Pricing"],
-                    label="Complete Model Catalog"
-                )
 
-                all_models_btn.click(
-                    lambda: get_models_by_provider("All"),
-                    outputs=all_models_output
-                )
 
-        # Footer
-        gr.Markdown(f"""
-        ---
 
-        ## 🔧 Setup Instructions:
 
-        1. **Get HuggingFace Token**: Visit [HF Settings](https://huggingface.co/settings/tokens)
-        2. **Set Environment Variable**: `export HF_TOKEN=hf_your_token_here`
-        3. **Start Testing**: All providers use unified HF authentication!
 
-        ## 📋 Current Statistics:
 
-        - **Total Models**: {len(explorer.allowed_models)}
-        - **Providers**: {len(explorer.provider_config)}
-        - **Pipelines**: {len(set(model['pipeline'] for model in explorer.allowed_models.values()))}
 
-        ## 🔗 Useful Links:
 
-        - 📚 [Inference Providers Docs](https://huggingface.co/docs/inference-providers/index)
-        - 💰 [Pricing Information](https://huggingface.co/docs/inference-providers/pricing-and-billing)
-        - 🔑 [Authentication Guide](https://huggingface.co/docs/inference-providers/get-started#authentication)
-        - 🌟 [Provider Comparison](https://huggingface.co/inference-providers/models)
 
        ---
 
-        *Powered by HuggingFace Inference Providers - Unified access to the best AI models!*
        """)
 
     return demo
 
 if __name__ == "__main__":
     try:
-        demo = create_interface()
         demo.launch(
             server_name="0.0.0.0",
             server_port=7860,
             share=False
         )
     except Exception as e:
-        print(f"Error starting application: {e}")
         print("Please ensure HF_TOKEN environment variable is set.")
 
 import gradio as gr
 import requests
 import os
 import json
 from typing import List, Dict, Optional
 import time
 
+# Curated selection of advanced AI models for general users
+ADVANCED_MODELS = {
     "meta-llama/Llama-3.3-70B-Instruct": {
         "provider": "Cerebras",
+        "display_name": "Llama 3.3 70B (Ultra Fast)",
+        "description": "Meta's latest and most capable model, optimized for speed",
+        "category": "General Purpose",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     },
     "deepseek-ai/DeepSeek-R1": {
+        "provider": "Groq",
+        "display_name": "DeepSeek R1 (Reasoning)",
+        "description": "Advanced reasoning model for complex problem solving",
+        "category": "Reasoning & Analysis",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     },
     "meta-llama/Meta-Llama-3.1-405B-Instruct": {
         "provider": "SambaNova",
+        "display_name": "Llama 3.1 405B (Most Powerful)",
+        "description": "Meta's largest and most capable language model",
+        "category": "Expert Level",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     },
     "meta-llama/Meta-Llama-3-70B-Instruct": {
         "provider": "Together",
+        "display_name": "Llama 3 70B (Balanced)",
+        "description": "Excellent balance of capability and speed",
+        "category": "General Purpose",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     },
+    "cohere/command-r-plus": {
+        "provider": "Cohere",
+        "display_name": "Command R+ (Enterprise)",
+        "description": "Enterprise-grade model for professional use",
+        "category": "Business & Professional",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     },
+    "Qwen/Qwen2.5-72B-Instruct": {
+        "provider": "Novita",
+        "display_name": "Qwen 2.5 72B (Multilingual)",
+        "description": "Excellent for multiple languages and coding",
+        "category": "Multilingual & Code",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     },
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": {
+        "provider": "Nebius",
+        "display_name": "Mixtral 8x7B (Efficient)",
+        "description": "Fast and efficient for everyday tasks",
+        "category": "Daily Tasks",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     }
 }
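Every entry above shares the same key set and the same router endpoint, so extending the catalog is a one-dict change. A hypothetical example (the model ID is reused from the old version's catalog; the metadata strings are illustrative, not part of this commit):

```python
# Hypothetical extra entry; same keys as the committed ones.
ADVANCED_MODELS["meta-llama/Llama-3.1-8B-Instruct"] = {
    "provider": "HF Inference",  # assumed provider, for illustration only
    "display_name": "Llama 3.1 8B (Lightweight)",
    "description": "Smaller, faster model for quick experiments",
    "category": "Daily Tasks",
    "endpoint": "https://router.huggingface.co/v1/chat/completions",
}
```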
 
+class AIChat:
     def __init__(self):
         self.hf_token = os.getenv("HF_TOKEN")
         if not self.hf_token:
+            raise ValueError("HF_TOKEN environment variable is required")
 
+        self.headers = {
+            "Authorization": f"Bearer {self.hf_token}",
+            "Content-Type": "application/json"
+        }
 
+    def send_message(self, model_id: str, message: str, conversation_history: List = None) -> Dict:
+        """Send a chat message to the selected AI model"""
+        if model_id not in ADVANCED_MODELS:
             return {
+                "success": False,
+                "error": "Selected model is not available"
             }
 
+        model_info = ADVANCED_MODELS[model_id]
+
+        # Build conversation with history
+        messages = []
+        if conversation_history:
+            messages.extend(conversation_history)
+        messages.append({"role": "user", "content": message})
+
+        payload = {
+            "model": model_id,
+            "messages": messages,
+            "max_tokens": 1000,
+            "temperature": 0.7,
+            "stream": False
+        }
 
         try:
+            response = requests.post(
+                model_info["endpoint"],
+                headers=self.headers,
+                json=payload,
+                timeout=60
+            )
 
+            if response.status_code == 200:
+                result = response.json()
+                if "choices" in result and len(result["choices"]) > 0:
+                    ai_response = result["choices"][0]["message"]["content"]
+                    return {
+                        "success": True,
+                        "response": ai_response,
+                        "model": model_info["display_name"],
+                        "provider": model_info["provider"]
+                    }
+                else:
+                    return {
+                        "success": False,
+                        "error": "No response generated"
+                    }
             else:
                 return {
+                    "success": False,
+                    "error": f"API Error: {response.status_code} - {response.text}"
                 }
 
         except Exception as e:
             return {
+                "success": False,
+                "error": f"Connection error: {str(e)}"
             }
 
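`send_message` reduces to a single OpenAI-compatible POST against the HF router. A minimal standalone sketch of the same call, assuming `HF_TOKEN` is set and the model is currently served through the router:

```python
import os
import requests

# Same endpoint and payload shape as AIChat.send_message above.
resp = requests.post(
    "https://router.huggingface.co/v1/chat/completions",
    headers={
        "Authorization": f"Bearer {os.environ['HF_TOKEN']}",
        "Content-Type": "application/json",
    },
    json={
        "model": "meta-llama/Llama-3.3-70B-Instruct",
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 100,
        "temperature": 0.7,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```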
+def create_chat_interface():
     try:
+        chat_ai = AIChat()
     except ValueError as e:
+        # Create error interface
+        with gr.Blocks(title="❌ Setup Required") as demo:
             gr.Markdown(f"""
+            # ❌ Setup Required
 
+            **{str(e)}**
 
            Please set the `HF_TOKEN` environment variable with your HuggingFace token.
 
+            Get your token at: https://huggingface.co/settings/tokens
            """)
         return demo
 
+    # Create model choices for dropdown
+    model_choices = [
+        (f"🚀 {info['display_name']} - {info['description']}", model_id)
+        for model_id, info in ADVANCED_MODELS.items()
+    ]
 
+    def chat_with_ai(message, history, selected_model):
+        """Handle chat conversation"""
+        if not message.strip():
+            # This function is a generator, so early exits must yield
+            # their outputs rather than return them
+            yield history, ""
+            return
+
+        if not selected_model:
+            history.append([message, "❌ Please select an AI model first"])
+            yield history, ""
+            return
+
+        # Show typing indicator
+        history.append([message, "🤔 Thinking..."])
+        yield history, ""
+
+        # Convert gradio history to API format
+        conversation_history = []
+        for user_msg, ai_msg in history[:-1]:  # Exclude the current "thinking" message
+            if user_msg and ai_msg and ai_msg != "🤔 Thinking...":
+                conversation_history.append({"role": "user", "content": user_msg})
+                conversation_history.append({"role": "assistant", "content": ai_msg})
+
+        # Send message to AI
+        result = chat_ai.send_message(selected_model, message, conversation_history)
+
+        if result["success"]:
+            # Update the last message with the real response
+            history[-1] = [message, result["response"]]
+            yield history, ""
+        else:
+            # Update with error message
+            history[-1] = [message, f"❌ Error: {result['error']}"]
+            yield history, ""
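For reference, the loop above flattens Gradio's pair-based chat history into the role/content list the router expects, skipping the still-pending turn. A tiny worked example (values illustrative):

```python
# Gradio Chatbot history: a list of [user, assistant] pairs.
history = [
    ["Hi", "Hello! How can I help?"],
    ["What is 2+2?", "🤔 Thinking..."],  # current turn, still pending
]

messages = []
for user_msg, ai_msg in history[:-1]:  # exclude the pending turn
    if user_msg and ai_msg and ai_msg != "🤔 Thinking...":
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": ai_msg})

# messages is now:
# [{"role": "user", "content": "Hi"},
#  {"role": "assistant", "content": "Hello! How can I help?"}]
```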
 
+    def clear_chat():
+        """Clear the chat history"""
+        return [], ""
+
+    def get_model_info(selected_model):
+        """Get information about the selected model"""
+        if not selected_model or selected_model not in ADVANCED_MODELS:
+            return "Select a model to see details"
+
+        info = ADVANCED_MODELS[selected_model]
+        return f"""
+        **🤖 {info['display_name']}**
 
+        **Provider:** {info['provider']}
+        **Category:** {info['category']}
+        **Description:** {info['description']}
 
+        Ready to chat! Type your message below.
        """
 
+    # Create the interface
+    with gr.Blocks(
+        title="🤖 Chat with Advanced AI Models",
+        theme=gr.themes.Soft(),
+        css="""
+        .chat-container {
+            max-width: 1000px;
+            margin: 0 auto;
+        }
+        .model-info {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 15px;
+            border-radius: 10px;
+            margin: 10px 0;
+        }
+        """
+    ) as demo:
 
         gr.Markdown("""
+        # 🤖 Chat with Advanced AI Models
 
+        **Experience the latest AI technology!** Choose from powerful models and start chatting instantly.
 
+        **What you can do:**
+        - Ask questions and get intelligent answers
+        - Get help with writing, analysis, and creative tasks
+        - Solve problems and get explanations
+        - Have natural conversations
        """)
 
+        with gr.Row():
+            # Left column - Model selection
+            with gr.Column(scale=1):
+                gr.Markdown("### 🎯 Choose Your AI")
 
+                model_selector = gr.Dropdown(
+                    choices=model_choices,
+                    label="Select AI Model",
+                    info="Each model has different strengths",
+                    interactive=True
                )
 
+                model_info_display = gr.Markdown(
+                    "Select a model to see details",
+                    elem_classes=["model-info"]
                )
 
+                # Update model info when selection changes
+                model_selector.change(
+                    get_model_info,
+                    inputs=model_selector,
+                    outputs=model_info_display
                )
 
+            # Right column - Chat interface
+            with gr.Column(scale=2):
+                gr.Markdown("### 💬 Chat Interface")
 
+                chatbot = gr.Chatbot(
+                    label="Conversation",
+                    height=400,
+                    show_label=False,
+                    container=True,
+                    elem_classes=["chat-container"]
                )
 
                with gr.Row():
+                    message_input = gr.Textbox(
+                        placeholder="Type your message here...",
+                        label="Your Message",
+                        scale=4,
+                        lines=1
                    )
+                    send_btn = gr.Button("Send 📤", variant="primary", scale=1)
 
+                with gr.Row():
+                    clear_btn = gr.Button("Clear Chat 🗑️", variant="secondary")
+
+        # Chat functionality
+        def submit_message(message, history, model):
+            # chat_with_ai is a generator; delegate with "yield from" so
+            # Gradio streams the interim "Thinking..." update to the UI
+            yield from chat_with_ai(message, history, model)
+
+        # Send message on button click or enter
+        send_btn.click(
+            submit_message,
+            inputs=[message_input, chatbot, model_selector],
+            outputs=[chatbot, message_input]
+        ).then(
+            lambda: "", outputs=message_input  # Clear input after sending
+        )
 
+        message_input.submit(
+            submit_message,
+            inputs=[message_input, chatbot, model_selector],
+            outputs=[chatbot, message_input]
+        ).then(
+            lambda: "", outputs=message_input  # Clear input after sending
+        )
 
+        # Clear chat
+        clear_btn.click(clear_chat, outputs=[chatbot, message_input])
 
+        # Footer
+        gr.Markdown("""
+        ---
 
+        ## 🚀 **Featured AI Models:**
 
+        - **🚀 Ultra Fast**: Llama 3.3 70B on Cerebras chips
+        - **🧠 Reasoning**: DeepSeek R1 for complex problem solving
+        - **💪 Most Powerful**: Llama 3.1 405B for expert tasks
+        - **⚖️ Balanced**: Llama 3 70B for everyday use
+        - **💼 Enterprise**: Command R+ for professional work
+        - **🌍 Multilingual**: Qwen 2.5 72B for global communication
+        - **⚡ Efficient**: Mixtral 8x7B for quick responses
 
+        ## 💡 **Tips for Better Conversations:**
 
+        - Be specific about what you want
+        - Ask follow-up questions for deeper insights
+        - Try different models for different types of tasks
+        - Use clear, natural language
 
        ---
 
+        *Powered by HuggingFace Inference Providers* 🤗
        """)
 
     return demo
 
 if __name__ == "__main__":
     try:
+        demo = create_chat_interface()
         demo.launch(
             server_name="0.0.0.0",
             server_port=7860,
             share=False
         )
     except Exception as e:
+        print(f"Error starting chat application: {e}")
         print("Please ensure HF_TOKEN environment variable is set.")