ReallyFloppyPenguin committed · Commit 7c79299 · verified · 1 Parent(s): a769262

Update app.py

Files changed (1): app.py (+247, -692)
app.py CHANGED
@@ -1,793 +1,348 @@
 import gradio as gr
 import requests
 import os
-import pandas as pd
 import json
 from typing import List, Dict, Optional
 import time
-from datetime import datetime
 
-# Updated dictionary of allowed models with current HF Inference Providers
-ALLOWED_MODELS = {
-    # Text Generation Models - HF Inference API
-    "microsoft/DialoGPT-medium": {
-        "provider": "HF Inference",
-        "pipeline": "text-generation",
-        "description": "Conversational AI model for dialog generation",
-        "endpoint": "https://api-inference.huggingface.co/models/microsoft/DialoGPT-medium",
-        "api_format": "hf_inference"
-    },
-    "meta-llama/Llama-3.1-8B-Instruct": {
-        "provider": "HF Inference",
-        "pipeline": "text-generation",
-        "description": "Meta's Llama 3.1 8B Instruct model",
-        "endpoint": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct",
-        "api_format": "hf_inference"
-    },
-    "deepseek-ai/DeepSeek-V3-0324": {
-        "provider": "HF Inference",
-        "pipeline": "text-generation",
-        "description": "DeepSeek V3 state-of-the-art conversational model",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Cerebras Models (Chat completion LLM only)
     "meta-llama/Llama-3.3-70B-Instruct": {
         "provider": "Cerebras",
-        "pipeline": "text-generation",
-        "description": "Meta's Llama 3.3 70B Instruct model via Cerebras ultra-fast LPUs",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Cohere Models (Chat completion LLM + VLM)
-    "cohere/command-r-plus": {
-        "provider": "Cohere",
-        "pipeline": "text-generation",
-        "description": "Cohere's Command R+ enterprise-grade NLP model",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Fal AI Models (Text-to-Image, Text-to-Video, Speech-to-Text)
-    "black-forest-labs/FLUX.1-schnell": {
-        "provider": "Fal AI",
-        "pipeline": "text-to-image",
-        "description": "FLUX.1 schnell model for fast image generation via Fal AI",
-        "endpoint": "https://router.huggingface.co/v1/text-to-image",
-        "api_format": "hf_router"
     },
-
-    # Featherless AI Models (Chat completion LLM + VLM)
-    "meta-llama/Llama-3.1-70B-Instruct": {
-        "provider": "Featherless AI",
-        "pipeline": "text-generation",
-        "description": "Meta's Llama 3.1 70B Instruct via Featherless AI",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Fireworks Models (Chat completion LLM + VLM)
-    "accounts/fireworks/models/llama-v3p1-8b-instruct": {
-        "provider": "Fireworks",
-        "pipeline": "text-generation",
-        "description": "Llama 3.1 8B Instruct via Fireworks AI production-ready serving",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Groq Models (Chat completion LLM only)
     "deepseek-ai/DeepSeek-R1": {
-        "provider": "Groq",
-        "pipeline": "text-generation",
-        "description": "DeepSeek R1 model via Groq hardware acceleration",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Hyperbolic Models (Chat completion LLM + VLM)
-    "meta-llama/Meta-Llama-3-8B-Instruct": {
-        "provider": "Hyperbolic",
-        "pipeline": "text-generation",
-        "description": "Meta's Llama 3 8B Instruct via Hyperbolic",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Nebius Models (Chat completion LLM + VLM, Feature Extraction, Text-to-Image)
-    "mistralai/Mixtral-8x7B-Instruct-v0.1": {
-        "provider": "Nebius",
-        "pipeline": "text-generation",
-        "description": "Mistral's Mixtral 8x7B Instruct via Nebius cloud platform",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Novita Models (Chat completion LLM + VLM, Text-to-Video)
-    "Qwen/Qwen2.5-72B-Instruct": {
-        "provider": "Novita",
-        "pipeline": "text-generation",
-        "description": "Qwen 2.5 72B Instruct via Novita",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
     },
-
-    # Nscale Models (Chat completion LLM + VLM, Feature Extraction, Text-to-Image)
-    "microsoft/Phi-3-medium-4k-instruct": {
-        "provider": "Nscale",
-        "pipeline": "text-generation",
-        "description": "Microsoft Phi-3 Medium via Nscale",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # Replicate Models (Text-to-Image, Text-to-Video, Speech-to-Text)
-    "stabilityai/stable-diffusion-xl-base-1.0": {
-        "provider": "Replicate",
-        "pipeline": "text-to-image",
-        "description": "Stable Diffusion XL via Replicate cloud platform",
-        "endpoint": "https://router.huggingface.co/v1/text-to-image",
-        "api_format": "hf_router"
-    },
-
-    # SambaNova Models (Chat completion LLM, Feature Extraction)
     "meta-llama/Meta-Llama-3.1-405B-Instruct": {
         "provider": "SambaNova",
-        "pipeline": "text-generation",
-        "description": "Meta's Llama 3.1 405B Instruct via SambaNova",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
     },
-
-    # Together AI Models (Chat completion LLM + VLM, Text-to-Image)
     "meta-llama/Meta-Llama-3-70B-Instruct": {
         "provider": "Together",
-        "pipeline": "text-generation",
-        "description": "Meta's Llama 3 70B Instruct via Together AI high-performance inference",
-        "endpoint": "https://router.huggingface.co/v1/chat/completions",
-        "api_format": "openai_compatible"
-    },
-
-    # HF Inference - Additional Models for various tasks
-    "black-forest-labs/FLUX.1-dev": {
-        "provider": "HF Inference",
-        "pipeline": "text-to-image",
-        "description": "FLUX.1 development model for high-quality text-to-image generation",
-        "endpoint": "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev",
-        "api_format": "hf_inference"
-    },
-    "openai/whisper-large-v3": {
-        "provider": "HF Inference",
-        "pipeline": "automatic-speech-recognition",
-        "description": "Whisper Large V3 for speech recognition",
-        "endpoint": "https://api-inference.huggingface.co/models/openai/whisper-large-v3",
-        "api_format": "hf_inference"
-    },
-    "sentence-transformers/all-MiniLM-L6-v2": {
-        "provider": "HF Inference",
-        "pipeline": "feature-extraction",
-        "description": "Sentence transformer for embeddings and semantic search",
-        "endpoint": "https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2",
-        "api_format": "hf_inference"
-    },
-    "cardiffnlp/twitter-roberta-base-sentiment-latest": {
-        "provider": "HF Inference",
-        "pipeline": "text-classification",
-        "description": "Sentiment analysis model trained on Twitter data",
-        "endpoint": "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest",
-        "api_format": "hf_inference"
-    }
-}
-
-# Updated provider configuration for current HF Inference Providers ecosystem
-PROVIDER_CONFIG = {
-    "HF Inference": {
-        "description": "HuggingFace's native serverless inference API",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://api-inference.huggingface.co",
-        "pricing": "Free tier + pay-per-use",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/hf-inference",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Feature Extraction", "Text to Image", "Speech to text"]
-    },
-    "Cerebras": {
-        "description": "Ultra-fast inference with Language Processing Units (LPUs)",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/cerebras",
-        "capabilities": ["Chat completion (LLM)"]
     },
-    "Cohere": {
-        "description": "Enterprise-grade NLP models and APIs",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/cohere",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)"]
-    },
-    "Fal AI": {
-        "description": "Fast and reliable model inference platform",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/fal-ai",
-        "capabilities": ["Text to Image", "Text to video", "Speech to text"]
-    },
-    "Featherless AI": {
-        "description": "Optimized inference for open-source models",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/featherless-ai",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)"]
-    },
-    "Fireworks": {
-        "description": "Production-ready inference with fast model serving",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/fireworks-ai",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)"]
-    },
-    "Groq": {
-        "description": "Fast inference with specialized hardware acceleration",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/groq",
-        "capabilities": ["Chat completion (LLM)"]
-    },
-    "Hyperbolic": {
-        "description": "GPU-accelerated inference platform",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/hyperbolic",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)"]
-    },
-    "Nebius": {
-        "description": "Cloud-based AI infrastructure platform",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/nebius",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Feature Extraction", "Text to Image"]
-    },
-    "Novita": {
-        "description": "AI inference platform with video generation",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/novita",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Text to video"]
-    },
-    "Nscale": {
-        "description": "Scalable AI model deployment platform",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/nscale",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Feature Extraction", "Text to Image"]
-    },
-    "Replicate": {
-        "description": "Run models in the cloud with simple API",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/replicate",
-        "capabilities": ["Text to Image", "Text to video", "Speech to text"]
     },
-    "SambaNova": {
-        "description": "Enterprise AI platform with DataFlow architecture",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/sambanova",
-        "capabilities": ["Chat completion (LLM)", "Feature Extraction"]
     },
-    "Together": {
-        "description": "High-performance inference for open-source models",
-        "auth_header": "Authorization",
-        "auth_format": "Bearer {token}",
-        "env_var": "HF_TOKEN",
-        "base_url": "https://router.huggingface.co/v1",
-        "pricing": "Pay-per-token via HF routing",
-        "docs_url": "https://huggingface.co/docs/inference-providers/providers/together",
-        "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Text to Image"]
     }
 }
 
-class ModernHFInferenceExplorer:
     def __init__(self):
-        self.allowed_models = ALLOWED_MODELS
-        self.provider_config = PROVIDER_CONFIG
         self.hf_token = os.getenv("HF_TOKEN")
-
         if not self.hf_token:
-            raise ValueError("HF_TOKEN environment variable is required for HuggingFace Inference Providers")
-
-        self.headers = {"Authorization": f"Bearer {self.hf_token}"}
 
-    def get_available_models(self) -> List[Dict]:
-        """Get the predefined allowed models with provider info and live status"""
-        models = []
-        for model_id, model_info in self.allowed_models.items():
-            provider = model_info["provider"]
-
-            models.append({
-                "model_id": model_id,
-                "provider": provider,
-                "pipeline": model_info["pipeline"],
-                "description": model_info["description"],
-                "endpoint": model_info["endpoint"],
-                "api_format": model_info["api_format"],
-                "status": self._check_model_status(model_id, provider),
-                "pricing": self.provider_config[provider]["pricing"]
-            })
 
-        return models
-
-    def _check_model_status(self, model_id: str, provider: str) -> str:
-        """Check if a specific model is currently available via HF Inference Providers"""
-        try:
-            # For models using the new HF Router API
-            if provider in ["Cerebras", "Groq", "Together", "Fireworks", "Replicate", "Cohere", "Fal AI"]:
-                # Use the models endpoint to check availability
-                url = "https://router.huggingface.co/v1/models"
-                response = requests.get(url, headers=self.headers, timeout=5)
-
-                if response.status_code == 200:
-                    available_models = response.json()
-                    if isinstance(available_models, dict) and "data" in available_models:
-                        model_ids = [m["id"] for m in available_models["data"]]
-                        return "✅ Available" if model_id in model_ids else "❓ Check Provider"
-                    return "✅ Available"
-                else:
-                    return "❓ Unknown"
-
-            # For traditional HF Inference API models
-            elif provider == "HF Inference":
-                url = f"https://api-inference.huggingface.co/models/{model_id}"
-                response = requests.get(url, headers=self.headers, timeout=5)
-
-                if response.status_code == 200:
-                    return "✅ Available"
-                elif response.status_code == 503:
-                    return "🔄 Loading"
-                else:
-                    return "❌ Unavailable"
-
-            return "❓ Unknown"
-
-        except Exception:
-            return "❓ Connection Error"
-
-    def test_model_inference(self, model_id: str, input_text: str) -> Dict:
-        """Test inference on a specific allowed model using current HF Inference Providers API"""
-        if model_id not in self.allowed_models:
             return {
-                "status": "error",
-                "error": f"Model '{model_id}' is not in the allowed models list",
-                "response_time": None
             }
 
-        model_info = self.allowed_models[model_id]
-        api_format = model_info["api_format"]
 
         try:
-            start_time = time.time()
 
-            if api_format == "openai_compatible":
-                # Use the new OpenAI-compatible chat completions endpoint
-                result = self._test_openai_compatible_model(model_id, input_text)
-            elif api_format == "hf_inference":
-                # Use traditional HF Inference API
-                result = self._test_hf_inference_model(model_id, input_text, model_info)
-            elif api_format == "hf_router":
-                # Use HF Router for other tasks
-                result = self._test_hf_router_model(model_id, input_text, model_info)
             else:
                 return {
-                    "status": "error",
-                    "error": f"Unsupported API format: {api_format}",
-                    "response_time": None
                 }
-
-            result["response_time"] = time.time() - start_time
-            return result
 
         except Exception as e:
             return {
-                "status": "error",
-                "error": str(e),
-                "response_time": time.time() - start_time if 'start_time' in locals() else None
             }
-
-    def _test_openai_compatible_model(self, model_id: str, input_text: str) -> Dict:
-        """Test model using OpenAI-compatible chat completions API"""
-        url = "https://router.huggingface.co/v1/chat/completions"
-
-        payload = {
-            "model": model_id,
-            "messages": [
-                {"role": "user", "content": input_text}
-            ],
-            "max_tokens": 100,
-            "temperature": 0.7
-        }
-
-        response = requests.post(url, headers=self.headers, json=payload, timeout=30)
-
-        if response.status_code == 200:
-            return {
-                "status": "success",
-                "result": response.json()
-            }
-        else:
-            return {
-                "status": "error",
-                "error": f"HTTP {response.status_code}: {response.text}"
-            }
-
-    def _test_hf_inference_model(self, model_id: str, input_text: str, model_info: Dict) -> Dict:
-        """Test model using traditional HF Inference API"""
-        url = model_info["endpoint"]
-
-        # Adjust payload based on pipeline type
-        pipeline = model_info["pipeline"]
-        if pipeline in ["text-generation", "text2text-generation"]:
-            payload = {"inputs": input_text, "parameters": {"max_new_tokens": 100}}
-        elif pipeline == "text-to-image":
-            payload = {"inputs": input_text}
-        elif pipeline == "feature-extraction":
-            payload = {"inputs": input_text}
-        else:
-            payload = {"inputs": input_text}
-
-        response = requests.post(url, headers=self.headers, json=payload, timeout=30)
-
-        if response.status_code == 200:
-            return {
-                "status": "success",
-                "result": response.json()
-            }
-        else:
-            return {
-                "status": "error",
-                "error": f"HTTP {response.status_code}: {response.text}"
-            }
-
-    def _test_hf_router_model(self, model_id: str, input_text: str, model_info: Dict) -> Dict:
-        """Test model using HF Router API for specialized tasks"""
-        pipeline = model_info["pipeline"]
-
-        if pipeline == "text-to-image":
-            # Use the text-to-image endpoint via HF Router
-            payload = {
-                "model": model_id,
-                "prompt": input_text,
-                "num_inference_steps": 20
-            }
-            # Note: This would need to be implemented based on actual HF Router text-to-image API
-            return {
-                "status": "info",
-                "result": "Text-to-image testing via HF Router not fully implemented in demo"
-            }
-
-        return {
-            "status": "error",
-            "error": f"HF Router testing not implemented for pipeline: {pipeline}"
-        }
 
-def create_interface():
     try:
-        explorer = ModernHFInferenceExplorer()
     except ValueError as e:
-        # Create a dummy interface that shows the error
-        with gr.Blocks(title="❌ Configuration Error") as demo:
             gr.Markdown(f"""
-            # ❌ Configuration Error
 
-            **Error:** {str(e)}
 
            Please set the `HF_TOKEN` environment variable with your HuggingFace token.
 
-            You can get a token from: https://huggingface.co/settings/tokens
            """)
         return demo
 
-    def get_models_by_provider(provider_filter: str = "All"):
-        """Get models filtered by provider"""
-        models = explorer.get_available_models()
-
-        if provider_filter != "All":
-            models = [m for m in models if m['provider'] == provider_filter]
-
-        if not models:
-            return "No models found for the selected provider"
-
-        df = pd.DataFrame(models)
-        return df
 
-    def get_models_by_pipeline(pipeline_filter: str = "All"):
-        """Get models filtered by pipeline"""
-        models = explorer.get_available_models()
-
-        if pipeline_filter != "All":
-            models = [m for m in models if m['pipeline'] == pipeline_filter]
-
-        if not models:
-            return "No models found for the selected pipeline"
-
-        df = pd.DataFrame(models)
-        return df
 
-    def test_model(model_id: str, test_input: str):
-        """Test inference on a selected model"""
-        if not model_id or model_id.strip() == "":
-            return "Please select a model ID from the dropdown"
-
-        if model_id not in explorer.allowed_models:
-            available_models = "\n".join([f"- {mid}" for mid in explorer.allowed_models.keys()])
-            return f"""
-**Error:** Model '{model_id}' is not in the allowed models list.
-
-**Available models:**
-{available_models}
-"""
-
-        if not test_input.strip():
-            test_input = "Hello, how are you today?"
-
-        result = explorer.test_model_inference(model_id, test_input)
-
-        model_info = explorer.allowed_models[model_id]
-
-        if result["status"] == "success":
-            return f"""
-**Model:** {model_id}
-**Provider:** {model_info['provider']}
-**Pipeline:** {model_info['pipeline']}
-**API Format:** {model_info['api_format']}
-**Status:** ✅ Success
-**Response Time:** {result['response_time']:.2f}s
-
-**Result:**
-```json
-{json.dumps(result['result'], indent=2)}
-```
-"""
-        elif result["status"] == "info":
-            return f"""
-**Model:** {model_id}
-**Provider:** {model_info['provider']}
-**Pipeline:** {model_info['pipeline']}
-**Status:** ℹ️ Info
-**Response Time:** {result['response_time']:.2f}s if result['response_time'] else 'N/A'
 
-**Info:**
-{result['result']}
-"""
-        else:
-            return f"""
-**Model:** {model_id}
-**Provider:** {model_info['provider']}
-**Pipeline:** {model_info['pipeline']}
-**Status:** ❌ Error
-**Response Time:** {result['response_time']:.2f}s if result['response_time'] else 'N/A'
 
-**Error:**
-{result['error']}
            """
 
-    def get_provider_status():
-        """Get comprehensive status of all providers"""
-        status_info = []
-
-        for provider, config in explorer.provider_config.items():
-            model_count = len([m for m in explorer.allowed_models.values() if m["provider"] == provider])
-            capabilities_str = ", ".join(config.get("capabilities", ["N/A"]))
-
-            status_info.append({
-                "Provider": provider,
-                "Description": config["description"],
-                "Capabilities": capabilities_str,
-                "Models Available": model_count,
-                "Pricing": config["pricing"],
-                "Documentation": config["docs_url"]
-            })
 
-        return pd.DataFrame(status_info)
-
-    # Get unique providers and pipelines for filters
-    providers = ["All"] + list(set(model["provider"] for model in explorer.allowed_models.values()))
-    pipelines = ["All"] + list(set(model["pipeline"] for model in explorer.allowed_models.values()))
-    model_ids = list(explorer.allowed_models.keys())
-
-    # Create Gradio interface
-    with gr.Blocks(title="🤗 HuggingFace Inference Providers Explorer", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
-        # 🤗 HuggingFace Inference Providers Explorer
-
-        **Modern Inference Ecosystem**: Explore models from HuggingFace's unified inference providers platform!
 
-        ## 🚀 Current Inference Providers:
-        - **HF Inference**: Native serverless inference API (free tier available)
-        - **Cerebras**: Ultra-fast LPU-powered inference
-        - **Groq**: Hardware-accelerated language processing
-        - **Together AI**: High-performance open-source models
-        - **Fireworks AI**: Production-ready model serving
-        - **Replicate**: Cloud-based model deployment
-        - **Cohere**: Enterprise NLP models
-        - **Fal AI**: Fast and reliable inference
 
-        All providers use **HuggingFace routing** with unified billing and authentication!
 
-        ---
        """)
 
-        with gr.Tabs():
-            # Provider Status Tab
-            with gr.TabItem("🏢 Provider Overview"):
-                gr.Markdown("### HuggingFace Inference Providers Status")
-
-                status_btn = gr.Button("📊 View Provider Details", variant="primary")
-                provider_status_output = gr.Dataframe(
-                    headers=["Provider", "Description", "Capabilities", "Models", "Pricing", "Documentation"],
-                    label="Provider Information"
-                )
-
-                status_btn.click(get_provider_status, outputs=provider_status_output)
-
-            # Models by Provider Tab
-            with gr.TabItem("🔍 Browse by Provider"):
-                gr.Markdown("### Models Available by Provider")
 
-                provider_filter = gr.Dropdown(
-                    choices=providers,
-                    value="All",
-                    label="Select Provider"
                )
 
-                provider_models_btn = gr.Button("📋 Show Models", variant="primary")
-                provider_models_output = gr.Dataframe(
-                    headers=["Model ID", "Provider", "Pipeline", "Description", "API Format", "Status", "Pricing"],
-                    label="Models by Provider"
                )
 
-                provider_models_btn.click(
-                    get_models_by_provider,
-                    inputs=provider_filter,
-                    outputs=provider_models_output
                )
 
-            # Models by Pipeline Tab
-            with gr.TabItem("⚙️ Browse by Task"):
-                gr.Markdown("### Models Available by Task/Pipeline")
-
-                pipeline_filter = gr.Dropdown(
-                    choices=pipelines,
-                    value="All",
-                    label="Select Task/Pipeline"
-                )
 
-                pipeline_models_btn = gr.Button("📋 Show Models", variant="primary")
-                pipeline_models_output = gr.Dataframe(
-                    headers=["Model ID", "Provider", "Pipeline", "Description", "API Format", "Status"],
-                    label="Models by Task"
                )
 
-                pipeline_models_btn.click(
-                    get_models_by_pipeline,
-                    inputs=pipeline_filter,
-                    outputs=pipeline_models_output
-                )
-
-            # Model Testing Tab
-            with gr.TabItem("🧪 Test Models"):
-                gr.Markdown("### Test Live Model Inference")
-
                with gr.Row():
-                    model_id_dropdown = gr.Dropdown(
-                        choices=model_ids,
-                        label="Select Model",
-                        info="Choose from curated inference provider models"
                    )
-                    test_input = gr.Textbox(
-                        placeholder="Hello, how are you today?",
-                        label="Test Input",
-                        info="Text to send to the model"
-                    )
-
-                test_btn = gr.Button("🚀 Test Model", variant="primary")
-                test_output = gr.Markdown(label="Inference Results")
-
-                test_btn.click(
-                    test_model,
-                    inputs=[model_id_dropdown, test_input],
-                    outputs=test_output
-                )
-
-            # All Models Tab
-            with gr.TabItem("📊 All Available Models"):
-                gr.Markdown("### Complete Model Catalog")
-
-                all_models_btn = gr.Button("📋 Load All Models", variant="primary")
-                all_models_output = gr.Dataframe(
-                    headers=["Model ID", "Provider", "Pipeline", "Description", "API Format", "Status", "Pricing"],
-                    label="Complete Model Catalog"
-                )
 
-                all_models_btn.click(
-                    lambda: get_models_by_provider("All"),
-                    outputs=all_models_output
-                )
 
-        # Footer
-        gr.Markdown(f"""
-        ---
 
-        ## 🔧 Setup Instructions:
 
-        1. **Get HuggingFace Token**: Visit [HF Settings](https://huggingface.co/settings/tokens)
-        2. **Set Environment Variable**: `export HF_TOKEN=hf_your_token_here`
-        3. **Start Testing**: All providers use unified HF authentication!
 
-        ## 📋 Current Statistics:
 
-        - **Total Models**: {len(explorer.allowed_models)}
-        - **Providers**: {len(explorer.provider_config)}
-        - **Pipelines**: {len(set(model['pipeline'] for model in explorer.allowed_models.values()))}
 
-        ## 🔗 Useful Links:
 
-        - 📚 [Inference Providers Docs](https://huggingface.co/docs/inference-providers/index)
-        - 💰 [Pricing Information](https://huggingface.co/docs/inference-providers/pricing-and-billing)
-        - 🔑 [Authentication Guide](https://huggingface.co/docs/inference-providers/get-started#authentication)
-        - 🌟 [Provider Comparison](https://huggingface.co/inference-providers/models)
 
        ---
 
-        *Powered by HuggingFace Inference Providers - Unified access to the best AI models!*
        """)
 
     return demo
 
 if __name__ == "__main__":
     try:
-        demo = create_interface()
         demo.launch(
             server_name="0.0.0.0",
             server_port=7860,
             share=False
         )
     except Exception as e:
-        print(f"Error starting application: {e}")
         print("Please ensure HF_TOKEN environment variable is set.")
 
 import gradio as gr
 import requests
 import os
 import json
 from typing import List, Dict, Optional
 import time
 
+# Curated selection of advanced AI models for general users
+ADVANCED_MODELS = {
     "meta-llama/Llama-3.3-70B-Instruct": {
         "provider": "Cerebras",
+        "display_name": "Llama 3.3 70B (Ultra Fast)",
+        "description": "Meta's latest and most capable model, optimized for speed",
+        "category": "General Purpose",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     },
     "deepseek-ai/DeepSeek-R1": {
+        "provider": "Groq",
+        "display_name": "DeepSeek R1 (Reasoning)",
+        "description": "Advanced reasoning model for complex problem solving",
+        "category": "Reasoning & Analysis",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     },
     "meta-llama/Meta-Llama-3.1-405B-Instruct": {
         "provider": "SambaNova",
+        "display_name": "Llama 3.1 405B (Most Powerful)",
+        "description": "Meta's largest and most capable language model",
+        "category": "Expert Level",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     },
     "meta-llama/Meta-Llama-3-70B-Instruct": {
         "provider": "Together",
+        "display_name": "Llama 3 70B (Balanced)",
+        "description": "Excellent balance of capability and speed",
+        "category": "General Purpose",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     },
+    "cohere/command-r-plus": {
+        "provider": "Cohere",
+        "display_name": "Command R+ (Enterprise)",
+        "description": "Enterprise-grade model for professional use",
+        "category": "Business & Professional",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     },
+    "Qwen/Qwen2.5-72B-Instruct": {
+        "provider": "Novita",
+        "display_name": "Qwen 2.5 72B (Multilingual)",
+        "description": "Excellent for multiple languages and coding",
+        "category": "Multilingual & Code",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     },
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": {
+        "provider": "Nebius",
+        "display_name": "Mixtral 8x7B (Efficient)",
+        "description": "Fast and efficient for everyday tasks",
+        "category": "Daily Tasks",
+        "endpoint": "https://router.huggingface.co/v1/chat/completions"
     }
 }
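Every entry above shares the same key set and the same router endpoint, so extending the catalog is a one-dict change. A hypothetical example (the model ID is reused from the old version's catalog; the metadata strings are illustrative, not part of this commit):

```python
# Hypothetical extra entry; same keys as the committed ones.
ADVANCED_MODELS["meta-llama/Llama-3.1-8B-Instruct"] = {
    "provider": "HF Inference",  # assumed provider, for illustration only
    "display_name": "Llama 3.1 8B (Lightweight)",
    "description": "Smaller, faster model for quick experiments",
    "category": "Daily Tasks",
    "endpoint": "https://router.huggingface.co/v1/chat/completions",
}
```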
 
+class AIChat:
     def __init__(self):
         self.hf_token = os.getenv("HF_TOKEN")
         if not self.hf_token:
+            raise ValueError("HF_TOKEN environment variable is required")
 
+        self.headers = {
+            "Authorization": f"Bearer {self.hf_token}",
+            "Content-Type": "application/json"
+        }
 
+    def send_message(self, model_id: str, message: str, conversation_history: List = None) -> Dict:
+        """Send a chat message to the selected AI model"""
+        if model_id not in ADVANCED_MODELS:
             return {
+                "success": False,
+                "error": "Selected model is not available"
             }
 
+        model_info = ADVANCED_MODELS[model_id]
+
+        # Build conversation with history
+        messages = []
+        if conversation_history:
+            messages.extend(conversation_history)
+        messages.append({"role": "user", "content": message})
+
+        payload = {
+            "model": model_id,
+            "messages": messages,
+            "max_tokens": 1000,
+            "temperature": 0.7,
+            "stream": False
+        }
 
         try:
+            response = requests.post(
+                model_info["endpoint"],
+                headers=self.headers,
+                json=payload,
+                timeout=60
+            )
 
+            if response.status_code == 200:
+                result = response.json()
+                if "choices" in result and len(result["choices"]) > 0:
+                    ai_response = result["choices"][0]["message"]["content"]
+                    return {
+                        "success": True,
+                        "response": ai_response,
+                        "model": model_info["display_name"],
+                        "provider": model_info["provider"]
+                    }
+                else:
+                    return {
+                        "success": False,
+                        "error": "No response generated"
+                    }
             else:
                 return {
+                    "success": False,
+                    "error": f"API Error: {response.status_code} - {response.text}"
                 }
 
         except Exception as e:
             return {
+                "success": False,
+                "error": f"Connection error: {str(e)}"
             }
 
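`send_message` reduces to a single OpenAI-compatible POST against the HF router. A minimal standalone sketch of the same call, assuming `HF_TOKEN` is set and the model is currently served through the router:

```python
import os
import requests

# Same endpoint and payload shape as AIChat.send_message above.
resp = requests.post(
    "https://router.huggingface.co/v1/chat/completions",
    headers={
        "Authorization": f"Bearer {os.environ['HF_TOKEN']}",
        "Content-Type": "application/json",
    },
    json={
        "model": "meta-llama/Llama-3.3-70B-Instruct",
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 100,
        "temperature": 0.7,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```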
+def create_chat_interface():
     try:
+        chat_ai = AIChat()
     except ValueError as e:
+        # Create error interface
+        with gr.Blocks(title="❌ Setup Required") as demo:
             gr.Markdown(f"""
+            # ❌ Setup Required
 
+            **{str(e)}**
 
            Please set the `HF_TOKEN` environment variable with your HuggingFace token.
 
+            Get your token at: https://huggingface.co/settings/tokens
            """)
         return demo
 
+    # Create model choices for dropdown
+    model_choices = [
+        (f"🚀 {info['display_name']} - {info['description']}", model_id)
+        for model_id, info in ADVANCED_MODELS.items()
+    ]
 
+    def chat_with_ai(message, history, selected_model):
+        """Handle chat conversation"""
+        if not message.strip():
+            # This function is a generator, so early exits must yield
+            # their outputs rather than return them
+            yield history, ""
+            return
+
+        if not selected_model:
+            history.append([message, "❌ Please select an AI model first"])
+            yield history, ""
+            return
+
+        # Show typing indicator
+        history.append([message, "🤔 Thinking..."])
+        yield history, ""
+
+        # Convert gradio history to API format
+        conversation_history = []
+        for user_msg, ai_msg in history[:-1]:  # Exclude the current "thinking" message
+            if user_msg and ai_msg and ai_msg != "🤔 Thinking...":
+                conversation_history.append({"role": "user", "content": user_msg})
+                conversation_history.append({"role": "assistant", "content": ai_msg})
+
+        # Send message to AI
+        result = chat_ai.send_message(selected_model, message, conversation_history)
+
+        if result["success"]:
+            # Update the last message with the real response
+            history[-1] = [message, result["response"]]
+            yield history, ""
+        else:
+            # Update with error message
+            history[-1] = [message, f"❌ Error: {result['error']}"]
+            yield history, ""
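For reference, the loop above flattens Gradio's pair-based chat history into the role/content list the router expects, skipping the still-pending turn. A tiny worked example (values illustrative):

```python
# Gradio Chatbot history: a list of [user, assistant] pairs.
history = [
    ["Hi", "Hello! How can I help?"],
    ["What is 2+2?", "🤔 Thinking..."],  # current turn, still pending
]

messages = []
for user_msg, ai_msg in history[:-1]:  # exclude the pending turn
    if user_msg and ai_msg and ai_msg != "🤔 Thinking...":
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": ai_msg})

# messages is now:
# [{"role": "user", "content": "Hi"},
#  {"role": "assistant", "content": "Hello! How can I help?"}]
```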
 
+    def clear_chat():
+        """Clear the chat history"""
+        return [], ""
+
+    def get_model_info(selected_model):
+        """Get information about the selected model"""
+        if not selected_model or selected_model not in ADVANCED_MODELS:
+            return "Select a model to see details"
+
+        info = ADVANCED_MODELS[selected_model]
+        return f"""
+        **🤖 {info['display_name']}**
 
+        **Provider:** {info['provider']}
+        **Category:** {info['category']}
+        **Description:** {info['description']}
 
+        Ready to chat! Type your message below.
        """
 
+    # Create the interface
+    with gr.Blocks(
+        title="🤖 Chat with Advanced AI Models",
+        theme=gr.themes.Soft(),
+        css="""
+        .chat-container {
+            max-width: 1000px;
+            margin: 0 auto;
+        }
+        .model-info {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 15px;
+            border-radius: 10px;
+            margin: 10px 0;
+        }
+        """
+    ) as demo:
 
         gr.Markdown("""
+        # 🤖 Chat with Advanced AI Models
 
+        **Experience the latest AI technology!** Choose from powerful models and start chatting instantly.
 
+        **What you can do:**
+        - Ask questions and get intelligent answers
+        - Get help with writing, analysis, and creative tasks
+        - Solve problems and get explanations
+        - Have natural conversations
        """)
 
+        with gr.Row():
+            # Left column - Model selection
+            with gr.Column(scale=1):
+                gr.Markdown("### 🎯 Choose Your AI")
 
+                model_selector = gr.Dropdown(
+                    choices=model_choices,
+                    label="Select AI Model",
+                    info="Each model has different strengths",
+                    interactive=True
                )
 
+                model_info_display = gr.Markdown(
+                    "Select a model to see details",
+                    elem_classes=["model-info"]
                )
 
+                # Update model info when selection changes
+                model_selector.change(
+                    get_model_info,
+                    inputs=model_selector,
+                    outputs=model_info_display
                )
 
+            # Right column - Chat interface
+            with gr.Column(scale=2):
+                gr.Markdown("### 💬 Chat Interface")
 
+                chatbot = gr.Chatbot(
+                    label="Conversation",
+                    height=400,
+                    show_label=False,
+                    container=True,
+                    elem_classes=["chat-container"]
                )
 
                with gr.Row():
+                    message_input = gr.Textbox(
+                        placeholder="Type your message here...",
+                        label="Your Message",
+                        scale=4,
+                        lines=1
                    )
+                    send_btn = gr.Button("Send 📤", variant="primary", scale=1)
 
+                with gr.Row():
+                    clear_btn = gr.Button("Clear Chat 🗑️", variant="secondary")
+
+        # Chat functionality
+        def submit_message(message, history, model):
+            # chat_with_ai is a generator; delegate with "yield from" so
+            # Gradio streams the interim "Thinking..." update to the UI
+            yield from chat_with_ai(message, history, model)
+
+        # Send message on button click or enter
+        send_btn.click(
+            submit_message,
+            inputs=[message_input, chatbot, model_selector],
+            outputs=[chatbot, message_input]
+        ).then(
+            lambda: "", outputs=message_input  # Clear input after sending
+        )
 
+        message_input.submit(
+            submit_message,
+            inputs=[message_input, chatbot, model_selector],
+            outputs=[chatbot, message_input]
+        ).then(
+            lambda: "", outputs=message_input  # Clear input after sending
+        )
 
+        # Clear chat
+        clear_btn.click(clear_chat, outputs=[chatbot, message_input])
 
+        # Footer
+        gr.Markdown("""
+        ---
 
+        ## 🚀 **Featured AI Models:**
 
+        - **🚀 Ultra Fast**: Llama 3.3 70B on Cerebras chips
+        - **🧠 Reasoning**: DeepSeek R1 for complex problem solving
+        - **💪 Most Powerful**: Llama 3.1 405B for expert tasks
+        - **⚖️ Balanced**: Llama 3 70B for everyday use
+        - **💼 Enterprise**: Command R+ for professional work
+        - **🌍 Multilingual**: Qwen 2.5 72B for global communication
+        - **⚡ Efficient**: Mixtral 8x7B for quick responses
 
+        ## 💡 **Tips for Better Conversations:**
 
+        - Be specific about what you want
+        - Ask follow-up questions for deeper insights
+        - Try different models for different types of tasks
+        - Use clear, natural language
 
        ---
 
+        *Powered by HuggingFace Inference Providers* 🤗
        """)
 
     return demo
 
 if __name__ == "__main__":
     try:
+        demo = create_chat_interface()
         demo.launch(
             server_name="0.0.0.0",
             server_port=7860,
             share=False
         )
     except Exception as e:
+        print(f"Error starting chat application: {e}")
         print("Please ensure HF_TOKEN environment variable is set.")