feat(app): add Gradio AI chat and image generation app
- [add] Implement new Gradio web application (app.py)
- [feat] Add chat completion function with token management (app.py:chat_respond())
- [feat] Add image generation function with token management (app.py:generate_image())
- [feat] Implement image dimension validation utility (app.py:validate_dimensions())
- [feat] Set up Gradio UI with Chat Assistant and Image Generator tabs (app.py)
- [feat] Add handler for image generation button (app.py:on_generate_image())
- [add] Create new module for HF-Inferoxy proxy token utilities (hf_token_utils.py)
- [add] Define function to provision proxy tokens (hf_token_utils.py:get_proxy_token())
- [add] Define function to report token usage status (hf_token_utils.py:report_token_status())
- .gitattributes +35 -0
- README.md +272 -0
- app.py +432 -0
- hf_token_utils.py +83 -0
- requirements.txt +4 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md
ADDED
@@ -0,0 +1,272 @@
---
title: HF-Inferoxy AI Hub
emoji: 🚀
colorFrom: purple
colorTo: blue
sdk: gradio
app_file: app.py
pinned: false
---

# 🚀 HF-Inferoxy AI Hub

A comprehensive AI platform that combines conversational AI and text-to-image generation with intelligent HuggingFace API token management through HF-Inferoxy.

## ✨ Features

### 💬 Chat Assistant
- **🤖 Smart Conversations**: Advanced chat interface with streaming responses
- **🎯 Model Flexibility**: Support for any HuggingFace chat model
- **⚙️ Customizable Parameters**: Control temperature, top-p, max tokens, and system messages
- **🌐 Multi-Provider Support**: Works with Cerebras, Cohere, Groq, Together, and more

### 🎨 Image Generator
- **🖼️ Text-to-Image Generation**: Create stunning images from text descriptions
- **🎛️ Advanced Controls**: Fine-tune dimensions, inference steps, guidance scale, and seeds
- **🎯 Multiple Providers**: HF Inference, Fal.ai, Nebius, NScale, Replicate, Together
- **📱 Beautiful UI**: Modern interface with preset configurations and examples

### 🔄 Smart Token Management
- **🚀 Automatic Token Provisioning**: No manual token management required
- **⚡ Intelligent Rotation**: Automatic switching when tokens fail or reach limits
- **🛡️ Error Resilience**: Failed tokens are quarantined and replaced seamlessly
- **📊 Usage Tracking**: Comprehensive monitoring of token usage and errors

## 🛠️ Setup

### 1. HuggingFace Space Secrets

Add the following secret to your HuggingFace Space:

- **Key**: `PROXY_KEY`
- **Value**: Your HF-Inferoxy proxy API key

### 2. HF-Inferoxy Server

The app is configured to use the HF-Inferoxy server at `http://scw.nazdev.tech:11155`.

### 3. Dependencies

The app requires (see `requirements.txt`):
- `gradio` - Modern web interface framework
- `huggingface-hub` - HuggingFace API integration
- `requests` - HTTP communication with the proxy
- `Pillow` - Image processing capabilities

## 🎯 How It Works

### Token Management Flow
1. **Token Provisioning**: The app requests a valid token from the HF-Inferoxy server
2. **API Calls**: Uses the provisioned token for HuggingFace API requests
3. **Status Reporting**: Reports token usage success or failure back to the proxy
4. **Automatic Rotation**: HF-Inferoxy handles token rotation and error management
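
In code, this flow is a thin wrapper around the two helpers in `hf_token_utils.py`. A minimal sketch (the model choice and prompt are illustrative):

```python
import os

from huggingface_hub import InferenceClient
from hf_token_utils import get_proxy_token, report_token_status

proxy_key = os.environ["PROXY_KEY"]

# 1. Provision a valid token from the HF-Inferoxy server
token, token_id = get_proxy_token(api_key=proxy_key)

# 2. Use the provisioned token like a regular HF token
client = InferenceClient(provider="auto", api_key=token)
result = client.chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],
    model="openai/gpt-oss-20b",  # illustrative model
    max_tokens=64,
)

# 3./4. Report the outcome so the proxy can rotate or quarantine tokens
report_token_status(token_id, "success", api_key=proxy_key)
```
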
### Chat Assistant
1. **Model Selection**: Choose any HuggingFace model, with an optional provider suffix
2. **Conversation**: Engage in natural conversations with streaming responses
3. **Customization**: Adjust the AI's personality with system messages and parameters

### Image Generation
1. **Prompt Creation**: Write detailed descriptions of desired images
2. **Model & Provider**: Select from preset combinations or specify custom ones
3. **Parameter Tuning**: Fine-tune generation settings for optimal results
4. **Image Creation**: Generate high-quality images with automatic token management

## 🌟 Supported Models & Providers

### Chat Models

| Model | Provider | Description |
|-------|----------|-------------|
| `openai/gpt-oss-20b` | Fireworks AI, Cerebras, Groq | Fast general-purpose model |
| `meta-llama/Llama-2-7b-chat-hf` | HF Inference | Chat-optimized model |
| `mistralai/Mistral-7B-Instruct-v0.2` | Featherless AI | Instruction following |
| `CohereLabs/c4ai-command-r-plus` | Cohere | Advanced language model |

### Image Models

| Model | Provider | Description |
|-------|----------|-------------|
| `stabilityai/stable-diffusion-xl-base-1.0` | HF Inference, NScale | High-quality SDXL model |
| `black-forest-labs/FLUX.1-dev` | Nebius, Together | State-of-the-art image model |
| `Qwen/Qwen-Image` | Fal.ai, Replicate | Advanced image generation |

## 🎨 Usage Examples

### Chat Assistant

#### Basic Conversation
1. Go to the "💬 Chat Assistant" tab
2. Type your message in the chat input
3. Adjust parameters if needed (temperature, model, etc.)
4. Watch the AI respond with streaming text

#### Custom Model with Provider
```
Model Name: openai/gpt-oss-20b:fireworks-ai
System Message: You are a helpful coding assistant specializing in Python.
```

### Image Generation

#### Basic Image Creation
1. Go to the "🎨 Image Generator" tab
2. Enter your prompt: "A serene mountain lake at sunset, photorealistic, 8k"
3. Choose a model and provider
4. Click "🎨 Generate Image"

#### Advanced Settings
- **Dimensions**: 1024x1024 (must be divisible by 8; validated before generation, see the check below)
- **Inference Steps**: 20-50 for good quality
- **Guidance Scale**: 7-10 for following prompts closely
- **Negative Prompt**: "blurry, low quality, distorted"
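
The app guards the divisible-by-8 rule with a small check, equivalent to the `validate_dimensions` helper in `app.py`:

```python
def validate_dimensions(width: int, height: int) -> tuple[bool, str]:
    """Most diffusion models require width and height divisible by 8."""
    if width % 8 != 0 or height % 8 != 0:
        return False, "Width and height must be divisible by 8"
    return True, ""

ok, msg = validate_dimensions(1024, 900)  # -> (False, "Width and height must be divisible by 8")
```
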
## ⚙️ Configuration Options

### Chat Parameters
- **System Message**: Define the AI's personality and behavior
- **Max New Tokens**: Control response length (1-4096)
- **Temperature**: Creativity level (0.1-2.0)
- **Top-p**: Response diversity (0.1-1.0)

### Image Parameters
- **Prompt**: Detailed description of the desired image
- **Negative Prompt**: What to avoid in the image
- **Dimensions**: Width and height (256-2048, divisible by 8)
- **Inference Steps**: Quality vs. speed trade-off (10-100)
- **Guidance Scale**: Prompt adherence (1.0-20.0)
- **Seed**: Reproducibility (-1 for random)

## 🎯 Provider-Specific Features

### Chat Providers
- **Fireworks AI**: Fast and reliable inference service
- **Cerebras**: High-performance inference with low latency
- **Cohere**: Advanced language models with multilingual support
- **Groq**: Ultra-fast inference, optimized for speed
- **Together**: Collaborative AI hosting, wide model support
- **Featherless AI**: Specialized fine-tuned models

### Image Providers
- **HF Inference**: Core API with comprehensive model support
- **Fal.ai**: High-quality image generation with fast processing
- **Nebius**: Cloud-native services with enterprise features
- **NScale**: Optimized inference performance
- **Replicate**: Collaborative AI hosting with version control
- **Together**: Fast inference service with wide model support

## 💡 Tips for Better Results

### Chat Tips
- **Clear Instructions**: Be specific about what you want
- **System Messages**: Set context and personality upfront
- **Model Selection**: Choose appropriate models for your task
- **Parameter Tuning**: Lower temperature for factual responses, higher for creativity

### Image Tips
- **Detailed Prompts**: Use specific, descriptive language
- **Style Keywords**: Include art style, lighting, and quality descriptors
- **Negative Prompts**: Specify what you don't want, to head off common issues
- **Aspect Ratios**: Consider the subject when choosing dimensions
- **Provider Testing**: Try different providers for varied artistic styles

### Example Prompts

#### Chat Examples
```
"Explain quantum computing in simple terms"
"Help me debug this Python code: [paste code]"
"Write a creative story about a time-traveling cat"
"What are the pros and cons of renewable energy?"
```

#### Image Examples
```
"A majestic dragon flying over a medieval castle, epic fantasy art, detailed, 8k"
"A serene Japanese garden with cherry blossoms, zen atmosphere, peaceful, high quality"
"A futuristic cityscape with flying cars and neon lights, cyberpunk style, cinematic"
"Portrait of a wise old wizard with flowing robes, magical aura, fantasy character art"
```

## 🔒 Security & Authentication

### RBAC System
- All operations require authentication with the HF-Inferoxy proxy server
- API keys are managed securely through HuggingFace Space secrets
- No sensitive information is logged or exposed

### Token Security
- Tokens are automatically rotated when they fail or reach limits
- Failed tokens are quarantined to prevent repeated failures
- Usage is tracked comprehensively for monitoring and optimization

## 🐛 Troubleshooting

### Common Issues

#### Setup Issues
1. **PROXY_KEY Missing**: Ensure the secret is set in your HuggingFace Space settings
2. **Connection Errors**: Verify the HF-Inferoxy server is accessible
3. **Import Errors**: Check that all dependencies are properly installed

#### Chat Issues
1. **No Response**: Check the model name format and provider availability
2. **Slow Responses**: Try different providers or smaller models
3. **Poor Quality**: Adjust the temperature and top-p parameters

#### Image Issues
1. **Generation Fails**: Verify the model supports text-to-image generation
2. **Dimension Errors**: Ensure width and height are divisible by 8
3. **Poor Quality**: Increase inference steps or adjust the guidance scale

### Error Types

The app distinguishes four classes of failure, and classifies the first two for the proxy (see the sketch below):

- **401 Errors**: Authentication issues (handled automatically by token rotation)
- **402 Errors**: Credit limit exceeded (reported to the proxy for token management)
- **Network Errors**: Connection issues (reported to the proxy for monitoring)
- **Model Errors**: Invalid model or provider combinations
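
The 401/402 cases are detected in `hf_token_utils.report_token_status` by substring match on the HF error text; condensed here (the error string is an illustrative example):

```python
error = "401 Client Error: Unauthorized for url: https://..."  # illustrative HF error text
error_type = None
if "401 Client Error" in error:
    error_type = "invalid_credentials"
elif "402 Client Error" in error and "exceeded your monthly included credits" in error:
    error_type = "credits_exceeded"
```
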
## 📚 Additional Resources

- **[HF-Inferoxy Documentation](https://nazdridoy.github.io/hf-inferoxy/)**: Complete platform documentation
- **[HuggingFace Hub Integration Guide](https://nazdridoy.github.io/hf-inferoxy/huggingface-hub-integration/)**: Detailed integration instructions
- **[Provider Examples](https://nazdridoy.github.io/hf-inferoxy/examples/)**: Code examples for different providers
- **[Gradio Documentation](https://gradio.app/docs/)**: Interface framework documentation

## 🤝 Contributing

This application is part of the HF-Inferoxy ecosystem. For contributions or issues:

1. Review the [HF-Inferoxy documentation](https://nazdridoy.github.io/hf-inferoxy/)
2. Test with different models and providers
3. Report any issues or suggest improvements
4. Contribute examples and use cases

## 🚀 Advanced Usage

### Custom Proxy URL

The proxy URL defaults to `http://scw.nazdev.tech:11155`; point the app at a different server by passing `proxy_url` to the token utilities:

```python
from hf_token_utils import get_proxy_token

token, token_id = get_proxy_token(proxy_url="http://your-proxy-server:8000", api_key=proxy_api_key)
```

### Custom Providers

The app supports any provider that works with HF-Inferoxy. Simply specify the provider name when entering model information (a brief example follows).
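
For chat, the provider is appended after a colon (`model:provider`); for images, the dropdown value is passed straight to the client. Any provider name accepted by `huggingface_hub`'s `InferenceClient` should work here, e.g.:

```python
import os

from huggingface_hub import InferenceClient
from hf_token_utils import get_proxy_token

# "replicate" stands in for any HF-Inferoxy-compatible provider name
token, _token_id = get_proxy_token(api_key=os.environ["PROXY_KEY"])
client = InferenceClient(provider="replicate", api_key=token)
```
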
### Batch Operations

For multiple operations, consider the token reuse patterns documented in the HF-Inferoxy integration guide; a rough sketch of the idea follows.
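
This is not the guide's exact pattern, just the shape of it using this repo's utilities: provision once, reuse the token across calls, and report once per batch (the prompts and model are illustrative):

```python
import os

from huggingface_hub import InferenceClient
from hf_token_utils import get_proxy_token, report_token_status

proxy_key = os.environ["PROXY_KEY"]
token, token_id = get_proxy_token(api_key=proxy_key)
client = InferenceClient(provider="hf-inference", api_key=token)

prompts = ["a red bicycle", "a snowy cabin", "a paper boat"]  # illustrative batch
try:
    # Reuse one provisioned token for the whole batch
    images = [
        client.text_to_image(p, model="stabilityai/stable-diffusion-xl-base-1.0")
        for p in prompts
    ]
    report_token_status(token_id, "success", api_key=proxy_key)
except Exception as e:
    # A single failure report lets the proxy quarantine or rotate the token
    report_token_status(token_id, "error", str(e), api_key=proxy_key)
    raise
```
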
## 📄 License

This project is part of the HF-Inferoxy ecosystem. Please refer to the main project for licensing information.

---

**Built with ❤️ using [HF-Inferoxy](https://nazdridoy.github.io/hf-inferoxy/) for intelligent token management**

**Ready to explore AI? Start chatting or generating images above! 🚀**

app.py
ADDED
@@ -0,0 +1,432 @@
import os

import gradio as gr
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError

from hf_token_utils import get_proxy_token, report_token_status


def chat_respond(
    message,
    history: list[dict[str, str]],
    system_message,
    model_name,
    max_tokens,
    temperature,
    top_p,
):
    """
    Chat completion function using HF-Inferoxy token management.
    """
    # Get the proxy API key from the environment (set in HuggingFace Space secrets)
    proxy_api_key = os.getenv("PROXY_KEY")
    if not proxy_api_key:
        yield "❌ Error: PROXY_KEY not found in environment variables. Please set it in your HuggingFace Space secrets."
        return

    try:
        # Get a token from the HF-Inferoxy proxy server
        print("🔑 Chat: Requesting token from proxy...")
        token, token_id = get_proxy_token(api_key=proxy_api_key)
        print(f"✅ Chat: Got token: {token_id}")

        # Parse the model name and optional provider ("model:provider")
        if ":" in model_name:
            model, provider = model_name.split(":", 1)
        else:
            model = model_name
            provider = None

        print(f"🤖 Chat: Using model='{model}', provider='{provider if provider else 'auto'}'")

        # Prepare the message list: system prompt, prior turns, then the new user message
        messages = [{"role": "system", "content": system_message}]
        messages.extend(history)
        messages.append({"role": "user", "content": message})

        print(f"💬 Chat: Prepared {len(messages)} messages, creating client...")

        # Create a client with the requested provider (auto if none specified)
        client = InferenceClient(
            provider=provider if provider else "auto",
            api_key=token,
        )

        print("🚀 Chat: Client created, starting inference...")

        chat_completion_kwargs = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "stream": True,
            "temperature": temperature,
            "top_p": top_p,
        }

        response = ""

        print("📡 Chat: Making streaming request...")
        stream = client.chat_completion(**chat_completion_kwargs)
        print("🔄 Chat: Got stream, starting to iterate...")

        # Accumulate streamed deltas and yield the running response
        for chunk in stream:
            choices = chunk.choices
            token_content = ""
            if choices and choices[0].delta.content:
                token_content = choices[0].delta.content

            response += token_content
            yield response

        # Report successful token usage
        report_token_status(token_id, "success", api_key=proxy_api_key)

    except HfHubHTTPError as e:
        # Report HF Hub errors
        if 'token_id' in locals():
            report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        yield f"❌ HuggingFace API Error: {str(e)}"

    except Exception as e:
        # Report other errors
        if 'token_id' in locals():
            report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        yield f"❌ Unexpected Error: {str(e)}"


def generate_image(
    prompt: str,
    model_name: str,
    provider: str,
    negative_prompt: str = "",
    width: int = 1024,
    height: int = 1024,
    num_inference_steps: int = 20,
    guidance_scale: float = 7.5,
    seed: int = -1,
):
    """
    Generate an image using the specified model and provider through HF-Inferoxy.
    """
    # Get the proxy API key from the environment (set in HuggingFace Space secrets)
    proxy_api_key = os.getenv("PROXY_KEY")
    if not proxy_api_key:
        return None, "❌ Error: PROXY_KEY not found in environment variables. Please set it in your HuggingFace Space secrets."

    try:
        # Get a token from the HF-Inferoxy proxy server
        token, token_id = get_proxy_token(api_key=proxy_api_key)

        # Create a client with the specified provider
        client = InferenceClient(
            provider=provider,
            api_key=token,
        )

        # Prepare generation parameters
        generation_params = {
            "model": model_name,
            "prompt": prompt,
            "width": width,
            "height": height,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": guidance_scale,
        }

        # Add optional parameters if provided
        if negative_prompt:
            generation_params["negative_prompt"] = negative_prompt
        if seed != -1:
            generation_params["seed"] = seed

        # Generate the image
        image = client.text_to_image(**generation_params)

        # Report successful token usage
        report_token_status(token_id, "success", api_key=proxy_api_key)

        return image, f"✅ Image generated successfully using {model_name} on {provider}!"

    except HfHubHTTPError as e:
        # Report HF Hub errors
        if 'token_id' in locals():
            report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        return None, f"❌ HuggingFace API Error: {str(e)}"

    except Exception as e:
        # Report other errors
        if 'token_id' in locals():
            report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
        return None, f"❌ Unexpected Error: {str(e)}"


def validate_dimensions(width, height):
    """Validate that dimensions are divisible by 8 (required by most diffusion models)."""
    if width % 8 != 0 or height % 8 != 0:
        return False, "Width and height must be divisible by 8"
    return True, ""


# Create the main Gradio interface with tabs
with gr.Blocks(title="HF-Inferoxy AI Hub", theme=gr.themes.Soft()) as demo:

    # Main header
    gr.Markdown("""
    # 🚀 HF-Inferoxy AI Hub

    A comprehensive AI platform combining chat and image generation capabilities with intelligent token management through HF-Inferoxy.

    **Features:**
    - 💬 **Smart Chat**: Conversational AI with streaming responses
    - 🎨 **Image Generation**: Text-to-image creation with multiple providers
    - 🔄 **Intelligent Token Management**: Automatic token rotation and error handling
    - 🌐 **Multi-Provider Support**: Works with HF Inference, Cerebras, Cohere, Groq, Together, Fal.ai, and more
    """)

    with gr.Tabs() as tabs:

        # ==================== CHAT TAB ====================
        with gr.Tab("💬 Chat Assistant", id="chat"):
            with gr.Row():
                with gr.Column(scale=3):
                    # Create the chat interface
                    chatbot = gr.ChatInterface(
                        chat_respond,
                        type="messages",
                        title="",
                        description="",
                        additional_inputs=[
                            gr.Textbox(
                                value="You are a helpful and friendly AI assistant. Provide clear, accurate, and helpful responses.",
                                label="System Message",
                                lines=2,
                                placeholder="Define the assistant's personality and behavior..."
                            ),
                            gr.Textbox(
                                value="openai/gpt-oss-20b:fireworks-ai",
                                label="Model Name",
                                placeholder="e.g., openai/gpt-oss-20b:fireworks-ai or mistralai/Mistral-7B-Instruct-v0.2:groq"
                            ),
                            gr.Slider(
                                minimum=1, maximum=4096, value=1024, step=1,
                                label="Max New Tokens"
                            ),
                            gr.Slider(
                                minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                                label="Temperature"
                            ),
                            gr.Slider(
                                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                                label="Top-p (nucleus sampling)"
                            ),
                        ],
                    )

                with gr.Column(scale=1):
                    gr.Markdown("""
                    ### 💡 Chat Tips

                    **Model Format:**
                    - Single model: `openai/gpt-oss-20b`
                    - With provider: `model:provider`

                    **Popular Models:**
                    - `openai/gpt-oss-20b` - Fast general purpose
                    - `meta-llama/Llama-2-7b-chat-hf` - Chat optimized
                    - `microsoft/DialoGPT-medium` - Conversation
                    - `google/flan-t5-base` - Instruction following

                    **Popular Providers:**
                    - `fireworks-ai` - Fast and reliable
                    - `cerebras` - High performance
                    - `groq` - Ultra-fast inference
                    - `together` - Wide model support
                    - `cohere` - Advanced language models

                    **Example:**
                    `openai/gpt-oss-20b:fireworks-ai`
                    """)

        # ==================== IMAGE GENERATION TAB ====================
        with gr.Tab("🎨 Image Generator", id="image"):
            with gr.Row():
                with gr.Column(scale=2):
                    # Image output
                    output_image = gr.Image(
                        label="Generated Image",
                        type="pil",
                        height=600,
                        show_download_button=True
                    )
                    status_text = gr.Textbox(
                        label="Generation Status",
                        interactive=False,
                        lines=2
                    )

                with gr.Column(scale=1):
                    # Model and provider inputs
                    with gr.Group():
                        gr.Markdown("**🤖 Model & Provider**")
                        img_model_name = gr.Textbox(
                            value="stabilityai/stable-diffusion-xl-base-1.0",
                            label="Model Name",
                            placeholder="e.g., stabilityai/stable-diffusion-xl-base-1.0"
                        )
                        img_provider = gr.Dropdown(
                            choices=["hf-inference", "fal-ai", "nebius", "nscale", "replicate", "together"],
                            value="hf-inference",
                            label="Provider",
                            interactive=True
                        )

                    # Generation parameters
                    with gr.Group():
                        gr.Markdown("**📝 Prompts**")
                        img_prompt = gr.Textbox(
                            value="A beautiful landscape with mountains and a lake at sunset, photorealistic, 8k, highly detailed",
                            label="Prompt",
                            lines=3,
                            placeholder="Describe the image you want to generate..."
                        )
                        img_negative_prompt = gr.Textbox(
                            value="blurry, low quality, distorted, deformed, ugly, bad anatomy",
                            label="Negative Prompt",
                            lines=2,
                            placeholder="Describe what you DON'T want in the image..."
                        )

                    with gr.Group():
                        gr.Markdown("**⚙️ Generation Settings**")
                        with gr.Row():
                            img_width = gr.Slider(
                                minimum=256, maximum=2048, value=1024, step=64,
                                label="Width", info="Must be divisible by 8"
                            )
                            img_height = gr.Slider(
                                minimum=256, maximum=2048, value=1024, step=64,
                                label="Height", info="Must be divisible by 8"
                            )

                        with gr.Row():
                            img_steps = gr.Slider(
                                minimum=10, maximum=100, value=20, step=1,
                                label="Inference Steps", info="More steps = better quality"
                            )
                            img_guidance = gr.Slider(
                                minimum=1.0, maximum=20.0, value=7.5, step=0.5,
                                label="Guidance Scale", info="How closely to follow the prompt"
                            )

                        img_seed = gr.Slider(
                            minimum=-1, maximum=999999, value=-1, step=1,
                            label="Seed", info="-1 for random"
                        )

                    # Generate button
                    generate_btn = gr.Button(
                        "🎨 Generate Image",
                        variant="primary",
                        size="lg",
                        scale=2
                    )

                    # Quick model presets
                    with gr.Group():
                        gr.Markdown("**🎯 Popular Presets**")
                        presets = [
                            ("SDXL (HF)", "stabilityai/stable-diffusion-xl-base-1.0", "hf-inference"),
                            ("FLUX.1 (Nebius)", "black-forest-labs/FLUX.1-dev", "nebius"),
                            ("Qwen (Fal.ai)", "Qwen/Qwen-Image", "fal-ai"),
                            ("SDXL (NScale)", "stabilityai/stable-diffusion-xl-base-1.0", "nscale"),
                        ]

                        # Bind each preset's model/provider via lambda default arguments
                        for name, model, provider in presets:
                            btn = gr.Button(name, size="sm")
                            btn.click(
                                lambda m=model, p=provider: (m, p),
                                outputs=[img_model_name, img_provider]
                            )

                    # Examples for image generation
                    with gr.Group():
                        gr.Markdown("**🌟 Example Prompts**")
                        img_examples = gr.Examples(
                            examples=[
                                ["A majestic dragon flying over a medieval castle, epic fantasy art, detailed, 8k"],
                                ["A serene Japanese garden with cherry blossoms, zen atmosphere, peaceful, high quality"],
                                ["A futuristic cityscape with flying cars and neon lights, cyberpunk style, cinematic"],
                                ["A cute robot cat playing with yarn, adorable, cartoon style, vibrant colors"],
                                ["A magical forest with glowing mushrooms and fairy lights, fantasy, ethereal beauty"],
                                ["Portrait of a wise old wizard with flowing robes, magical aura, fantasy character art"],
                                ["A cozy coffee shop on a rainy day, warm lighting, peaceful atmosphere, detailed"],
                                ["An astronaut floating in space with Earth in background, photorealistic, stunning"]
                            ],
                            inputs=img_prompt
                        )

            # Event handler for image generation
            def on_generate_image(prompt_val, model_val, provider_val, negative_prompt_val, width_val, height_val, steps_val, guidance_val, seed_val):
                # Validate dimensions before calling the API
                is_valid, error_msg = validate_dimensions(width_val, height_val)
                if not is_valid:
                    return None, f"❌ Validation Error: {error_msg}"

                # Generate the image
                return generate_image(
                    prompt=prompt_val,
                    model_name=model_val,
                    provider=provider_val,
                    negative_prompt=negative_prompt_val,
                    width=width_val,
                    height=height_val,
                    num_inference_steps=steps_val,
                    guidance_scale=guidance_val,
                    seed=seed_val
                )

            # Connect image generation events
            generate_btn.click(
                fn=on_generate_image,
                inputs=[
                    img_prompt, img_model_name, img_provider, img_negative_prompt,
                    img_width, img_height, img_steps, img_guidance, img_seed
                ],
                outputs=[output_image, status_text]
            )

    # Footer with helpful information
    gr.Markdown("""
    ---
    ### 📚 How to Use

    **Chat Tab:**
    - Enter your message and customize the AI's behavior with system messages
    - Choose models and providers using the format `model:provider`
    - Adjust temperature for creativity and top-p for response diversity

    **Image Tab:**
    - Write detailed prompts describing your desired image
    - Use negative prompts to avoid unwanted elements
    - Experiment with different models and providers for varied styles
    - Higher inference steps = better quality but slower generation

    **Supported Providers:**
    - **hf-inference**: Core API with comprehensive model support
    - **cerebras**: High-performance inference
    - **cohere**: Advanced language models with multilingual support
    - **groq**: Ultra-fast inference, optimized for speed
    - **together**: Collaborative AI hosting, wide model support
    - **fal-ai**: High-quality image generation
    - **nebius**: Cloud-native services with enterprise features
    - **nscale**: Optimized inference performance
    - **replicate**: Collaborative AI hosting

    **Built with ❤️ using [HF-Inferoxy](https://nazdridoy.github.io/hf-inferoxy/) for intelligent token management**
    """)


if __name__ == "__main__":
    demo.launch()

hf_token_utils.py
ADDED
@@ -0,0 +1,83 @@
# hf_token_utils.py
import os
from typing import Optional, Tuple

import requests


def get_proxy_token(proxy_url: str = "http://scw.nazdev.tech:11155", api_key: Optional[str] = None) -> Tuple[str, str]:
    """
    Get a valid token from the proxy server.

    Args:
        proxy_url: URL of the HF-Inferoxy server
        api_key: Your API key for authenticating with the proxy server

    Returns:
        Tuple of (token, token_id)

    Raises:
        Exception: If token provisioning fails
    """
    headers = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    response = requests.get(f"{proxy_url}/keys/provision", headers=headers)
    if response.status_code != 200:
        raise Exception(f"Failed to provision token: {response.text}")

    data = response.json()
    token = data["token"]
    token_id = data["token_id"]

    # For convenience, also set the environment variable
    os.environ["HF_TOKEN"] = token

    return token, token_id


def report_token_status(
    token_id: str,
    status: str = "success",
    error: Optional[str] = None,
    proxy_url: str = "http://scw.nazdev.tech:11155",
    api_key: Optional[str] = None
) -> bool:
    """
    Report token usage status back to the proxy server.

    Args:
        token_id: ID of the token to report (from get_proxy_token)
        status: Status to report ('success' or 'error')
        error: Error message if status is 'error'
        proxy_url: URL of the HF-Inferoxy server
        api_key: Your API key for authenticating with the proxy server

    Returns:
        True if the report was accepted, False otherwise
    """
    payload = {"token_id": token_id, "status": status}

    if error:
        payload["error"] = error

        # Classify the error based on known HF error message patterns
        error_type = None
        if "401 Client Error" in error:
            error_type = "invalid_credentials"
        elif "402 Client Error" in error and "exceeded your monthly included credits" in error:
            error_type = "credits_exceeded"

        if error_type:
            payload["error_type"] = error_type

    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        response = requests.post(f"{proxy_url}/keys/report", json=payload, headers=headers)
        return response.status_code == 200
    except Exception:
        # Fail silently to avoid breaking the client application;
        # in production, consider logging this error.
        return False

requirements.txt
ADDED
@@ -0,0 +1,4 @@
gradio
huggingface-hub
requests
Pillow