# agentic-system / space.yml
# Last commit 1d75522 by Cascade Bot:
#   "Added Groq streaming support and optimizations - clean version"
# File size: 2.6 kB (hosting-page links: raw, history, blame)
---
# Space metadata: display card, SDK selection and base toolchain versions.
title: Advanced Agentic System
# Robot-face emoji (U+1F916); the previous bytes were a mis-decoded copy of it.
emoji: "🤖"
colorFrom: indigo
colorTo: purple
sdk: gradio
# Pinned to the same release as the gradio==4.44.1 entry in the Python
# dependency list, instead of the floating "latest".
sdk_version: "4.44.1"
# NOTE(review): a shell script is unusual as the app file for the gradio SDK;
# confirm the host really executes startup.sh.
app_file: startup.sh
pinned: true
license: apache-2.0
duplicated_from: nananie143/agentic-system
# Version numbers stay quoted so they are strings, not floats (3.10 -> 3.1).
python_version: "3.10"
cuda: "11.8"
hardware: t4-medium
# System requirements
# Requested machine class and storage tier, nested under `compute` so the two
# settings are no longer stray top-level keys.
compute:
  instance: t4-medium  # same value as the top-level `hardware` key
  storage: large
# Environment setup
# Each entry is a single KEY=VALUE string; entry order is unchanged.
env:
  - 'MODEL_BACKEND=groq'
  # Name-only entries: no value is stored in this file.
  # NOTE(review): presumably injected from the host's secret store; confirm.
  - 'GROQ_API_KEY'
  - 'HUGGINGFACE_TOKEN'
  - 'ENABLE_LOCAL_FALLBACK=true'
  - 'CACHE_MODELS=false'
  - 'GRADIO_SERVER_PORT=7860'
  - 'GRADIO_SERVER_NAME=0.0.0.0'
  - 'MAX_PARALLEL_REQUESTS=10'
  - 'REQUEST_TIMEOUT=30'
  - 'BATCH_SIZE=4'
  - 'GRADIO_ANALYTICS_ENABLED=false'
  - 'PYTHONUNBUFFERED=1'
  - 'SPACE_CACHE_DIR=/data/models'
  # NOTE(review): the double quotes are part of the string as written; verify
  # the consumer strips them before exporting the variable.
  - 'TORCH_CUDA_ARCH_LIST="7.5"'
  - 'CUDA_VISIBLE_DEVICES=0'
# Model configurations
# Model file paths; all entries end in .gguf.
models:
  - "rrbale/pruned-qwen-moe/model-Q6_K.gguf"
  - "YorkieOH10/deepseek-coder-6.7B-kexer-Q8_0-GGUF/model.gguf"
  # NOTE(review): unlike its siblings this path has only two segments; confirm
  # whether an owner prefix is missing.
  - "Nidum-Llama-3.2-3B-Uncensored-GGUF/model-Q6_K.gguf"
  - "deepseek-ai/JanusFlow-1.3B/model.gguf"
  - "prithivMLmods/QwQ-4B-Instruct/model.gguf"
  - "gpt-omni/mini-omni2/mini-omni2.gguf"
# Dependencies
# Python requirement specifiers and system packages, grouped under one key so
# `python` and `system` no longer sit at the top level.
dependencies:
  python:
    - "gradio==4.44.1"
    - "groq>=0.4.1"
    - "huggingface-hub>=0.19.4"
    - "fastapi>=0.68.0"
    - "uvicorn>=0.15.0"
    - "pydantic>=2.0.0"
    - "python-dotenv>=0.19.0"
    - "aiohttp>=3.8.0"
    # "asyncio" is deliberately not listed: it ships with the Python 3.10
    # standard library, and the PyPI package of that name is an obsolete
    # backport that should not be installed over it.
    - "numpy>=1.24.0"
    - "pandas>=2.1.0"
    - "scikit-learn>=1.3.2"
    - "plotly>=5.18.0"
  system:
    - git-lfs
    - cmake
    - build-essential
    - cuda-toolkit-11-8
    - nvidia-cuda-toolkit
    - libcudnn8
# Inference settings
# Primary backend, its model list, and the fallback model.
inference:
  model_backend: groq
  # One mapping per model: identifier, provider, and max_tokens setting.
  # NOTE(review): hosted model identifiers get retired over time; confirm both
  # are still served by the provider.
  models:
    - name: mixtral-8x7b-32768
      provider: groq
      max_tokens: 32768
    - name: llama2-70b-4096
      provider: groq
      max_tokens: 4096
  # NOTE(review): placed under `inference` as the most plausible parent;
  # confirm against the consumer's schema.
  fallback:
    enabled: true
    provider: huggingface
    model: mistral-7b-instruct-v0.2
# Resource limits
# NOTE(review): units are not stated in this file; memory, gpu_memory and disk
# are presumably gigabytes, cpu and gpu are presumably counts. Confirm.
resources:
  memory: 16
  cpu: 4
  gpu: 1
  gpu_memory: 16
  disk: 50
# Monitoring
# Logging and metrics switches, nested under `monitoring`.
monitoring:
  enable_logging: true
  log_level: INFO
  metrics_enabled: true
# Build configuration
# System packages and Python package arguments used at build time.
build:
  system_packages:
    - cmake
    - build-essential
    - cuda-toolkit-11-8
    - nvidia-cuda-toolkit
    - libcudnn8
  # Each entry is one argument string. Entries are quoted because several
  # begin with "-", which is easy to misread next to the list marker.
  python_packages:
    - "--upgrade pip"
    - "-r requirements.txt"
    - "torch --index-url https://download.pytorch.org/whl/cu118"
    - "llama-cpp-python --no-cache-dir"
# Runtime configuration
# Toolchain versions and environment entries for the running app, nested so
# they no longer duplicate the top-level `build` and `env` keys.
runtime:
  build:
    # Quoted so the versions stay strings rather than floats (3.10 -> 3.1).
    cuda: "11.8"
    python: "3.10"
  # NOTE(review): placed directly under `runtime` (sibling of `build`) as the
  # most plausible parent; confirm against the consumer's schema.
  env:
    - 'PYTHONUNBUFFERED=1'
    - 'GRADIO_SERVER_NAME=0.0.0.0'
    # The double quotes are part of the string exactly as written.
    - 'TORCH_CUDA_ARCH_LIST="7.5"'
    - 'CUDA_VISIBLE_DEVICES=0'
    - 'GRADIO_ANALYTICS_ENABLED=false'