---
title: Advanced Agentic System
emoji: 🤖
colorFrom: indigo
colorTo: purple
sdk: gradio
sdk_version: latest
app_file: startup.sh
pinned: true
license: apache-2.0
duplicated_from: nananie143/agentic-system
python_version: "3.10"
cuda: "11.8"
hardware: t4-medium

# System requirements
compute:
  instance: t4-medium
  storage: large

# Environment setup
env:
  - MODEL_BACKEND=groq
  - GROQ_API_KEY  # This will be loaded from repository secrets
  - ENABLE_LOCAL_FALLBACK=true
  - CACHE_MODELS=false
  - GRADIO_SERVER_PORT=7860
  - GRADIO_SERVER_NAME=0.0.0.0
  - MAX_PARALLEL_REQUESTS=10
  - REQUEST_TIMEOUT=30
  - BATCH_SIZE=4
  - GRADIO_ANALYTICS_ENABLED=false
  - PYTHONUNBUFFERED=1
  - SPACE_CACHE_DIR=/data/models
  - TORCH_CUDA_ARCH_LIST="7.5"
  - CUDA_VISIBLE_DEVICES=0

# Model configurations
models:
  - rrbale/pruned-qwen-moe/model-Q6_K.gguf
  - YorkieOH10/deepseek-coder-6.7B-kexer-Q8_0-GGUF/model.gguf
  - Nidum-Llama-3.2-3B-Uncensored-GGUF/model-Q6_K.gguf
  - deepseek-ai/JanusFlow-1.3B/model.gguf
  - prithivMLmods/QwQ-4B-Instruct/model.gguf
  - gpt-omni/mini-omni2/mini-omni2.gguf

# Dependencies
dependencies:
  python:
    - "gradio>=4.44.1"
    - "groq>=0.4.1"
    - "fastapi>=0.68.0"
    - "uvicorn>=0.15.0"
    - "pydantic>=2.0.0"
    - "python-dotenv>=0.19.0"
    - "aiohttp>=3.8.0"
    - "asyncio>=3.4.3"
    - "numpy>=1.24.0"
    - "pandas>=2.1.0"
    - "scikit-learn>=1.3.2"
    - "plotly>=5.18.0"
    - "networkx>=3.2.1"
    - "llama-cpp-python>=0.2.23"  # Added for local LLM support
  system:
    - git-lfs
    - cmake
    - ninja-build        # For faster builds
    - build-essential    # Required for compilation
    - cuda-toolkit-11-8
    - nvidia-cuda-toolkit
    - libcudnn8

# Inference settings
inference:
  model_backend: groq
  models:
    - name: mixtral-8x7b-32768
      provider: groq
      max_tokens: 32768
    - name: llama2-70b-4096
      provider: groq
      max_tokens: 4096
  fallback:
    enabled: true
    provider: huggingface
    model: mistral-7b-instruct-v0.2

# Resource limits
resources:
  memory: 16
  cpu: 4
  gpu: 1
  gpu_memory: 16
  disk: 50

# Monitoring
monitoring:
  enable_logging: true
  log_level: INFO
  metrics_enabled: true

# Build configuration
build:
  system_packages:
    - cmake
    - build-essential
    - cuda-toolkit-11-8
    - nvidia-cuda-toolkit
    - libcudnn8
  python_packages:
    - --upgrade pip
    - -r requirements.txt
    - torch --index-url https://download.pytorch.org/whl/cu118
    - llama-cpp-python --no-cache-dir

# Runtime configuration
runtime:
  build:
    cuda: "11.8"
    python: "3.10"
  env:
    - PYTHONUNBUFFERED=1
    - GRADIO_SERVER_NAME=0.0.0.0
    - TORCH_CUDA_ARCH_LIST="7.5"
    - CUDA_VISIBLE_DEVICES=0
    - GRADIO_ANALYTICS_ENABLED=false
---
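The `inference` block above pairs Groq-hosted models with a fallback path, and the `env` block carries the switches (`MODEL_BACKEND`, `ENABLE_LOCAL_FALLBACK`, `REQUEST_TIMEOUT`) that would drive it. Below is a minimal sketch of how that wiring could look, assuming the `groq` and `llama-cpp-python` packages from the dependency list; the `generate` helper and `LOCAL_MODEL_PATH` are illustrative, not taken from the Space's actual code, and the config's separate `huggingface` fallback provider is not shown — only the local GGUF route enabled by `ENABLE_LOCAL_FALLBACK`.

```python
import os

from groq import Groq
from llama_cpp import Llama

GROQ_MODEL = "mixtral-8x7b-32768"  # primary model named in the inference config
# Illustrative path only; the config just sets SPACE_CACHE_DIR=/data/models.
LOCAL_MODEL_PATH = os.path.join(os.getenv("SPACE_CACHE_DIR", "/data/models"), "model-Q6_K.gguf")


def generate(prompt: str) -> str:
    """Try the Groq backend first; fall back to a local GGUF model if enabled."""
    try:
        client = Groq(
            api_key=os.environ["GROQ_API_KEY"],
            timeout=int(os.getenv("REQUEST_TIMEOUT", "30")),
        )
        resp = client.chat.completions.create(
            model=GROQ_MODEL,
            messages=[{"role": "user", "content": prompt}],
        )
        return resp.choices[0].message.content
    except Exception:
        if os.getenv("ENABLE_LOCAL_FALLBACK", "false").lower() != "true":
            raise
        # Local fallback: load a quantized GGUF model with llama-cpp-python,
        # offloading all layers to the T4 GPU.
        llm = Llama(model_path=LOCAL_MODEL_PATH, n_gpu_layers=-1, n_ctx=4096)
        out = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}]
        )
        return out["choices"][0]["message"]["content"]
```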
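On the serving side, the Gradio-related environment variables map directly onto `launch()` arguments. A minimal sketch, assuming a placeholder `respond` handler (the real entry point is whatever `startup.sh` launches, which this config does not show):

```python
import os

import gradio as gr


def respond(message: str) -> str:
    # Placeholder handler; the real Space would call the agentic backend here.
    return f"Echo: {message}"


demo = gr.Interface(
    fn=respond,
    inputs="text",
    outputs="text",
    title="Advanced Agentic System",
)

if __name__ == "__main__":
    demo.launch(
        server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.getenv("GRADIO_SERVER_PORT", "7860")),
    )
```

Recent Gradio releases also pick up `GRADIO_SERVER_NAME` and `GRADIO_SERVER_PORT` from the environment on their own, so passing them explicitly is mostly defensive.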