title: Advanced Agentic System
emoji: 🤖
colorFrom: indigo
colorTo: purple
sdk: gradio
sdk_version: 4.44.1  # Spaces expects a concrete version; pinned to match the gradio requirement below
app_file: startup.sh  # note: with sdk: gradio the entry point is normally a Python file (e.g. app.py)
pinned: true
license: apache-2.0
duplicated_from: nananie143/agentic-system
python_version: "3.10"
cuda: "11.8"
hardware: t4-medium

# System requirements
compute:
  instance: t4-medium
  storage: large

# Environment setup
env:
  - MODEL_BACKEND=groq
  - GROQ_API_KEY  # This will be loaded from repository secrets
  - ENABLE_LOCAL_FALLBACK=true
  - CACHE_MODELS=false
  - GRADIO_SERVER_PORT=7860
  - GRADIO_SERVER_NAME=0.0.0.0
  - MAX_PARALLEL_REQUESTS=10
  - REQUEST_TIMEOUT=30
  - BATCH_SIZE=4
  - GRADIO_ANALYTICS_ENABLED=false
  - PYTHONUNBUFFERED=1
  - SPACE_CACHE_DIR=/data/models
  - TORCH_CUDA_ARCH_LIST="7.5"  # compute capability of the T4 selected above
  - CUDA_VISIBLE_DEVICES=0
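
# A minimal sketch (an assumption, not the actual app code) of how these
# variables might be read on the Python side; os.getenv is standard library
# and the variable names match the list above:
#
#   import os
#   backend   = os.getenv("MODEL_BACKEND", "groq")
#   api_key   = os.environ["GROQ_API_KEY"]           # injected from repository secrets
#   fallback  = os.getenv("ENABLE_LOCAL_FALLBACK", "false").lower() == "true"
#   cache_dir = os.getenv("SPACE_CACHE_DIR", "/data/models")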

# Model configurations
models:
  - rrbale/pruned-qwen-moe/model-Q6_K.gguf
  - YorkieOH10/deepseek-coder-6.7B-kexer-Q8_0-GGUF/model.gguf
  - Nidum-Llama-3.2-3B-Uncensored-GGUF/model-Q6_K.gguf
  - deepseek-ai/JanusFlow-1.3B/model.gguf
  - prithivMLmods/QwQ-4B-Instruct/model.gguf
  - gpt-omni/mini-omni2/mini-omni2.gguf
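
# The GGUF files above are meant to run locally through llama-cpp-python
# (declared under dependencies below). A minimal loading sketch; the
# model_path is illustrative, pointing into SPACE_CACHE_DIR:
#
#   from llama_cpp import Llama
#   llm = Llama(model_path="/data/models/model-Q6_K.gguf", n_gpu_layers=-1)
#   out = llm("Q: What is 2+2? A:", max_tokens=16)
#   print(out["choices"][0]["text"])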

# Dependencies
dependencies:
  python: 
    - "gradio>=4.44.1"
    - "groq>=0.4.1"
    - "fastapi>=0.68.0"
    - "uvicorn>=0.15.0"
    - "pydantic>=2.0.0"
    - "python-dotenv>=0.19.0"
    - "aiohttp>=3.8.0"
    - "asyncio>=3.4.3"
    - "numpy>=1.24.0"
    - "pandas>=2.1.0"
    - "scikit-learn>=1.3.2"
    - "plotly>=5.18.0"
    - "networkx>=3.2.1"
    - "llama-cpp-python>=0.2.23"  # Added for local LLM support
  system:
    - git-lfs
    - cmake
    - ninja-build  # For faster builds
    - build-essential  # Required for compilation
    - cuda-toolkit-11-8
    - nvidia-cuda-toolkit  # distro CUDA packaging; may pull a different CUDA release than cuda-toolkit-11-8
    - libcudnn8
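
# Assumed apt-get equivalent of the system list above:
#
#   apt-get update && apt-get install -y git-lfs cmake ninja-build \
#       build-essential cuda-toolkit-11-8 nvidia-cuda-toolkit libcudnn8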

# Inference settings
inference:
  model_backend: groq
  models:
    - name: mixtral-8x7b-32768
      provider: groq
      max_tokens: 32768
    - name: llama2-70b-4096
      provider: groq
      max_tokens: 4096
  fallback:
    enabled: true
    provider: huggingface
    model: mistralai/Mistral-7B-Instruct-v0.2  # full Hub id so the fallback model resolves
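
# A sketch of the primary-plus-fallback call path. The Groq client and
# chat.completions.create are the documented groq SDK API; local_generate is
# a hypothetical helper standing in for the huggingface fallback configured
# above:
#
#   from groq import Groq
#   client = Groq()  # picks up GROQ_API_KEY from the environment
#   try:
#       resp = client.chat.completions.create(
#           model="mixtral-8x7b-32768",
#           messages=[{"role": "user", "content": "Hello"}],
#           max_tokens=1024,
#       )
#       text = resp.choices[0].message.content
#   except Exception:
#       text = local_generate("Hello")  # hypothetical local/HF fallback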

# Resource limits
resources:
  memory: 16      # GB of system RAM
  cpu: 4          # vCPU cores
  gpu: 1
  gpu_memory: 16  # GB (the T4's full VRAM)
  disk: 50        # GB of persistent storage

# Monitoring
monitoring:
  enable_logging: true
  log_level: INFO
  metrics_enabled: true

# Build configuration
build:
  system_packages:
    - cmake
    - build-essential
    - cuda-toolkit-11-8
    - nvidia-cuda-toolkit
    - libcudnn8
  python_packages:
    - --upgrade pip
    - -r requirements.txt
    - torch --index-url https://download.pytorch.org/whl/cu118
    - llama-cpp-python --no-cache-dir
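
# Assumed expansion of the package list above into pip invocations:
#
#   pip install --upgrade pip
#   pip install -r requirements.txt
#   pip install torch --index-url https://download.pytorch.org/whl/cu118
#   pip install llama-cpp-python --no-cache-dir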

# Runtime configuration
runtime:
  build:
    cuda: "11.8"
    python: "3.10"
  env:
    - PYTHONUNBUFFERED=1
    - GRADIO_SERVER_NAME=0.0.0.0
    - TORCH_CUDA_ARCH_LIST="7.5"
    - CUDA_VISIBLE_DEVICES=0
    - GRADIO_ANALYTICS_ENABLED=false
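
# The GRADIO_* variables map onto Gradio's launch() parameters. A minimal
# sketch, where `demo` stands in for the app's gradio Blocks/Interface object:
#
#   import os
#   demo.launch(
#       server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
#       server_port=int(os.getenv("GRADIO_SERVER_PORT", "7860")),
#   )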